Skip to content

Commit

Permalink
Concrete metrics
Browse files Browse the repository at this point in the history
related to #124

This PR contains several specific metrics
```golang
	HealthyRecords *prometheus.GaugeVec
	StatusPerIngressHosts *prometheus.GaugeVec
	StatusFailover *prometheus.GaugeVec
	StatusRoundRobin *prometheus.GaugeVec
	StatusGeoIP *prometheus.GaugeVec
	ZoneUpdateTotal *prometheus.CounterVec
	ErrorTotal *prometheus.CounterVec
	ReconciliationTotal prometheus.Counter
```

Signed-off-by: kuritka <kuritka@gmail.com>
  • Loading branch information
kuritka committed Aug 19, 2021
1 parent ac257de commit 1f67641
Show file tree
Hide file tree
Showing 12 changed files with 414 additions and 61 deletions.
1 change: 1 addition & 0 deletions Makefile
Expand Up @@ -289,6 +289,7 @@ manager: lint
mocks:
go install github.com/golang/mock/mockgen@v1.5.0
mockgen -source=controllers/providers/assistant/assistant.go -destination=controllers/providers/assistant/assistant_mock.go -package=assistant
mockgen -source=controllers/providers/dns/dns.go -destination=controllers/providers/dns/dns_mock.go -package=dns
$(call golic)

# remove clusters and redeploy
Expand Down
14 changes: 13 additions & 1 deletion api/v1beta1/gslb_types.go
Expand Up @@ -48,7 +48,7 @@ type GslbSpec struct {
// GslbStatus defines the observed state of Gslb
type GslbStatus struct {
// Associated Service status
ServiceHealth map[string]string `json:"serviceHealth"`
ServiceHealth map[string]HealthStatus `json:"serviceHealth"`
// Current Healthy DNS record structure
HealthyRecords map[string][]string `json:"healthyRecords"`
// Cluster Geo Tag
Expand Down Expand Up @@ -78,6 +78,18 @@ type GslbList struct {
Items []Gslb `json:"items"`
}

// HealthStatus is the string-typed health value stored per host in
// GslbStatus.ServiceHealth.
type HealthStatus string

// Known HealthStatus values. Each constant's underlying string is identical
// to its name.
const (
// Healthy is the "Healthy" status value.
Healthy HealthStatus = "Healthy"
// Unhealthy is the "Unhealthy" status value.
Unhealthy HealthStatus = "Unhealthy"
// NotFound is the "NotFound" status value.
// NOTE(review): presumably means no backing service was found for the host — confirm.
NotFound HealthStatus = "NotFound"
)

// String returns the underlying string of h, e.g. for use as a metric label value.
func (h HealthStatus) String() string {
return string(h)
}

// init registers the Gslb and GslbList types with SchemeBuilder.
func init() {
SchemeBuilder.Register(&Gslb{}, &GslbList{})
}
2 changes: 1 addition & 1 deletion api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 19 additions & 3 deletions controllers/dnsupdate.go
Expand Up @@ -34,6 +34,7 @@ func sortTargets(targets []string) []string {
})
return targets
}

func (r *GslbReconciler) gslbDNSEndpoint(gslb *k8gbv1beta1.Gslb) (*externaldns.DNSEndpoint, error) {
var gslbHosts []*externaldns.Endpoint
var ttl = externaldns.TTL(gslb.Spec.Strategy.DNSTtlSeconds)
Expand All @@ -55,7 +56,10 @@ func (r *GslbReconciler) gslbDNSEndpoint(gslb *k8gbv1beta1.Gslb) (*externaldns.D
return nil, fmt.Errorf("ingress host %s does not match delegated zone %s", host, r.Config.EdgeDNSZone)
}

if health == "Healthy" {
isPrimary := gslb.Spec.Strategy.PrimaryGeoTag == r.Config.ClusterGeoTag
isHealthy := health == k8gbv1beta1.Healthy

if isHealthy {
finalTargets = append(finalTargets, localTargets...)
localTargetsHost := fmt.Sprintf("localtargets-%s", host)
dnsRecord := &externaldns.Endpoint{
Expand All @@ -78,10 +82,10 @@ func (r *GslbReconciler) gslbDNSEndpoint(gslb *k8gbv1beta1.Gslb) (*externaldns.D
finalTargets = append(finalTargets, externalTargets...)
case failoverStrategy:
// If cluster is Primary
if gslb.Spec.Strategy.PrimaryGeoTag == r.Config.ClusterGeoTag {
if isPrimary {
// If cluster is Primary and Healthy return only own targets
// If cluster is Primary and Unhealthy return Secondary external targets
if health != "Healthy" {
if !isHealthy {
finalTargets = externalTargets
log.Info().Msgf("Executing failover strategy for %s Gslb on Primary. Workload on primary %s cluster is unhealthy, targets are %v",
gslb.Name, gslb.Spec.Strategy.PrimaryGeoTag, finalTargets)
Expand All @@ -99,6 +103,7 @@ func (r *GslbReconciler) gslbDNSEndpoint(gslb *k8gbv1beta1.Gslb) (*externaldns.D
log.Info().Msgf("No external targets have been found for host %s", host)
}

r.updateRuntimeStatus(gslb, isPrimary, health, finalTargets)
log.Info().Msgf("Final target list for %s Gslb: %v", gslb.Name, finalTargets)

if len(finalTargets) > 0 {
Expand Down Expand Up @@ -134,3 +139,14 @@ func (r *GslbReconciler) gslbDNSEndpoint(gslb *k8gbv1beta1.Gslb) (*externaldns.D
}
return dnsEndpoint, err
}

// updateRuntimeStatus forwards the computed state of a Gslb to the metric
// updater that matches the Gslb's strategy type.
//
// isPrimary, health and finalTargets are passed through unchanged. health is
// the HealthStatus value itself (Healthy, Unhealthy, NotFound), not a boolean.
// Strategy types other than round-robin, geo and failover are ignored and no
// metric is updated for them.
func (r *GslbReconciler) updateRuntimeStatus(gslb *k8gbv1beta1.Gslb, isPrimary bool, health k8gbv1beta1.HealthStatus, finalTargets []string) {
	switch gslb.Spec.Strategy.Type {
	case roundRobinStrategy:
		m.UpdateRoundrobinStatus(gslb, isPrimary, health, finalTargets)
	case geoStrategy:
		m.UpdateGeoIPStatus(gslb, isPrimary, health, finalTargets)
	case failoverStrategy:
		m.UpdateFailoverStatus(gslb, isPrimary, health, finalTargets)
	}
}
9 changes: 9 additions & 0 deletions controllers/gslb_controller.go
Expand Up @@ -84,11 +84,13 @@ func (r *GslbReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
// Return and don't requeue
return result.Stop()
}
m.ErrorIncrement(gslb)
return result.RequeueError(fmt.Errorf("error reading the object (%s)", err))
}

err = r.DepResolver.ResolveGslbSpec(ctx, gslb, r.Client)
if err != nil {
m.ErrorIncrement(gslb)
return result.RequeueError(fmt.Errorf("resolving spec (%s)", err))
}
log.Debug().
Expand Down Expand Up @@ -127,42 +129,49 @@ func (r *GslbReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
// Add finalizer for this CR
if !contains(gslb.GetFinalizers(), gslbFinalizer) {
if err := r.addFinalizer(gslb); err != nil {
m.ErrorIncrement(gslb)
return result.RequeueError(err)
}
}

// == Ingress ==========
ingress, err := r.gslbIngress(gslb)
if err != nil {
m.ErrorIncrement(gslb)
return result.RequeueError(err)
}

err = r.saveIngress(gslb, ingress)
if err != nil {
m.ErrorIncrement(gslb)
return result.RequeueError(err)
}

// == external-dns dnsendpoints CRs ==
dnsEndpoint, err := r.gslbDNSEndpoint(gslb)
if err != nil {
m.ErrorIncrement(gslb)
return result.RequeueError(err)
}

err = r.DNSProvider.SaveDNSEndpoint(gslb, dnsEndpoint)
if err != nil {
m.ErrorIncrement(gslb)
return result.RequeueError(err)
}

// == handle delegated zone in Edge DNS
err = r.DNSProvider.CreateZoneDelegationForExternalDNS(gslb)
if err != nil {
log.Err(err).Msg("Unable to create zone delegation")
m.ErrorIncrement(gslb)
return result.Requeue()
}

// == Status =
err = r.updateGslbStatus(gslb)
if err != nil {
m.ErrorIncrement(gslb)
return result.RequeueError(err)
}

Expand Down
53 changes: 41 additions & 12 deletions controllers/gslb_controller_test.go
Expand Up @@ -27,6 +27,8 @@ import (
"testing"
"time"

"github.com/golang/mock/gomock"

str "github.com/AbsaOSS/gopkg/strings"
k8gbv1beta1 "github.com/AbsaOSS/k8gb/api/v1beta1"
"github.com/AbsaOSS/k8gb/controllers/depresolver"
Expand Down Expand Up @@ -104,7 +106,7 @@ const coreDNSExtServiceName = "k8gb-coredns-lb"
func TestNotFoundServiceStatus(t *testing.T) {
// arrange
settings := provideSettings(t, predefinedConfig)
expectedServiceStatus := "NotFound"
expectedServiceStatus := k8gbv1beta1.NotFound
notFoundHost := "notfound.cloud.example.com"
// act
actualServiceStatus := settings.gslb.Status.ServiceHealth[notFoundHost]
Expand All @@ -118,7 +120,7 @@ func TestUnhealthyServiceStatus(t *testing.T) {
settings := provideSettings(t, predefinedConfig)
serviceName := "unhealthy-app"
unhealthyHost := "unhealthy.cloud.example.com"
expectedServiceStatus := "Unhealthy"
expectedServiceStatus := k8gbv1beta1.Unhealthy
defer deleteUnhealthyService(t, &settings, serviceName)
// act
createUnhealthyService(t, &settings, serviceName)
Expand All @@ -133,7 +135,7 @@ func TestHealthyServiceStatus(t *testing.T) {
// arrange
settings := provideSettings(t, predefinedConfig)
serviceName := "frontend-podinfo"
expectedServiceStatus := "Healthy"
expectedServiceStatus := k8gbv1beta1.Healthy
healthyHost := "roundrobin.cloud.example.com"
defer deleteHealthyService(t, &settings, serviceName)
createHealthyService(t, &settings, serviceName)
Expand All @@ -150,7 +152,7 @@ func TestIngressHostsPerStatusMetric(t *testing.T) {
settings := provideSettings(t, predefinedConfig)
expectedHostsMetricCount := 3
// act
ingressHostsPerStatusMetric := metrics.Metrics().Get("k8gb_gslb_ingress_hosts_per_status").AsGaugeVec()
ingressHostsPerStatusMetric := metrics.Metrics().Get("k8gb_gslb_status_per_ingress_hosts").AsGaugeVec()
err := settings.client.Get(context.TODO(), settings.request.NamespacedName, settings.gslb)
actualHostsMetricCount := testutil.CollectAndCount(ingressHostsPerStatusMetric)
// assert
Expand All @@ -172,9 +174,9 @@ func TestIngressHostsPerStatusMetricReflectionForHealthyStatus(t *testing.T) {
reconcileAndUpdateGslb(t, settings)
// act
err := settings.client.Get(context.TODO(), settings.request.NamespacedName, settings.gslb)
ingressHostsPerStatusMetric := metrics.Metrics().Get("k8gb_gslb_ingress_hosts_per_status").AsGaugeVec()
ingressHostsPerStatusMetric := metrics.Metrics().Get("k8gb_gslb_status_per_ingress_hosts").AsGaugeVec()
healthyHosts := ingressHostsPerStatusMetric.With(prometheus.Labels{"namespace": settings.gslb.Namespace,
"name": settings.gslb.Name, "status": metrics.HealthyStatus})
"name": settings.gslb.Name, "status": k8gbv1beta1.Healthy.String()})
actualHostsMetric := testutil.ToFloat64(healthyHosts)
// assert
assert.NoError(t, err, "Failed to get expected gslb")
Expand All @@ -190,9 +192,9 @@ func TestIngressHostsPerStatusMetricReflectionForUnhealthyStatus(t *testing.T) {
err := settings.client.Get(context.TODO(), settings.request.NamespacedName, settings.gslb)
expectedHostsMetricCount := 0.
// act
ingressHostsPerStatusMetric := metrics.Metrics().Get("k8gb_gslb_ingress_hosts_per_status").AsGaugeVec()
ingressHostsPerStatusMetric := metrics.Metrics().Get("k8gb_gslb_status_per_ingress_hosts").AsGaugeVec()
unhealthyHosts := ingressHostsPerStatusMetric.With(prometheus.Labels{"namespace": settings.gslb.Namespace,
"name": settings.gslb.Name, "status": metrics.UnhealthyStatus})
"name": settings.gslb.Name, "status": k8gbv1beta1.Unhealthy.String()})
actualHostsMetricCount := testutil.ToFloat64(unhealthyHosts)
// assert
assert.NoError(t, err, "Failed to get expected gslb")
Expand All @@ -208,7 +210,7 @@ func TestIngressHostsPerStatusMetricReflectionForUnhealthyStatus(t *testing.T) {
// act
unhealthyHosts =
ingressHostsPerStatusMetric.With(prometheus.Labels{"namespace": settings.gslb.Namespace,
"name": settings.gslb.Name, "status": metrics.UnhealthyStatus})
"name": settings.gslb.Name, "status": k8gbv1beta1.Unhealthy.String()})
actualHostsMetricCount = testutil.ToFloat64(unhealthyHosts)
// assert
assert.Equal(t, expectedHostsMetricCount, actualHostsMetricCount, "expected %v managed hosts with Healthy status, but got %v",
Expand All @@ -228,9 +230,9 @@ func TestIngressHostsPerStatusMetricReflectionForNotFoundStatus(t *testing.T) {
// act
err := settings.client.Get(context.TODO(), settings.request.NamespacedName, settings.gslb)
require.NoError(t, err, "Failed to get expected gslb")
ingressHostsPerStatusMetric := metrics.Metrics().Get("k8gb_gslb_ingress_hosts_per_status").AsGaugeVec()
ingressHostsPerStatusMetric := metrics.Metrics().Get("k8gb_gslb_status_per_ingress_hosts").AsGaugeVec()
unknownHosts, err := ingressHostsPerStatusMetric.GetMetricWith(
prometheus.Labels{"namespace": settings.gslb.Namespace, "name": settings.gslb.Name, "status": metrics.NotFoundStatus})
prometheus.Labels{"namespace": settings.gslb.Namespace, "name": settings.gslb.Name, "status": k8gbv1beta1.NotFound.String()})
require.NoError(t, err, "Failed to get ingress metrics")
actualHostsMetricCount := testutil.ToFloat64(unknownHosts)
// assert
Expand Down Expand Up @@ -270,7 +272,7 @@ func TestHealthyRecordMetric(t *testing.T) {
func TestMetricLinterCheck(t *testing.T) {
// arrange
healthyRecordsMetric := metrics.Metrics().Get("k8gb_gslb_healthy_records").AsGaugeVec()
ingressHostsPerStatusMetric := metrics.Metrics().Get("k8gb_gslb_ingress_hosts_per_status").AsGaugeVec()
ingressHostsPerStatusMetric := metrics.Metrics().Get("k8gb_gslb_status_per_ingress_hosts").AsGaugeVec()
reconciliationTotal := metrics.Metrics().Get("k8gb_gslb_reconciliation_total").AsCounter()
for name, scenario := range map[string]prometheus.Collector{
"healthy_records": healthyRecordsMetric,
Expand Down Expand Up @@ -298,6 +300,33 @@ func TestGslbReconciliationTotalIncrement(t *testing.T) {
assert.Equal(t, cnt+1, cnt2)
}

// TestGslbErrorsIncrement runs two reconciliations that fail at different
// DNS provider calls and asserts that the k8gb_gslb_error_total counter for
// the Gslb grew by exactly two.
func TestGslbErrorsIncrement(t *testing.T) {
	// arrange
	const metricName = "k8gb_gslb_error_total"
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()
	settings := provideSettings(t, predefinedConfig)
	labels := prometheus.Labels{"namespace": settings.gslb.Namespace, "name": settings.gslb.Name}
	// errorCount reads the current value of the error counter for this Gslb.
	errorCount := func() float64 {
		return testutil.ToFloat64(metrics.Metrics().Get(metricName).AsCounterVec().With(labels))
	}
	provider := dns.NewMockProvider(mockCtrl)
	before := errorCount()
	// first pass: resolving exposed IPs succeeds once, saving the DNS endpoint fails once
	provider.EXPECT().GslbIngressExposedIPs(gomock.Any()).Return([]string{}, nil).Times(1)
	provider.EXPECT().SaveDNSEndpoint(gomock.Any(), gomock.Any()).Return(fmt.Errorf("save DNS error")).Times(1)
	provider.EXPECT().GetExternalTargets(gomock.Any()).Return([]string{}).AnyTimes()
	provider.EXPECT().CreateZoneDelegationForExternalDNS(gomock.Any()).Return(nil).AnyTimes()
	settings.reconciler.DNSProvider = provider

	// act
	_, err := settings.reconciler.Reconcile(context.TODO(), settings.request)
	require.Error(t, err)

	// second pass: saving now succeeds, but resolving exposed IPs fails instead
	provider.EXPECT().SaveDNSEndpoint(gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
	provider.EXPECT().GslbIngressExposedIPs(gomock.Any()).Return([]string{}, fmt.Errorf("exposed IP's error")).AnyTimes()
	_, err = settings.reconciler.Reconcile(context.TODO(), settings.request)
	after := errorCount()

	// assert
	assert.Error(t, err)
	assert.Equal(t, before+2, after)
}

func TestGslbCreatesDNSEndpointCRForHealthyIngressHosts(t *testing.T) {
// arrange
serviceName := "frontend-podinfo"
Expand Down

0 comments on commit 1f67641

Please sign in to comment.