diff --git a/internal/addon/addon.go b/internal/addon/addon.go
index 3d61c71..8a9f9db 100644
--- a/internal/addon/addon.go
+++ b/internal/addon/addon.go
@@ -1,12 +1,24 @@
 package addon
 
 import (
+	"errors"
+	"fmt"
+
+	otelv1alpha1 "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
+	loggingv1 "github.com/openshift/cluster-logging-operator/apis/logging/v1"
 	"open-cluster-management.io/addon-framework/pkg/agent"
 	"open-cluster-management.io/addon-framework/pkg/utils"
 	addonapiv1alpha1 "open-cluster-management.io/api/addon/v1alpha1"
+	workv1 "open-cluster-management.io/api/work/v1"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
+var (
+	errProbeConditionNotSatisfied = errors.New("probe condition is not satisfied")
+	errProbeValueIsNil            = errors.New("probe value is nil")
+	errValueNotProbed             = errors.New("value not probed")
+)
+
 func NewRegistrationOption(agentName string) *agent.RegistrationOption {
 	return &agent.RegistrationOption{
 		CSRConfigurations: agent.KubeClientSignerConfigurations(Name, agentName),
@@ -30,3 +42,90 @@ func GetObjectKey(configRef []addonapiv1alpha1.ConfigReference, group, resource
 	}
 	return key
 }
+
+// AgentHealthProber returns a HealthProber struct that contains the necessary
+// information to assert whether an addon deployment is ready.
+func AgentHealthProber() *agent.HealthProber {
+	return &agent.HealthProber{
+		Type: agent.HealthProberTypeWork,
+		WorkProber: &agent.WorkHealthProber{
+			ProbeFields: []agent.ProbeField{
+				{
+					ResourceIdentifier: workv1.ResourceIdentifier{
+						Group:     loggingv1.GroupVersion.Group,
+						Resource:  ClusterLogForwardersResource,
+						Name:      SpokeCLFName,
+						Namespace: SpokeCLFNamespace,
+					},
+					ProbeRules: []workv1.FeedbackRule{
+						{
+							Type: workv1.JSONPathsType,
+							JsonPaths: []workv1.JsonPath{
+								{
+									Name: clfProbeKey,
+									Path: clfProbePath,
+								},
+							},
+						},
+					},
+				},
+				{
+					ResourceIdentifier: workv1.ResourceIdentifier{
+						Group:     otelv1alpha1.GroupVersion.Group,
+						Resource:  OpenTelemetryCollectorsResource,
+						Name:      SpokeOTELColName,
+						Namespace: SpokeOTELColNamespace,
+					},
+					ProbeRules: []workv1.FeedbackRule{
+						{
+							Type: workv1.JSONPathsType,
+							JsonPaths: []workv1.JsonPath{
+								{
+									Name: otelColProbeKey,
+									Path: otelColProbePath,
+								},
+							},
+						},
+					},
+				},
+			},
+			HealthCheck: func(identifier workv1.ResourceIdentifier, result workv1.StatusFeedbackResult) error {
+				for _, value := range result.Values {
+					switch {
+					case identifier.Resource == ClusterLogForwardersResource:
+						if value.Name != clfProbeKey {
+							continue
+						}
+
+						if value.Value.String == nil {
+							return fmt.Errorf("%w: clusterlogforwarder with key %s/%s", errProbeValueIsNil, identifier.Namespace, identifier.Name)
+						}
+
+						if *value.Value.String != "True" {
+							return fmt.Errorf("%w: clusterlogforwarder status condition type is %s for %s/%s", errProbeConditionNotSatisfied, *value.Value.String, identifier.Namespace, identifier.Name)
+						}
+
+						return nil
+					case identifier.Resource == OpenTelemetryCollectorsResource:
+						if value.Name != otelColProbeKey {
+							continue
+						}
+
+						if value.Value.Integer == nil {
+							return fmt.Errorf("%w: opentelemetrycollector with key %s/%s", errProbeValueIsNil, identifier.Namespace, identifier.Name)
+						}
+
+						if *value.Value.Integer < 1 {
+							return fmt.Errorf("%w: opentelemetrycollector replicas is %d for %s/%s", errProbeConditionNotSatisfied, *value.Value.Integer, identifier.Namespace, identifier.Name)
+						}
+
+						return nil
+					default:
+						continue
+					}
+				}
+				return fmt.Errorf("%w: for resource %s with key %s/%s", errValueNotProbed, identifier.Resource, identifier.Namespace, identifier.Name)
+			},
+		},
+	}
+}
diff --git a/internal/addon/addon_test.go b/internal/addon/addon_test.go
new file mode 100644
index 0000000..c8b097e
--- /dev/null
+++ b/internal/addon/addon_test.go
@@ -0,0 +1,100 @@
+package addon
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	workv1 "open-cluster-management.io/api/work/v1"
+
+	otelv1alpha1 "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
+	loggingv1 "github.com/openshift/cluster-logging-operator/apis/logging/v1"
+)
+
+func Test_AgentHealthProber_CLF(t *testing.T) {
+	unhealthyError := fmt.Errorf("%w: clusterlogforwarder status condition type is %s for %s/%s", errProbeConditionNotSatisfied, "False", SpokeCLFNamespace, SpokeCLFName)
+	for _, tc := range []struct {
+		name        string
+		status      string
+		expectedErr string
+	}{
+		{
+			name:   "healthy",
+			status: "True",
+		},
+		{
+			name:        "unhealthy",
+			status:      "False",
+			expectedErr: unhealthyError.Error(),
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			healthProber := AgentHealthProber()
+			err := healthProber.WorkProber.HealthCheck(workv1.ResourceIdentifier{
+				Group:     loggingv1.GroupVersion.Group,
+				Resource:  ClusterLogForwardersResource,
+				Name:      SpokeCLFName,
+				Namespace: SpokeCLFNamespace,
+			}, workv1.StatusFeedbackResult{
+				Values: []workv1.FeedbackValue{
+					{
+						Name: "isReady",
+						Value: workv1.FieldValue{
+							Type:   workv1.String,
+							String: &tc.status,
+						},
+					},
+				},
+			})
+			if tc.expectedErr != "" {
+				require.EqualError(t, err, tc.expectedErr)
+				return
+			}
+			require.NoError(t, err)
+		})
+	}
+}
+
+func Test_AgentHealthProber_OTELCol(t *testing.T) {
+	unhealthyError := fmt.Errorf("%w: opentelemetrycollector replicas is %d for %s/%s", errProbeConditionNotSatisfied, 0, SpokeOTELColNamespace, SpokeOTELColName)
+	for _, tc := range []struct {
+		name        string
+		replicas    int64
+		expectedErr string
+	}{
+		{
+			name:     "healthy",
+			replicas: 1,
+		},
+		{
+			name:        "unhealthy",
+			replicas:    0,
+			expectedErr: unhealthyError.Error(),
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			healthProber := AgentHealthProber()
+			err := healthProber.WorkProber.HealthCheck(workv1.ResourceIdentifier{
+				Group:     otelv1alpha1.GroupVersion.Group,
+				Resource:  OpenTelemetryCollectorsResource,
+				Name:      SpokeOTELColName,
+				Namespace: SpokeOTELColNamespace,
+			}, workv1.StatusFeedbackResult{
+				Values: []workv1.FeedbackValue{
+					{
+						Name: "replicas",
+						Value: workv1.FieldValue{
+							Type:    workv1.Integer,
+							Integer: &tc.replicas,
+						},
+					},
+				},
+			})
+			if tc.expectedErr != "" {
+				require.EqualError(t, err, tc.expectedErr)
+				return
+			}
+			require.NoError(t, err)
+		})
+	}
+}
diff --git a/internal/addon/manifests/charts/mcoa/charts/logging/templates/clusterlogforwarder.yaml b/internal/addon/manifests/charts/mcoa/charts/logging/templates/clusterlogforwarder.yaml
index 1fa49c6..be3b4c1 100644
--- a/internal/addon/manifests/charts/mcoa/charts/logging/templates/clusterlogforwarder.yaml
+++ b/internal/addon/manifests/charts/mcoa/charts/logging/templates/clusterlogforwarder.yaml
@@ -2,7 +2,7 @@
 apiVersion: logging.openshift.io/v1
 kind: ClusterLogForwarder
 metadata:
-  name: instance
+  name: mcoa-instance
   namespace: openshift-logging
   labels:
     app: {{ template "logginghelm.name" . }}
diff --git a/internal/addon/manifests/charts/mcoa/charts/logging/templates/clusterrole-binding.yaml b/internal/addon/manifests/charts/mcoa/charts/logging/templates/clusterrole-binding.yaml
new file mode 100644
index 0000000..f9fe8b0
--- /dev/null
+++ b/internal/addon/manifests/charts/mcoa/charts/logging/templates/clusterrole-binding.yaml
@@ -0,0 +1,52 @@
+{{- if .Values.enabled }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: openshift-logging:mcoa-logcollector:application-logs
+  labels:
+    app: {{ template "logginghelm.name" . }}
+    chart: {{ template "logginghelm.chart" . }}
+    release: {{ .Release.Name }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: collect-application-logs
+subjects:
+  - kind: ServiceAccount
+    name: mcoa-logcollector
+    namespace: openshift-logging
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: openshift-logging:mcoa-logcollector:audit-logs
+  labels:
+    app: {{ template "logginghelm.name" . }}
+    chart: {{ template "logginghelm.chart" . }}
+    release: {{ .Release.Name }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: collect-audit-logs
+subjects:
+  - kind: ServiceAccount
+    name: mcoa-logcollector
+    namespace: openshift-logging
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: openshift-logging:mcoa-logcollector:infrastructure-logs
+  labels:
+    app: {{ template "logginghelm.name" . }}
+    chart: {{ template "logginghelm.chart" . }}
+    release: {{ .Release.Name }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: collect-infrastructure-logs
+subjects:
+  - kind: ServiceAccount
+    name: mcoa-logcollector
+    namespace: openshift-logging
+{{- end }}
\ No newline at end of file
diff --git a/internal/addon/manifests/charts/mcoa/charts/logging/templates/service-account.yaml b/internal/addon/manifests/charts/mcoa/charts/logging/templates/service-account.yaml
new file mode 100644
index 0000000..2356d10
--- /dev/null
+++ b/internal/addon/manifests/charts/mcoa/charts/logging/templates/service-account.yaml
@@ -0,0 +1,11 @@
+{{- if .Values.enabled }}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: mcoa-logcollector
+  namespace: openshift-logging
+  labels:
+    app: {{ template "logginghelm.name" . }}
+    chart: {{ template "logginghelm.chart" . }}
+    release: {{ .Release.Name }}
+{{- end }}
\ No newline at end of file
diff --git a/internal/addon/manifests/charts/mcoa/charts/tracing/templates/namespace.yaml b/internal/addon/manifests/charts/mcoa/charts/tracing/templates/namespace.yaml
index b86b6ce..61ed4b7 100644
--- a/internal/addon/manifests/charts/mcoa/charts/tracing/templates/namespace.yaml
+++ b/internal/addon/manifests/charts/mcoa/charts/tracing/templates/namespace.yaml
@@ -11,7 +11,7 @@ metadata:
 apiVersion: v1
 kind: Namespace
 metadata:
-  name: spoke-otelcol
+  name: mcoa-opentelemetry
   labels:
     app: {{ template "tracinghelm.name" . }}
    chart: {{ template "tracinghelm.chart" . }}
diff --git a/internal/addon/manifests/charts/mcoa/charts/tracing/templates/opentelemetrycollector.yaml b/internal/addon/manifests/charts/mcoa/charts/tracing/templates/opentelemetrycollector.yaml
index eb3bd4c..172ca0c 100644
--- a/internal/addon/manifests/charts/mcoa/charts/tracing/templates/opentelemetrycollector.yaml
+++ b/internal/addon/manifests/charts/mcoa/charts/tracing/templates/opentelemetrycollector.yaml
@@ -2,8 +2,8 @@
 apiVersion: opentelemetry.io/v1beta1
 kind: OpenTelemetryCollector
 metadata:
-  name: spoke-otelcol
-  namespace: spoke-otelcol
+  name: mcoa-instance
+  namespace: mcoa-opentelemetry
   labels:
     app: {{ template "tracinghelm.name" . }}
     chart: {{ template "tracinghelm.chart" . }}
diff --git a/internal/addon/manifests/charts/mcoa/charts/tracing/templates/secret.yaml b/internal/addon/manifests/charts/mcoa/charts/tracing/templates/secret.yaml
index c589902..bc874f5 100644
--- a/internal/addon/manifests/charts/mcoa/charts/tracing/templates/secret.yaml
+++ b/internal/addon/manifests/charts/mcoa/charts/tracing/templates/secret.yaml
@@ -4,7 +4,7 @@ apiVersion: v1
 kind: Secret
 metadata:
   name: {{ $secret_config.name }}
-  namespace: spoke-otelcol
+  namespace: mcoa-opentelemetry
   labels:
     app: {{ template "tracinghelm.name" $ }}
     chart: {{ template "tracinghelm.chart" $ }}
diff --git a/internal/addon/var.go b/internal/addon/var.go
index 79be9db..01f9199 100644
--- a/internal/addon/var.go
+++ b/internal/addon/var.go
@@ -16,6 +16,18 @@ const (
 
 	AdcLoggingDisabledKey = "loggingDisabled"
 	AdcTracingisabledKey  = "tracingDisabled"
+
+	ClusterLogForwardersResource = "clusterlogforwarders"
+	SpokeCLFName                 = "mcoa-instance"
+	SpokeCLFNamespace            = "openshift-logging"
+	clfProbeKey                  = "isReady"
+	clfProbePath                 = ".status.conditions[?(@.type==\"Ready\")].status"
+
+	OpenTelemetryCollectorsResource = "opentelemetrycollectors"
+	SpokeOTELColName                = "mcoa-instance"
+	SpokeOTELColNamespace           = "mcoa-opentelemetry"
+	otelColProbeKey                 = "replicas"
+	otelColProbePath                = ".spec.replicas"
 )
 
 //go:embed manifests
diff --git a/internal/logging/handlers/handler.go b/internal/logging/handlers/handler.go
index 5e0492f..c7382ee 100644
--- a/internal/logging/handlers/handler.go
+++ b/internal/logging/handlers/handler.go
@@ -11,16 +11,12 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
-const (
-	clusterLogForwarderResource = "clusterlogforwarders"
-)
-
 func BuildOptions(ctx context.Context, k8s client.Client, mcAddon *addonapiv1alpha1.ManagedClusterAddOn, adoc *addonapiv1alpha1.AddOnDeploymentConfig) (manifests.Options, error) {
 	resources := manifests.Options{
 		AddOnDeploymentConfig: adoc,
 	}
 
-	key := addon.GetObjectKey(mcAddon.Status.ConfigReferences, loggingv1.GroupVersion.Group, clusterLogForwarderResource)
+	key := addon.GetObjectKey(mcAddon.Status.ConfigReferences, loggingv1.GroupVersion.Group, addon.ClusterLogForwardersResource)
 	clf := &loggingv1.ClusterLogForwarder{}
 	if err := k8s.Get(ctx, key, clf, &client.GetOptions{}); err != nil {
 		return resources, err
diff --git a/internal/logging/helm_test.go b/internal/logging/helm_test.go
index 695c32e..d200164 100644
--- a/internal/logging/helm_test.go
+++ b/internal/logging/helm_test.go
@@ -92,7 +92,7 @@ func Test_Logging_AllConfigsTogether_AllResources(t *testing.T) {
 			},
 			ConfigReferent: addonapiv1alpha1.ConfigReferent{
 				Namespace: "open-cluster-management",
-				Name:      "instance",
+				Name:      "mcoa-instance",
 			},
 		},
 	}
@@ -100,7 +100,7 @@ func Test_Logging_AllConfigsTogether_AllResources(t *testing.T) {
 	// Setup configuration resources: ClusterLogForwarder, AddOnDeploymentConfig, Secrets, ConfigMaps
 	clf = &loggingv1.ClusterLogForwarder{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      "instance",
+			Name:      "mcoa-instance",
 			Namespace: "open-cluster-management",
 			Annotations: map[string]string{
 				"authentication.mcoa.openshift.io/app-logs": "SecretReference",
@@ -215,7 +215,7 @@ func Test_Logging_AllConfigsTogether_AllResources(t *testing.T) {
 	// Render manifests and return them as k8s runtime objects
 	objects, err := loggingAgentAddon.Manifests(managedCluster, managedClusterAddOn)
 	require.NoError(t, err)
-	require.Equal(t, 7, len(objects))
+	require.Equal(t, 11, len(objects))
 
 	for _, obj := range objects {
 		switch obj := obj.(type) {
@@ -226,6 +226,10 @@ func Test_Logging_AllConfigsTogether_AllResources(t *testing.T) {
 			require.NotNil(t, obj.Spec.Outputs[1].Secret)
 			require.Equal(t, "static-authentication", obj.Spec.Outputs[0].Secret.Name)
 			require.Equal(t, "static-authentication", obj.Spec.Outputs[1].Secret.Name)
+			// Check name and namespace to make sure that changes to the helm
+			// manifests don't break the addon probes
+			require.Equal(t, addon.SpokeCLFName, obj.Name)
+			require.Equal(t, addon.SpokeCLFNamespace, obj.Namespace)
 		case *corev1.Secret:
 			if obj.Name == "static-authentication" {
 				require.Equal(t, staticCred.Data, obj.Data)
diff --git a/internal/logging/manifests/logging.go b/internal/logging/manifests/logging.go
index d694208..c7b70bf 100644
--- a/internal/logging/manifests/logging.go
+++ b/internal/logging/manifests/logging.go
@@ -54,6 +54,7 @@ func buildClusterLogForwarderSpec(resources Options) (*loggingv1.ClusterLogForwa
 			return nil, err
 		}
 	}
+	clf.Spec.ServiceAccountName = "mcoa-logcollector"
 
 	return &clf.Spec, nil
 }
diff --git a/internal/logging/manifests/logging_test.go b/internal/logging/manifests/logging_test.go
index 1c6f9ae..8b8cd58 100644
--- a/internal/logging/manifests/logging_test.go
+++ b/internal/logging/manifests/logging_test.go
@@ -119,7 +119,7 @@ func Test_BuildCLFSpec(t *testing.T) {
 			},
 			ConfigReferent: addonapiv1alpha1.ConfigReferent{
 				Namespace: "open-cluster-management",
-				Name:      "instance",
+				Name:      "mcoa-instance",
 			},
 		},
 		{
@@ -147,7 +147,7 @@ func Test_BuildCLFSpec(t *testing.T) {
 	// Setup configuration resources: ClusterLogForwarder, AddOnDeploymentConfig, Secrets, ConfigMaps
 	clf = &loggingv1.ClusterLogForwarder{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      "instance",
+			Name:      "mcoa-instance",
 			Namespace: "open-cluster-management",
 		},
 		Spec: loggingv1.ClusterLogForwarderSpec{
diff --git a/internal/tracing/handlers/handler.go b/internal/tracing/handlers/handler.go
index 293e413..a829b80 100644
--- a/internal/tracing/handlers/handler.go
+++ b/internal/tracing/handlers/handler.go
@@ -15,10 +15,6 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
-const (
-	opentelemetryCollectorResource = "opentelemetrycollectors"
-)
-
 var (
 	errNoExportersFound = fmt.Errorf("no exporters found")
 	errNoMountPathFound = fmt.Errorf("mountpath not found in any secret")
@@ -32,7 +28,7 @@ func BuildOptions(ctx context.Context, k8s client.Client, mcAddon *addonapiv1alp
 	}
 
 	klog.Info("Retrieving OpenTelemetry Collector template")
-	key := addon.GetObjectKey(mcAddon.Status.ConfigReferences, otelv1beta1.GroupVersion.Group, opentelemetryCollectorResource)
+	key := addon.GetObjectKey(mcAddon.Status.ConfigReferences, otelv1beta1.GroupVersion.Group, addon.OpenTelemetryCollectorsResource)
 	otelCol := &otelv1beta1.OpenTelemetryCollector{}
 	if err := k8s.Get(ctx, key, otelCol, &client.GetOptions{}); err != nil {
 		return resources, err
diff --git a/internal/tracing/helm_test.go b/internal/tracing/helm_test.go
index 8c15b47..67d734c 100644
--- a/internal/tracing/helm_test.go
+++ b/internal/tracing/helm_test.go
@@ -100,7 +100,7 @@ func Test_Tracing_AllConfigsTogether_AllResources(t *testing.T) {
 			},
 			ConfigReferent: addonapiv1alpha1.ConfigReferent{
 				Namespace: "open-cluster-management",
-				Name:      "spoke-otelcol",
+				Name:      "mcoa-instance",
 			},
 		},
 	}
@@ -108,7 +108,7 @@ func Test_Tracing_AllConfigsTogether_AllResources(t *testing.T) {
 	// Setup configuration resources: OpenTelemetryCollector, AddOnDeploymentConfig
 	otelCol := otelv1beta1.OpenTelemetryCollector{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      "spoke-otelcol",
+			Name:      "mcoa-instance",
 			Namespace: "open-cluster-management",
 		},
 		Spec: otelv1beta1.OpenTelemetryCollectorSpec{
@@ -191,8 +191,10 @@ func Test_Tracing_AllConfigsTogether_AllResources(t *testing.T) {
 	for _, obj := range objects {
 		switch obj := obj.(type) {
 		case *otelv1beta1.OpenTelemetryCollector:
-			require.Equal(t, "spoke-otelcol", obj.ObjectMeta.Name)
-			require.Equal(t, "spoke-otelcol", obj.ObjectMeta.Namespace)
+			// Check name and namespace to make sure that changes to the helm
+			// manifests don't break the addon probes
+			require.Equal(t, addon.SpokeOTELColName, obj.Name)
+			require.Equal(t, addon.SpokeOTELColNamespace, obj.Namespace)
 			require.NotEmpty(t, obj.Spec.Config)
 		case *corev1.Secret:
 			if obj.Name == "tracing-otlphttp-auth" {
diff --git a/main.go b/main.go
index fa76edc..6cb977d 100644
--- a/main.go
+++ b/main.go
@@ -150,6 +150,7 @@ func runController(ctx context.Context, kubeConfig *rest.Config) error {
 			utils.AddOnDeploymentConfigGVR,
 		).
 		WithGetValuesFuncs(addonConfigValuesFn, addonhelm.GetValuesFunc(ctx, k8sClient)).
+		WithAgentHealthProber(addon.AgentHealthProber()).
 		WithAgentRegistrationOption(registrationOption).
 		WithScheme(scheme).
 		BuildHelmAgentAddon()
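
Side note, not part of the patch: because the HealthCheck above wraps its package-level sentinels with %w, callers can use errors.Is to tell a probe whose feedback has not been reported yet apart from one that is genuinely failing. A minimal sketch under that assumption, placed in the addon package alongside the code above; classifyCLFProbe is a hypothetical helper and the feedback value is synthetic, mirroring the unit tests:

package addon

import (
	"errors"

	loggingv1 "github.com/openshift/cluster-logging-operator/apis/logging/v1"
	workv1 "open-cluster-management.io/api/work/v1"
)

// classifyCLFProbe feeds a synthetic isReady feedback value to the prober,
// the same way the unit tests do, and buckets the outcome for a caller that
// wants to treat "no feedback yet" as transient rather than degraded.
func classifyCLFProbe(status string) string {
	err := AgentHealthProber().WorkProber.HealthCheck(workv1.ResourceIdentifier{
		Group:     loggingv1.GroupVersion.Group,
		Resource:  ClusterLogForwardersResource,
		Name:      SpokeCLFName,
		Namespace: SpokeCLFNamespace,
	}, workv1.StatusFeedbackResult{
		Values: []workv1.FeedbackValue{{
			Name:  clfProbeKey,
			Value: workv1.FieldValue{Type: workv1.String, String: &status},
		}},
	})
	switch {
	case err == nil:
		return "healthy" // the Ready condition reported "True"
	case errors.Is(err, errValueNotProbed):
		return "pending" // no isReady value in the feedback yet; retry later
	default:
		return "degraded: " + err.Error() // nil value or condition not "True"
	}
}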