diff --git a/bundle/manifests/observability-operator.clusterserviceversion.yaml b/bundle/manifests/observability-operator.clusterserviceversion.yaml index 2b0ad89fc..bc3f2b18d 100644 --- a/bundle/manifests/observability-operator.clusterserviceversion.yaml +++ b/bundle/manifests/observability-operator.clusterserviceversion.yaml @@ -345,6 +345,18 @@ spec: - patch - update - watch + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create - apiGroups: - autoscaling resources: @@ -439,6 +451,16 @@ spec: - create - get - update + - apiGroups: + - monitoring.coreos.com + resources: + - servicemonitors + verbs: + - create + - delete + - get + - patch + - update - apiGroups: - monitoring.rhobs resources: diff --git a/cmd/operator/main.go b/cmd/operator/main.go index 2d03daf71..35357752f 100644 --- a/cmd/operator/main.go +++ b/cmd/operator/main.go @@ -44,6 +44,7 @@ var defaultImages = map[string]string{ "ui-distributed-tracing": "quay.io/openshift-observability-ui/distributed-tracing-console-plugin:v0.2.0", "ui-logging": "quay.io/openshift-logging/logging-view-plugin:6.0.0", "korrel8r": "quay.io/korrel8r/korrel8r:0.7.2", + "incidents": "quay.io/afalossi/cluster-health-analyzer:latest", } func imagesUsed() []string { diff --git a/deploy/operator/observability-operator-cluster-role.yaml b/deploy/operator/observability-operator-cluster-role.yaml index bc6170bfa..4b7b878c7 100644 --- a/deploy/operator/observability-operator-cluster-role.yaml +++ b/deploy/operator/observability-operator-cluster-role.yaml @@ -82,6 +82,18 @@ rules: - patch - update - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create - apiGroups: - autoscaling resources: @@ -176,6 +188,16 @@ rules: - create - get - update +- apiGroups: + - 
monitoring.coreos.com + resources: + - servicemonitors + verbs: + - create + - delete + - get + - patch + - update - apiGroups: - monitoring.rhobs resources: diff --git a/go.mod b/go.mod index 1b12c13ea..7b666a772 100644 --- a/go.mod +++ b/go.mod @@ -80,6 +80,7 @@ require ( github.com/opentracing/opentracing-go v1.2.0 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus-community/prom-label-proxy v0.10.0 // indirect + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.2 // indirect github.com/prometheus/alertmanager v0.27.0 // indirect github.com/prometheus/client_golang v1.19.1 // indirect github.com/prometheus/client_model v0.6.1 // indirect diff --git a/go.sum b/go.sum index 8cbb77e24..7a628578e 100644 --- a/go.sum +++ b/go.sum @@ -201,6 +201,8 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus-community/prom-label-proxy v0.10.0 h1:49S1uMRHuTUKoWU9Oj3hrKM2m8F3qkY3TPAOyKQnAVo= github.com/prometheus-community/prom-label-proxy v0.10.0/go.mod h1:hUQJDmGpo2bVBA03jBur8vvicfA8dbJdDg5hCjPZPw8= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.2 h1:6UsAv+jAevuGO2yZFU/BukV4o9NKnFMOuoouSA4G0ns= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.75.2/go.mod h1:XYrdZw5dW12Cjkt4ndbeNZZTBp4UCHtW0ccR9+sTtPU= github.com/prometheus/alertmanager v0.27.0 h1:V6nTa2J5V4s8TG4C4HtrBP/WNSebCCTYGGv4qecA/+I= github.com/prometheus/alertmanager v0.27.0/go.mod h1:8Ia/R3urPmbzJ8OsdvmZvIprDwvwmYCmUbwBL+jlPOE= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= diff --git a/pkg/controllers/uiplugin/components.go b/pkg/controllers/uiplugin/components.go index 4a0574e3a..c85ff7b9e 100644 --- a/pkg/controllers/uiplugin/components.go +++ 
b/pkg/controllers/uiplugin/components.go @@ -109,6 +109,18 @@ func pluginComponentReconcilers(plugin *uiv1alpha1.UIPlugin, pluginInfo UIPlugin } } + isIncidentsEnabled := pluginInfo.IncidentsImage != "" + if isIncidentsEnabled { + serviceAccountName := plugin.Name + serviceAccountSuffix + // components = append(components, reconciler.NewUpdater(newIncidentsClusterRoleBinding(namespace, serviceAccountName, "cluster-monitoring-view", serviceAccountName), plugin)) + components = append(components, reconciler.NewUpdater(newIncidentsClusterRoleBinding(namespace, serviceAccountName, "system:auth-delegator", serviceAccountName+":system:auth-delegator"), plugin)) + components = append(components, reconciler.NewUpdater(newIncidentsPrometheusRole(namespace), plugin)) + components = append(components, reconciler.NewUpdater(newIncidentsPrometheusRoleBinding(namespace), plugin)) + components = append(components, reconciler.NewUpdater(newIncidentsService(namespace), plugin)) + components = append(components, reconciler.NewUpdater(newIncidentsDeployment(namespace, serviceAccountName, pluginInfo), plugin)) + components = append(components, reconciler.NewUpdater(newIncidentsServiceMonitor(namespace), plugin)) + } + return components } diff --git a/pkg/controllers/uiplugin/controller.go b/pkg/controllers/uiplugin/controller.go index ddd5d558c..19e75f1ce 100644 --- a/pkg/controllers/uiplugin/controller.go +++ b/pkg/controllers/uiplugin/controller.go @@ -89,6 +89,11 @@ const ( //+kubebuilder:rbac:groups=monitoring.coreos.com,resources=prometheuses/api,resourceNames=k8s,verbs=get;create;update //+kubebuilder:rbac:groups=monitoring.coreos.com,resources=alertmanagers/api,resourceNames=main,verbs=get;list +// RBAC for Incidents Detection +//+kubebuilder:rbac:groups=authentication.k8s.io,resources=tokenreviews,verbs=create +//+kubebuilder:rbac:groups=authorization.k8s.io,resources=subjectaccessreviews,verbs=create 
+//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors,verbs=get;create;update;patch;delete
+
 // RegisterWithManager registers the controller with Manager
 func RegisterWithManager(mgr ctrl.Manager, opts Options) error {
 	logger := ctrl.Log.WithName("observability-ui")
diff --git a/pkg/controllers/uiplugin/incidents.go b/pkg/controllers/uiplugin/incidents.go
new file mode 100644
index 000000000..391cfa7ff
--- /dev/null
+++ b/pkg/controllers/uiplugin/incidents.go
@@ -0,0 +1,258 @@
+package uiplugin
+
+import (
+	monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	rbacv1 "k8s.io/api/rbac/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/utils/ptr"
+)
+
+const (
+	// incidentsName names the analyzer's Service, Deployment, container and
+	// ServiceMonitor, and is the instance-label value their selectors match.
+	incidentsName = "cluster-health-analyzer"
+	// volumeMountName names both the TLS volume and the Secret backing it.
+	volumeMountName = incidentsName + "-tls"
+
+	// Literals that several of the objects built below have to agree on.
+	incidentsInstanceLabel   = "app.kubernetes.io/instance"
+	incidentsMetricsPortName = "metrics"
+	incidentsMetricsPort     = 8443
+	incidentsTLSMountPath    = "/etc/tls/private"
+	incidentsPrometheusName  = "prometheus-k8s"
+)
+
+// newIncidentsClusterRoleBinding builds the ClusterRoleBinding crbName, which
+// grants ClusterRole roleName to ServiceAccount serviceAccountName in
+// namespace.
+func newIncidentsClusterRoleBinding(namespace string, serviceAccountName string, roleName string, crbName string) *rbacv1.ClusterRoleBinding {
+	subject := rbacv1.Subject{
+		APIGroup:  corev1.SchemeGroupVersion.Group,
+		Kind:      "ServiceAccount",
+		Name:      serviceAccountName,
+		Namespace: namespace,
+	}
+
+	return &rbacv1.ClusterRoleBinding{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: rbacv1.SchemeGroupVersion.String(),
+			Kind:       "ClusterRoleBinding",
+		},
+		ObjectMeta: metav1.ObjectMeta{Name: crbName},
+		Subjects:   []rbacv1.Subject{subject},
+		RoleRef: rbacv1.RoleRef{
+			APIGroup: rbacv1.SchemeGroupVersion.Group,
+			Kind:     "ClusterRole",
+			Name:     roleName,
+		},
+	}
+}
+
+// newIncidentsPrometheusRole builds a Role in namespace that allows get, list
+// and watch on services, endpoints and pods.
+func newIncidentsPrometheusRole(namespace string) *rbacv1.Role {
+	readOnly := rbacv1.PolicyRule{
+		APIGroups: []string{""},
+		Resources: []string{"services", "endpoints", "pods"},
+		Verbs:     []string{"get", "list", "watch"},
+	}
+
+	return &rbacv1.Role{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: rbacv1.SchemeGroupVersion.String(),
+			Kind:       "Role",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      incidentsPrometheusName,
+			Namespace: namespace,
+		},
+		Rules: []rbacv1.PolicyRule{readOnly},
+	}
+}
+
+// newIncidentsPrometheusRoleBinding builds the RoleBinding in namespace that
+// binds the prometheus-k8s Role to the prometheus-k8s ServiceAccount of the
+// openshift-monitoring namespace.
+func newIncidentsPrometheusRoleBinding(namespace string) *rbacv1.RoleBinding {
+	return &rbacv1.RoleBinding{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: rbacv1.SchemeGroupVersion.String(),
+			Kind:       "RoleBinding",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      incidentsPrometheusName,
+			Namespace: namespace,
+		},
+		RoleRef: rbacv1.RoleRef{
+			APIGroup: rbacv1.SchemeGroupVersion.Group,
+			Kind:     "Role",
+			Name:     incidentsPrometheusName,
+		},
+		Subjects: []rbacv1.Subject{
+			{
+				Kind:      "ServiceAccount",
+				Name:      incidentsPrometheusName,
+				Namespace: "openshift-monitoring",
+			},
+		},
+	}
+}
+
+// newIncidentsService builds the ClusterIP Service that exposes the metrics
+// port of the pods carrying the analyzer's instance label. Its
+// serving-cert-secret-name annotation is set to volumeMountName, the Secret
+// that newIncidentsDeployment mounts.
+func newIncidentsService(namespace string) *corev1.Service {
+	metricsPort := corev1.ServicePort{
+		Name:       incidentsMetricsPortName,
+		Port:       incidentsMetricsPort,
+		TargetPort: intstr.FromString(incidentsMetricsPortName),
+	}
+
+	return &corev1.Service{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: corev1.SchemeGroupVersion.String(),
+			Kind:       "Service",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      incidentsName,
+			Namespace: namespace,
+			Annotations: map[string]string{
+				"service.beta.openshift.io/serving-cert-secret-name": volumeMountName,
+			},
+			Labels: componentLabels(incidentsName),
+		},
+		Spec: corev1.ServiceSpec{
+			Ports:    []corev1.ServicePort{metricsPort},
+			Selector: map[string]string{incidentsInstanceLabel: incidentsName},
+			Type:     corev1.ServiceTypeClusterIP,
+		},
+	}
+}
+
+// newIncidentsDeployment builds the single-replica Deployment that runs
+// pluginInfo.IncidentsImage as serviceAccountName. The container is given the
+// certificate and key paths under the mounted volumeMountName Secret, exposes
+// the metrics port, and has PROM_URL pointing at the thanos-querier service in
+// openshift-monitoring.
+func newIncidentsDeployment(namespace string, serviceAccountName string, pluginInfo UIPluginInfo) *appsv1.Deployment {
+	analyzer := corev1.Container{
+		Name:            incidentsName,
+		Image:           pluginInfo.IncidentsImage,
+		ImagePullPolicy: corev1.PullAlways,
+		Args: []string{
+			"--tls-cert-file=" + incidentsTLSMountPath + "/tls.crt",
+			"--tls-private-key-file=" + incidentsTLSMountPath + "/tls.key",
+		},
+		Env: []corev1.EnvVar{
+			{
+				Name:  "PROM_URL",
+				Value: "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091/",
+			},
+		},
+		SecurityContext: &corev1.SecurityContext{
+			RunAsNonRoot:             ptr.To(true),
+			AllowPrivilegeEscalation: ptr.To(false),
+			Capabilities: &corev1.Capabilities{
+				Drop: []corev1.Capability{"ALL"},
+			},
+			SeccompProfile: &corev1.SeccompProfile{
+				Type: corev1.SeccompProfileTypeRuntimeDefault,
+			},
+		},
+		Ports: []corev1.ContainerPort{
+			{
+				ContainerPort: incidentsMetricsPort,
+				Name:          incidentsMetricsPortName,
+			},
+		},
+		TerminationMessagePolicy: corev1.TerminationMessageFallbackToLogsOnError,
+		VolumeMounts: []corev1.VolumeMount{
+			{
+				MountPath: incidentsTLSMountPath,
+				Name:      volumeMountName,
+				ReadOnly:  true,
+			},
+		},
+	}
+
+	tlsVolume := corev1.Volume{
+		Name: volumeMountName,
+		VolumeSource: corev1.VolumeSource{
+			Secret: &corev1.SecretVolumeSource{
+				SecretName: volumeMountName,
+			},
+		},
+	}
+
+	return &appsv1.Deployment{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: appsv1.SchemeGroupVersion.String(),
+			Kind:       "Deployment",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      incidentsName,
+			Namespace: namespace,
+			Labels:    componentLabels(incidentsName),
+		},
+		Spec: appsv1.DeploymentSpec{
+			Replicas: ptr.To(int32(1)),
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{incidentsInstanceLabel: incidentsName},
+			},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: componentLabels(incidentsName),
+				},
+				Spec: corev1.PodSpec{
+					ServiceAccountName:           serviceAccountName,
+					AutomountServiceAccountToken: ptr.To(true),
+					Containers:                   []corev1.Container{analyzer},
+					Volumes:                      []corev1.Volume{tlsVolume},
+				},
+			},
+		},
+	}
+}
+
+// newIncidentsServiceMonitor builds the ServiceMonitor in namespace that
+// selects the analyzer by its instance label and scrapes its metrics port
+// every 30s over https, with the CA bundle, client certificate and key paths
+// set in the endpoint's TLSConfig and <incidentsName>.<namespace>.svc as the
+// expected server name.
+func newIncidentsServiceMonitor(namespace string) *monv1.ServiceMonitor {
+	scrapeTLS := &monv1.TLSConfig{
+		SafeTLSConfig: monv1.SafeTLSConfig{
+			ServerName: ptr.To(incidentsName + "." + namespace + ".svc"),
+		},
+		CAFile:   "/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt",
+		CertFile: "/etc/prometheus/secrets/metrics-client-certs/tls.crt",
+		KeyFile:  "/etc/prometheus/secrets/metrics-client-certs/tls.key",
+	}
+
+	return &monv1.ServiceMonitor{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: monv1.SchemeGroupVersion.String(),
+			Kind:       "ServiceMonitor",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      incidentsName,
+			Namespace: namespace,
+		},
+		Spec: monv1.ServiceMonitorSpec{
+			Endpoints: []monv1.Endpoint{
+				{
+					Interval:  "30s",
+					Port:      incidentsMetricsPortName,
+					Scheme:    "https",
+					TLSConfig: scrapeTLS,
+				},
+			},
+			Selector: metav1.LabelSelector{
+				MatchLabels: map[string]string{incidentsInstanceLabel: incidentsName},
+			},
+		},
+	}
+}
diff --git a/pkg/controllers/uiplugin/plugin_info_builder.go b/pkg/controllers/uiplugin/plugin_info_builder.go
index 834ff2992..eb7d04b2b 100644
--- a/pkg/controllers/uiplugin/plugin_info_builder.go
+++ b/pkg/controllers/uiplugin/plugin_info_builder.go
@@ -17,6 +17,7 @@ import (
 type UIPluginInfo struct {
 	Image            string
 	Korrel8rImage    string
+	IncidentsImage   string
 	LokiServiceNames map[string]string
 	Name             string
 	ConsoleName      string
@@ -132,6 +133,8 @@ func PluginInfoBuilder(ctx context.Context, k client.Client, plugin *uiv1alpha1.
 			return nil, err
 		}
 
+		pluginInfo.IncidentsImage = pluginConf.Images["incidents"]
+
 		pluginInfo.Korrel8rImage = pluginConf.Images["korrel8r"]
 		pluginInfo.LokiServiceNames[OpenshiftLoggingNs], err = getLokiServiceName(ctx, k, OpenshiftLoggingNs)
 		if err != nil {
diff --git a/pkg/operator/scheme.go b/pkg/operator/scheme.go
index 09ab01f28..2ae9c9792 100644
--- a/pkg/operator/scheme.go
+++ b/pkg/operator/scheme.go
@@ -4,6 +4,7 @@ import (
 	osv1 "github.com/openshift/api/console/v1"
 	osv1alpha1 "github.com/openshift/api/console/v1alpha1"
 	operatorv1 "github.com/openshift/api/operator/v1"
+	monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
 	monitoringv1 "github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring/v1"
 	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -21,6 +22,7 @@ func NewScheme(cfg *OperatorConfiguration) *runtime.Scheme {
 	utilruntime.Must(rhobsv1alpha1.AddToScheme(scheme))
 	utilruntime.Must(apiextensionsv1.AddToScheme(scheme))
utilruntime.Must(monitoringv1.AddToScheme(scheme)) + utilruntime.Must(monv1.AddToScheme(scheme)) utilruntime.Must(uiv1alpha1.AddToScheme(scheme)) if cfg.FeatureGates.OpenShift.Enabled {