MON-1708: Enforce label scrape limits in UWM #1350

Merged
5 changes: 3 additions & 2 deletions CHANGELOG.md
@@ -1,10 +1,11 @@
# Note: This CHANGELOG is only for the monitoring team to track all monitoring related changes. Please see OpenShift release notes for official changes.

-# 4.11
+## 4.11

-- [#1567](https://github.com/openshift/cluster-monitoring-operator/pull/1567) Enable validating webhook for AlertmanagerConfig customer resources
+- [#1567](https://github.com/openshift/cluster-monitoring-operator/pull/1567) Enable validating webhook for AlertmanagerConfig custom resources
- [#1557](https://github.com/openshift/cluster-monitoring-operator/pull/1557) Removing grafana from monitoring stack
- [1578](https://github.com/openshift/cluster-monitoring-operator/pull/1578) Add temporary cluster id label to remote write relabel configs.
+- [#1350](https://github.com/openshift/cluster-monitoring-operator/pull/1350) Support label scrape limits in user-workload monitoring

## 4.10

27 changes: 15 additions & 12 deletions pkg/manifests/config.go
@@ -491,18 +491,21 @@ type UserWorkloadConfiguration struct {
}

type PrometheusRestrictedConfig struct {
-	LogLevel            string                               `json:"logLevel"`
-	Retention           string                               `json:"retention"`
-	NodeSelector        map[string]string                    `json:"nodeSelector"`
-	Tolerations         []v1.Toleration                      `json:"tolerations"`
-	Resources           *v1.ResourceRequirements             `json:"resources"`
-	ExternalLabels      map[string]string                    `json:"externalLabels"`
-	VolumeClaimTemplate *monv1.EmbeddedPersistentVolumeClaim `json:"volumeClaimTemplate"`
-	RemoteWrite         []RemoteWriteSpec                    `json:"remoteWrite"`
-	EnforcedSampleLimit *uint64                              `json:"enforcedSampleLimit"`
-	EnforcedTargetLimit *uint64                              `json:"enforcedTargetLimit"`
-	AlertmanagerConfigs []AdditionalAlertmanagerConfig       `json:"additionalAlertmanagerConfigs"`
-	QueryLogFile        string                               `json:"queryLogFile"`
+	LogLevel                      string                               `json:"logLevel"`
+	Retention                     string                               `json:"retention"`
+	NodeSelector                  map[string]string                    `json:"nodeSelector"`
+	Tolerations                   []v1.Toleration                      `json:"tolerations"`
+	Resources                     *v1.ResourceRequirements             `json:"resources"`
+	ExternalLabels                map[string]string                    `json:"externalLabels"`
+	VolumeClaimTemplate           *monv1.EmbeddedPersistentVolumeClaim `json:"volumeClaimTemplate"`
+	RemoteWrite                   []RemoteWriteSpec                    `json:"remoteWrite"`
+	EnforcedSampleLimit           *uint64                              `json:"enforcedSampleLimit"`
+	EnforcedTargetLimit           *uint64                              `json:"enforcedTargetLimit"`
+	EnforcedLabelLimit            *uint64                              `json:"enforcedLabelLimit"`
+	EnforcedLabelNameLengthLimit  *uint64                              `json:"enforcedLabelNameLengthLimit"`
+	EnforcedLabelValueLengthLimit *uint64                              `json:"enforcedLabelValueLengthLimit"`
+	AlertmanagerConfigs           []AdditionalAlertmanagerConfig       `json:"additionalAlertmanagerConfigs"`
+	QueryLogFile                  string                               `json:"queryLogFile"`
}

func (u *UserWorkloadConfiguration) applyDefaults() {
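For context, the three new `Enforced*` fields are plain `*uint64` pointers with JSON tags, so they are optional keys in the user-workload monitoring `config.yaml` and stay `nil` when omitted. Below is a minimal, self-contained sketch of how such a payload decodes; it assumes `sigs.k8s.io/yaml` (which honors `json` struct tags) and mirrors only the relevant fields, so it is illustrative rather than the operator's actual parsing code.

```go
package main

import (
	"fmt"

	"sigs.k8s.io/yaml"
)

// Illustrative subset of PrometheusRestrictedConfig: the field names and json
// tags match the struct in the diff; everything else here is an assumption.
type prometheusRestrictedConfig struct {
	EnforcedSampleLimit           *uint64 `json:"enforcedSampleLimit"`
	EnforcedTargetLimit           *uint64 `json:"enforcedTargetLimit"`
	EnforcedLabelLimit            *uint64 `json:"enforcedLabelLimit"`
	EnforcedLabelNameLengthLimit  *uint64 `json:"enforcedLabelNameLengthLimit"`
	EnforcedLabelValueLengthLimit *uint64 `json:"enforcedLabelValueLengthLimit"`
}

type userWorkloadConfig struct {
	Prometheus prometheusRestrictedConfig `json:"prometheus"`
}

func main() {
	// Example user-workload-monitoring-config payload using the new limits.
	raw := []byte(`prometheus:
  enforcedLabelLimit: 500
  enforcedLabelNameLengthLimit: 50
  enforcedLabelValueLengthLimit: 600
`)

	var cfg userWorkloadConfig
	if err := yaml.Unmarshal(raw, &cfg); err != nil {
		panic(err)
	}

	// Limits that were set decode into non-nil pointers...
	fmt.Println(*cfg.Prometheus.EnforcedLabelLimit) // 500
	// ...while omitted limits stay nil, so "unset" is distinguishable from 0.
	fmt.Println(cfg.Prometheus.EnforcedSampleLimit == nil) // true
}
```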
12 changes: 12 additions & 0 deletions pkg/manifests/manifests.go
@@ -1785,6 +1785,18 @@ func (f *Factory) PrometheusUserWorkload(grpcTLS *v1.Secret) (*monv1.Prometheus,
		p.Spec.EnforcedTargetLimit = f.config.UserWorkloadConfiguration.Prometheus.EnforcedTargetLimit
	}

	if f.config.UserWorkloadConfiguration.Prometheus.EnforcedLabelLimit != nil {
		p.Spec.EnforcedLabelLimit = f.config.UserWorkloadConfiguration.Prometheus.EnforcedLabelLimit
	}

	if f.config.UserWorkloadConfiguration.Prometheus.EnforcedLabelNameLengthLimit != nil {
		p.Spec.EnforcedLabelNameLengthLimit = f.config.UserWorkloadConfiguration.Prometheus.EnforcedLabelNameLengthLimit
	}

	if f.config.UserWorkloadConfiguration.Prometheus.EnforcedLabelValueLengthLimit != nil {
		p.Spec.EnforcedLabelValueLengthLimit = f.config.UserWorkloadConfiguration.Prometheus.EnforcedLabelValueLengthLimit
	}

	if f.config.Images.Thanos != "" {
		p.Spec.Thanos.Image = &f.config.Images.Thanos
	}
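The three new guards in `PrometheusUserWorkload` follow the same pattern as the existing `EnforcedTargetLimit` handling: a limit is copied onto the Prometheus spec only when the user actually configured it, so omitted limits remain `nil` and no enforcement is rendered for them. A small hypothetical helper (not part of this PR) makes that pattern explicit:

```go
package main

import "fmt"

// setLimit is a hypothetical illustration of the guard pattern above: copy a
// configured limit onto the spec, leave the spec field nil when unset.
func setLimit(dst **uint64, src *uint64) {
	if src != nil {
		*dst = src
	}
}

func main() {
	var specLabelLimit *uint64 // stands in for p.Spec.EnforcedLabelLimit

	configured := uint64(500) // user set enforcedLabelLimit: 500
	setLimit(&specLabelLimit, &configured)
	fmt.Println(*specLabelLimit) // 500

	var specNameLengthLimit *uint64 // user left enforcedLabelNameLengthLimit unset
	setLimit(&specNameLengthLimit, nil)
	fmt.Println(specNameLengthLimit == nil) // true: nothing is enforced
}
```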
87 changes: 87 additions & 0 deletions test/e2e/config_test.go
@@ -465,6 +465,9 @@ func TestUserWorkloadMonitorPrometheusK8Config(t *testing.T) {
		Data: map[string]string{
			"config.yaml": fmt.Sprintf(`prometheus:
  enforcedTargetLimit: 10
  enforcedLabelLimit: 500
  enforcedLabelNameLengthLimit: 50
  enforcedLabelValueLengthLimit: 600
  logLevel: debug
  retention: 10h
  queryLogFile: /tmp/test.log
@@ -522,6 +525,18 @@
name: "assert enforced target limit is configured",
assertion: assertEnforcedTargetLimit(10),
},
{
name: "assert enforced label limit is configured",
assertion: assertEnforcedLabelLimit(500),
},
{
name: "assert enforced label name length limit is configured",
assertion: assertEnforcedLabelNameLengthLimit(50),
},
{
name: "assert enforced label value length limit",
assertion: assertEnforcedLabelValueLengthLimit(600),
},
{
name: "assert query log file value is set and correct",
assertion: assertQueryLogValueEquals(f.UserWorkloadMonitoringNs, crName, "/tmp/test.log"),
@@ -763,6 +778,78 @@ func assertEnforcedTargetLimit(limit uint64) func(*testing.T) {
	}
}

func assertEnforcedLabelLimit(limit uint64) func(*testing.T) {
	ctx := context.Background()
	return func(t *testing.T) {
		err := framework.Poll(time.Second, 5*time.Minute, func() error {
			prom, err := f.MonitoringClient.Prometheuses(f.UserWorkloadMonitoringNs).Get(ctx, "user-workload", metav1.GetOptions{})
			if err != nil {
				return err
			}

			if prom.Spec.EnforcedLabelLimit == nil {
				return errors.New("EnforcedLabelLimit not set")
			} else if *prom.Spec.EnforcedLabelLimit != limit {
				return fmt.Errorf("expected EnforcedLabelLimit to be %d, but got %d", limit, *prom.Spec.EnforcedLabelLimit)
			}

			return nil
		})

		if err != nil {
			t.Fatalf("Timed out waiting for EnforcedLabelLimit configuration: %v", err)
		}
	}
}

func assertEnforcedLabelNameLengthLimit(limit uint64) func(*testing.T) {
	ctx := context.Background()
	return func(t *testing.T) {
		err := framework.Poll(time.Second, 5*time.Minute, func() error {
			prom, err := f.MonitoringClient.Prometheuses(f.UserWorkloadMonitoringNs).Get(ctx, "user-workload", metav1.GetOptions{})
			if err != nil {
				return err
			}

			if prom.Spec.EnforcedLabelNameLengthLimit == nil {
				return errors.New("EnforcedLabelNameLengthLimit not set")
			} else if *prom.Spec.EnforcedLabelNameLengthLimit != limit {
				return fmt.Errorf("expected EnforcedLabelNameLengthLimit to be %d, but got %d", limit, *prom.Spec.EnforcedLabelNameLengthLimit)
			}

			return nil
		})

		if err != nil {
			t.Fatalf("Timed out waiting for EnforcedLabelNameLengthLimit configuration: %v", err)
		}
	}
}

func assertEnforcedLabelValueLengthLimit(limit uint64) func(*testing.T) {
	ctx := context.Background()
	return func(t *testing.T) {
		err := framework.Poll(time.Second, 5*time.Minute, func() error {
			prom, err := f.MonitoringClient.Prometheuses(f.UserWorkloadMonitoringNs).Get(ctx, "user-workload", metav1.GetOptions{})
			if err != nil {
				return err
			}

			if prom.Spec.EnforcedLabelValueLengthLimit == nil {
				return errors.New("EnforcedLabelValueLengthLimit not set")
			} else if *prom.Spec.EnforcedLabelValueLengthLimit != limit {
				return fmt.Errorf("expected EnforcedLabelValueLengthLimit to be %d, but got %d", limit, *prom.Spec.EnforcedLabelValueLengthLimit)
			}

			return nil
		})

		if err != nil {
			t.Fatalf("Timed out waiting for EnforcedLabelValueLengthLimit configuration: %v", err)
		}
	}
}
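The three assertion helpers above differ only in which `*uint64` spec field they inspect. As a sketch (not part of this PR, and assuming the same test framework helpers used above plus a `monv1` alias for the prometheus-operator API types in this package), they could be collapsed into one generic helper that takes a field accessor:

```go
// assertEnforcedLimit is a hypothetical generic form of the helpers above.
// name is used only in error messages; getField selects the spec field to check.
func assertEnforcedLimit(name string, limit uint64, getField func(monv1.PrometheusSpec) *uint64) func(*testing.T) {
	ctx := context.Background()
	return func(t *testing.T) {
		err := framework.Poll(time.Second, 5*time.Minute, func() error {
			prom, err := f.MonitoringClient.Prometheuses(f.UserWorkloadMonitoringNs).Get(ctx, "user-workload", metav1.GetOptions{})
			if err != nil {
				return err
			}

			got := getField(prom.Spec)
			if got == nil {
				return fmt.Errorf("%s not set", name)
			} else if *got != limit {
				return fmt.Errorf("expected %s to be %d, but got %d", name, limit, *got)
			}

			return nil
		})

		if err != nil {
			t.Fatalf("Timed out waiting for %s configuration: %v", name, err)
		}
	}
}

// Usage, e.g.:
//   assertEnforcedLimit("EnforcedLabelLimit", 500,
//       func(s monv1.PrometheusSpec) *uint64 { return s.EnforcedLabelLimit })
```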

func assertQueryLogValueEquals(namespace, crName, value string) func(t *testing.T) {
	return func(t *testing.T) {
		err := framework.Poll(time.Second, time.Minute*5, func() error {