Skip to content

Commit

Permalink
pkg/manifests: allow to configure enforced limits on PrometheusK8sConfig
Browse files Browse the repository at this point in the history
  • Loading branch information
fgouteroux committed Sep 28, 2022
1 parent 1a53238 commit e74813f
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 12 deletions.
10 changes: 10 additions & 0 deletions Documentation/openshiftdocs/modules/prometheusk8sconfig.adoc
Expand Up @@ -17,6 +17,16 @@ Appears in: link:clustermonitoringconfiguration.adoc[ClusterMonitoringConfigurat
| Property | Type | Description
|additionalAlertmanagerConfigs|[]link:additionalalertmanagerconfig.adoc[AdditionalAlertmanagerConfig]|AlertmanagerConfigs holds configuration about how the Prometheus component should communicate with additional Alertmanager instances. default: nil

|enforcedLabelLimit|*uint64|EnforcedLabelLimit per-scrape limit on the number of labels accepted for a sample. If more than this number of labels are present post metric-relabeling, the entire scrape will be treated as failed. 0 means no limit. default: 0

|enforcedLabelNameLengthLimit|*uint64|EnforcedLabelNameLengthLimit per-scrape limit on the length of label names that will be accepted for a sample. If a label name is longer than this number post metric-relabeling, the entire scrape will be treated as failed. 0 means no limit. default: 0

|enforcedLabelValueLengthLimit|*uint64|EnforcedLabelValueLengthLimit per-scrape limit on the length of label values that will be accepted for a sample. If a label value is longer than this number post metric-relabeling, the entire scrape will be treated as failed. 0 means no limit. default: 0

|enforcedSampleLimit|*uint64|EnforcedSampleLimit defines a global limit on the number of scraped samples that will be accepted. This overrides any SampleLimit set per ServiceMonitor or/and PodMonitor. It is meant to be used by admins to enforce the SampleLimit to keep the overall number of samples/series under the desired limit. Note that if SampleLimit is lower, that value will be taken instead. default: 0

|enforcedTargetLimit|*uint64|EnforcedTargetLimit defines a global limit on the number of scraped targets. This overrides any TargetLimit set per ServiceMonitor or/and PodMonitor. It is meant to be used by admins to enforce the TargetLimit to keep the overall number of targets under the desired limit. Note that if TargetLimit is lower, that value will be taken instead, except if either value is zero, in which case the non-zero value will be used. If both values are zero, no limit is enforced. default: 0

|enforcedBodySizeLimit|string|EnforcedBodySizeLimit enforces body size limit of Prometheus scrapes, if a scrape is bigger than the limit it will fail. 3 kinds of values are accepted:\n 1. empty value: no limit\n 2. a value in Prometheus size format, e.g. \"64MB\"\n 3. string \"automatic\", which means the limit will be automatically calculated based on\n cluster capacity.\ndefault: 64MB

|externalLabels|map[string]string|ExternalLabels defines labels to be added to any time series or alerts when communicating with external systems (federation, remote storage, Alertmanager). default: nil
Expand Down
20 changes: 20 additions & 0 deletions pkg/manifests/manifests.go
Expand Up @@ -1793,6 +1793,26 @@ func (f *Factory) PrometheusK8s(grpcTLS *v1.Secret, trustedCABundleCM *v1.Config
p.Spec.EnforcedBodySizeLimit = monv1.ByteSize(f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedBodySizeLimit)
}

if f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedSampleLimit != nil {
p.Spec.EnforcedSampleLimit = f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedSampleLimit
}

if f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedTargetLimit != nil {
p.Spec.EnforcedTargetLimit = f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedTargetLimit
}

if f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedLabelLimit != nil {
p.Spec.EnforcedLabelLimit = f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedLabelLimit
}

if f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedLabelNameLengthLimit != nil {
p.Spec.EnforcedLabelNameLengthLimit = f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedLabelNameLengthLimit
}

if f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedLabelValueLengthLimit != nil {
p.Spec.EnforcedLabelValueLengthLimit = f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedLabelValueLengthLimit
}

return p, nil
}

Expand Down
29 changes: 29 additions & 0 deletions pkg/manifests/types.go
Expand Up @@ -126,6 +126,35 @@ type PrometheusK8sConfig struct {
// cluster capacity.
// default: 64MB
EnforcedBodySizeLimit string `json:"enforcedBodySizeLimit,omitempty"`
// EnforcedLabelLimit per-scrape limit on the number of labels accepted for a sample. If more than this
// number of labels are present post metric-relabeling, the entire scrape will be treated as
// failed. 0 means no limit.
// default: 0
EnforcedLabelLimit *uint64 `json:"enforcedLabelLimit,omitempty"`
// EnforcedLabelNameLengthLimit per-scrape limit on the length of label names that will be accepted for
// a sample. If a label name is longer than this number post metric-relabeling, the entire scrape
// will be treated as failed. 0 means no limit.
// default: 0
EnforcedLabelNameLengthLimit *uint64 `json:"enforcedLabelNameLengthLimit,omitempty"`
// EnforcedLabelValueLengthLimit per-scrape limit on the length of label values that will be accepted for
// a sample. If a label value is longer than this number post metric-relabeling, the entire scrape will
// be treated as failed. 0 means no limit.
// default: 0
EnforcedLabelValueLengthLimit *uint64 `json:"enforcedLabelValueLengthLimit,omitempty"`
// EnforcedSampleLimit defines a global limit on the number of scraped samples that will be accepted.
// This overrides any SampleLimit set per ServiceMonitor or/and PodMonitor. It is meant to be
// used by admins to enforce the SampleLimit to keep the overall number of samples/series under the
// desired limit. Note that if SampleLimit is lower, that value will be taken instead.
// default: 0
EnforcedSampleLimit *uint64 `json:"enforcedSampleLimit,omitempty"`
// EnforcedTargetLimit defines a global limit on the number of scraped targets. This overrides
// any TargetLimit set per ServiceMonitor or/and PodMonitor. It is meant to be used by admins to
// enforce the TargetLimit to keep the overall number of targets under the desired limit. Note
// that if TargetLimit is lower, that value will be taken instead, except if either value is
// zero, in which case the non-zero value will be used. If both values are zero, no limit is
// enforced.
// default: 0
EnforcedTargetLimit *uint64 `json:"enforcedTargetLimit,omitempty"`
// ExternalLabels defines labels to be added to any time series or alerts when communicating
// with external systems (federation, remote storage, Alertmanager).
// default: nil
Expand Down
78 changes: 66 additions & 12 deletions test/e2e/config_test.go
Expand Up @@ -240,6 +240,11 @@ func TestClusterMonitorPrometheusK8Config(t *testing.T) {
)

data := fmt.Sprintf(`prometheusK8s:
enforcedSampleLimit: 50000
enforcedTargetLimit: 10
enforcedLabelLimit: 500
enforcedLabelNameLengthLimit: 50
enforcedLabelValueLengthLimit: 600
logLevel: debug
retention: 10h
retentionSize: 15GB
Expand Down Expand Up @@ -293,6 +298,26 @@ func TestClusterMonitorPrometheusK8Config(t *testing.T) {
name: "assert remote write url value in set in CR",
assertion: assertRemoteWriteWasSet(f.Ns, crName, "https://test.remotewrite.com/api/write"),
},
{
name: "assert enforced sample limit is configured",
assertion: assertEnforcedSampleLimit(f.Ns, crName, 50000),
},
{
name: "assert enforced target limit is configured",
assertion: assertEnforcedTargetLimit(f.Ns, crName, 10),
},
{
name: "assert enforced label limit is configured",
assertion: assertEnforcedLabelLimit(f.Ns, crName, 500),
},
{
name: "assert enforced label name length limit is configured",
assertion: assertEnforcedLabelNameLengthLimit(f.Ns, crName, 50),
},
{
name: "assert enforced label value length limit",
assertion: assertEnforcedLabelValueLengthLimit(f.Ns, crName, 600),
},
{
name: "assert query log file value is set and correct",
assertion: assertQueryLogValueEquals(f.Ns, crName, "/tmp/test.log"),
Expand Down Expand Up @@ -634,6 +659,7 @@ func TestUserWorkloadMonitorPrometheusK8Config(t *testing.T) {
},
Data: map[string]string{
"config.yaml": fmt.Sprintf(`prometheus:
enforcedSampleLimit: 50000
enforcedTargetLimit: 10
enforcedLabelLimit: 500
enforcedLabelNameLengthLimit: 50
Expand Down Expand Up @@ -697,21 +723,25 @@ func TestUserWorkloadMonitorPrometheusK8Config(t *testing.T) {
name: "assert remote write url value in set in CR",
assertion: assertRemoteWriteWasSet(f.UserWorkloadMonitoringNs, crName, "https://test.remotewrite.com/api/write"),
},
{
name: "assert enforced sample limit is configured",
assertion: assertEnforcedSampleLimit(f.UserWorkloadMonitoringNs, crName, 50000),
},
{
name: "assert enforced target limit is configured",
assertion: assertEnforcedTargetLimit(10),
assertion: assertEnforcedTargetLimit(f.UserWorkloadMonitoringNs, crName, 10),
},
{
name: "assert enforced label limit is configured",
assertion: assertEnforcedLabelLimit(500),
assertion: assertEnforcedLabelLimit(f.UserWorkloadMonitoringNs, crName, 500),
},
{
name: "assert enforced label name length limit is configured",
assertion: assertEnforcedLabelNameLengthLimit(50),
assertion: assertEnforcedLabelNameLengthLimit(f.UserWorkloadMonitoringNs, crName, 50),
},
{
name: "assert enforced label value length limit",
assertion: assertEnforcedLabelValueLengthLimit(600),
assertion: assertEnforcedLabelValueLengthLimit(f.UserWorkloadMonitoringNs, crName, 600),
},
{
name: "assert query log file value is set and correct",
Expand Down Expand Up @@ -960,11 +990,35 @@ func assertRemoteWriteWasSet(namespace, crName, urlValue string) func(t *testing
}
}

func assertEnforcedTargetLimit(limit uint64) func(*testing.T) {
func assertEnforcedSampleLimit(namespace, crName string, limit uint64) func(*testing.T) {
ctx := context.Background()
return func(t *testing.T) {
err := framework.Poll(time.Second, 5*time.Minute, func() error {
prom, err := f.MonitoringClient.Prometheuses(namespace).Get(ctx, crName, metav1.GetOptions{})
if err != nil {
return err
}

if prom.Spec.EnforcedSampleLimit == nil {
return errors.New("EnforcedSampleLimit not set")
} else if *prom.Spec.EnforcedSampleLimit != limit {
return fmt.Errorf("expected EnforcedSampleLimit to be %d, but got %d", limit, *prom.Spec.EnforcedSampleLimit)
}

return nil
})

if err != nil {
t.Fatalf("Timed out waiting for EnforcedSampleLimit configuration: %v", err)
}
}
}

func assertEnforcedTargetLimit(namespace, crName string, limit uint64) func(*testing.T) {
ctx := context.Background()
return func(t *testing.T) {
err := framework.Poll(time.Second, 5*time.Minute, func() error {
prom, err := f.MonitoringClient.Prometheuses(f.UserWorkloadMonitoringNs).Get(ctx, "user-workload", metav1.GetOptions{})
prom, err := f.MonitoringClient.Prometheuses(namespace).Get(ctx, crName, metav1.GetOptions{})
if err != nil {
return err
}
Expand All @@ -984,11 +1038,11 @@ func assertEnforcedTargetLimit(limit uint64) func(*testing.T) {
}
}

func assertEnforcedLabelLimit(limit uint64) func(*testing.T) {
func assertEnforcedLabelLimit(namespace, crName string, limit uint64) func(*testing.T) {
ctx := context.Background()
return func(t *testing.T) {
err := framework.Poll(time.Second, 5*time.Minute, func() error {
prom, err := f.MonitoringClient.Prometheuses(f.UserWorkloadMonitoringNs).Get(ctx, "user-workload", metav1.GetOptions{})
prom, err := f.MonitoringClient.Prometheuses(namespace).Get(ctx, crName, metav1.GetOptions{})
if err != nil {
return err
}
Expand All @@ -1008,11 +1062,11 @@ func assertEnforcedLabelLimit(limit uint64) func(*testing.T) {
}
}

func assertEnforcedLabelNameLengthLimit(limit uint64) func(*testing.T) {
func assertEnforcedLabelNameLengthLimit(namespace, crName string, limit uint64) func(*testing.T) {
ctx := context.Background()
return func(t *testing.T) {
err := framework.Poll(time.Second, 5*time.Minute, func() error {
prom, err := f.MonitoringClient.Prometheuses(f.UserWorkloadMonitoringNs).Get(ctx, "user-workload", metav1.GetOptions{})
prom, err := f.MonitoringClient.Prometheuses(namespace).Get(ctx, crName, metav1.GetOptions{})
if err != nil {
return err
}
Expand All @@ -1032,11 +1086,11 @@ func assertEnforcedLabelNameLengthLimit(limit uint64) func(*testing.T) {
}
}

func assertEnforcedLabelValueLengthLimit(limit uint64) func(*testing.T) {
func assertEnforcedLabelValueLengthLimit(namespace, crName string, limit uint64) func(*testing.T) {
ctx := context.Background()
return func(t *testing.T) {
err := framework.Poll(time.Second, 5*time.Minute, func() error {
prom, err := f.MonitoringClient.Prometheuses(f.UserWorkloadMonitoringNs).Get(ctx, "user-workload", metav1.GetOptions{})
prom, err := f.MonitoringClient.Prometheuses(namespace).Get(ctx, crName, metav1.GetOptions{})
if err != nil {
return err
}
Expand Down

0 comments on commit e74813f

Please sign in to comment.