Skip to content

Commit

Permalink
Merge pull request #197 from stackrox/master
Browse files Browse the repository at this point in the history
sync: master to stage
  • Loading branch information
stehessel committed Feb 8, 2024
2 parents c03a771 + 2dcb2e0 commit b52cfe2
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 0 deletions.
31 changes: 31 additions & 0 deletions resources/prometheus/prometheus-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,37 @@ spec:
description: |
Remaining DB instances: {{ with query "acs_fleetshard_central_db_instances_max - acs_fleetshard_central_db_instances_used" }}{{ . | first | value | humanize }}{{ end }}. A quota increase must be requested from AWS.
sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-029-increase-aws-rds-limits.md"

# Alerts on AWS SES account health: sender reputation (bounce and complaint
# rate) and send throughput. All three alerts point at the same SOP.
- name: aws-ses
  rules:
    # Fires when the bounce rate, averaged over the last hour, is strictly
    # above 5%.
    - alert: AWSSESReputationBounceRateTooHigh
      expr: avg_over_time(aws_ses_reputation_bounce_rate_sum[1h]) > 0.05
      labels:
        severity: warning
      annotations:
        summary: 'AWS SES Bounce rate too high'
        description: 'The SES bounce rate is {{ $value | humanizePercentage }}, which is higher than the maximum limit of 5%.'
        sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-040-aws-ses-violation.md"
    # Fires when the complaint rate, averaged over the last hour, is strictly
    # above 1%.
    # NOTE(review): AWS's published SES reputation guidance appears to use a
    # considerably lower complaint-rate threshold (0.1%) for account review --
    # confirm that 1% is the intended limit here.
    - alert: AWSSESReputationComplaintRateTooHigh
      expr: avg_over_time(aws_ses_reputation_complaint_rate_sum[1h]) > 0.01
      labels:
        severity: warning
      annotations:
        summary: 'AWS SES Complaint rate too high'
        description: 'The SES complaint rate is {{ $value | humanizePercentage }}, which is higher than the maximum limit of 1%.'
        sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-040-aws-ses-violation.md"
    - alert: AWSSESSendRateTooHigh
      # CloudWatch is set up to collect aws_ses_send_sum in 2 minute intervals
      # (120 seconds), so dividing a sample by 120 yields messages/second.
      # The max send rate is 14/s; alerting above 12/s leaves some headroom
      # before the limit is actually reached.
      expr: (max_over_time(aws_ses_send_sum[1h]) / 120) > 12
      labels:
        severity: warning
      annotations:
        summary: 'AWS SES Send rate too high'
        # `humanize` keeps the rendered rate short (e.g. 12.36 instead of
        # 12.358333333333333); whole numbers such as 14 render unchanged.
        description: 'The maximum send rate over the last hour is {{ $value | humanize }} messages/second, which is dangerously approaching the maximum limit of 14 per second.'
        sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-040-aws-ses-violation.md"


- name: rhacs-probe
rules:
- alert: RHACSProbeRunFailed
Expand Down
60 changes: 60 additions & 0 deletions resources/prometheus/unit_tests/AWSSESQuota.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# promtool unit tests for the AWS SES alerts (bounce rate, complaint rate,
# send rate).
#
# Series notation: 'a+bxn' starts at a and adds b for n further samples
# (n + 1 samples in total); 'axn' is shorthand for 'a+0xn'.
rule_files:
  - /tmp/prometheus-rules-test.yaml

evaluation_interval: 1m

tests:
  # Bounce rate: sits exactly at 5% for the first hour, then rises to 6%.
  - interval: 1m
    input_series:
      - series: aws_ses_reputation_bounce_rate_sum
        values: "0.05+0x60 0.06+0x90"
    alert_rule_test:
      # Exactly at the limit: the rule compares with a strict '>', so no alert.
      - eval_time: 60m
        alertname: AWSSESReputationBounceRateTooHigh
        exp_alerts: []
      # The whole 1h window now holds 6% samples, so the alert fires.
      - eval_time: 130m
        alertname: AWSSESReputationBounceRateTooHigh
        exp_alerts:
          - exp_labels:
              alertname: AWSSESReputationBounceRateTooHigh
              severity: warning
            exp_annotations:
              summary: 'AWS SES Bounce rate too high'
              description: 'The SES bounce rate is 6%, which is higher than the maximum limit of 5%.'
              sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-040-aws-ses-violation.md"
  # Complaint rate: sits exactly at 1% for the first hour, then rises to 2%.
  - interval: 1m
    input_series:
      - series: aws_ses_reputation_complaint_rate_sum
        values: "0.01+0x60 0.02+0x90"
    alert_rule_test:
      # Exactly at the limit: no alert.
      - eval_time: 60m
        alertname: AWSSESReputationComplaintRateTooHigh
        exp_alerts: []
      # The whole 1h window now holds 2% samples, so the alert fires.
      - eval_time: 130m
        alertname: AWSSESReputationComplaintRateTooHigh
        exp_alerts:
          - exp_labels:
              alertname: AWSSESReputationComplaintRateTooHigh
              severity: warning
            exp_annotations:
              summary: 'AWS SES Complaint rate too high'
              description: 'The SES complaint rate is 2%, which is higher than the maximum limit of 1%.'
              sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-040-aws-ses-violation.md"
  # Send rate: samples are sums per 2-minute interval, so 1680 per sample
  # corresponds to 1680 / 120 = 14 messages/second.
  - interval: 2m
    input_series:
      - series: aws_ses_send_sum
        values: "0+0x5 1680x20"
    alert_rule_test:
      # Only zero samples seen so far (0m..10m): no alert.
      - eval_time: 10m
        alertname: AWSSESSendRateTooHigh
        exp_alerts: []
      # The window now contains 1680 samples (14/s), above the 12/s threshold.
      - eval_time: 30m
        alertname: AWSSESSendRateTooHigh
        exp_alerts:
          - exp_labels:
              alertname: AWSSESSendRateTooHigh
              severity: warning
            exp_annotations:
              summary: 'AWS SES Send rate too high'
              description: 'The maximum send rate over the last hour is 14 messages/second, which is dangerously approaching the maximum limit of 14 per second.'
              sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-040-aws-ses-violation.md"
  # Send rate boundary: 1440 per 2-minute sample is exactly 12 messages/second,
  # which equals the threshold and must therefore NOT fire (strict '>').
  - interval: 2m
    input_series:
      - series: aws_ses_send_sum
        values: "1440+0x40"
    alert_rule_test:
      - eval_time: 30m
        alertname: AWSSESSendRateTooHigh
        exp_alerts: []

0 comments on commit b52cfe2

Please sign in to comment.