From adb7bf730de687ee36e83cf3fd1fb54e9183744d Mon Sep 17 00:00:00 2001
From: Jacob Tanenbaum
Date: Tue, 24 Sep 2019 10:51:21 -0400
Subject: [PATCH] Correct the way nodes are computed for alert ClusterIPTablesStale

Change kube_pod_info_node_count to
sum(kube_pod_info{namespace="openshift-sdn", pod=~"ovs.*"}); this more
accurately computes the alert by returning the number of nodes that have an
ovs pod running.

Also change from using time() to timestamp().
---
 bindata/network/openshift-sdn/alert-rules.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/bindata/network/openshift-sdn/alert-rules.yaml b/bindata/network/openshift-sdn/alert-rules.yaml
index 4a12ba6672..415e9f3382 100644
--- a/bindata/network/openshift-sdn/alert-rules.yaml
+++ b/bindata/network/openshift-sdn/alert-rules.yaml
@@ -67,7 +67,10 @@ spec:
       annotations:
         message: The average time between iptables resyncs is too high. NOTE - There is some scrape delay and other offsets, 90s isn't exact but it is still too high.
       expr: |
-        time() - (sum(kubeproxy_sync_proxy_rules_last_timestamp_seconds) / :kube_pod_info_node_count:) > 90
+        quantile(0.95,
+          timestamp(kubeproxy_sync_proxy_rules_last_timestamp_seconds)
+          - on(pod) kubeproxy_sync_proxy_rules_last_timestamp_seconds
+          * on(pod) group_right kube_pod_info{namespace="openshift-sdn", pod=~"sdn-[^-]*"}) > 90
       for: 20m
       labels:
         severity: warning
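
Note (annotation, not part of the patch): a rough reading of the new expression,
assuming kubeproxy_sync_proxy_rules_last_timestamp_seconds and kube_pod_info
both carry a matching "pod" label. The inner expression computes, for each
openshift-sdn pod, the seconds elapsed between the scrape and that pod's last
iptables resync; quantile(0.95, ...) then takes the 95th percentile of that age
across pods, and the alert fires when it exceeds 90 seconds. The per-pod age can
be inspected on its own in the Prometheus console with a query along these
lines (the alert expression from the hunk above, minus the quantile):

    # seconds since the last iptables resync, per openshift-sdn pod (sketch)
    timestamp(kubeproxy_sync_proxy_rules_last_timestamp_seconds)
      - on(pod) kubeproxy_sync_proxy_rules_last_timestamp_seconds
      * on(pod) group_right kube_pod_info{namespace="openshift-sdn", pod=~"sdn-[^-]*"}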