## Automatic SLA Reporting Prototype

This notebook is for development and testing of an automated script for pulling data from Thanos and compiling an SLA report to be emailed to Legal.

First, we verify that we trust the Red Hat CAs (since Thanos uses an SSL cert signed by those CAs, and OpenSSL will throw a fit if it doesn't trust them).
Solution derived from https://incognitjoe.github.io/adding-certs-to-requests.html

API client docs: https://prometheus-api-client-python.readthedocs.io/en/latest/source/prometheus_api_client.html#prometheus_api_client.prometheus_connect.PrometheusConnect

Obtain an API key: https://datahub.psi.redhat.com/console/catalog

Other help: https://help.datahub.redhat.com/docs/interacting-with-telemetry-data

In [1]:
# Install the third-party packages imported by the next cell.
# NOTE(review): `yaml` is imported below but PyYAML is not in this list --
# confirm it is already available in the target environment.
!pip3 install certifi requests prometheus_api_client tabulate PyJWT

You should consider upgrading via the 'pip install --upgrade pip' command.


In [2]:
import certifi
import requests
import yaml
import prometheus_api_client
from string import Template
from tabulate import tabulate
import jwt
from typing import Dict, List

In [3]:
class UnifiedHybridClient:
    """
    Limited access to the UHC API for authentication and cluster searching
    """

    # Seconds to wait on a single HTTP request before giving up, so a stalled
    # endpoint cannot hang the report indefinitely.
    request_timeout = 30

    def __init__(self, api_url: str, offline_token: str, public_key: str = None):
        """
        Instantiate a UnifiedHybrid client object

        The issuer URL and client ID used for later token refreshes are read
        from the "iss" and "aud" claims of the offline token itself.

        :param api_url: (str) the URL for the UHC API
        :param offline_token: (str) a JSON Web Token string with the
            "offline_access" permission. Usually obtained from
            https://cloud.redhat.com/openshift/token
        :param public_key: (str) the RSA public key that corresponds
            to the JSON Web Token you provide. Hint: https://jwt.io/
            can extract this from a JWT for you. When omitted, the token
            signature is NOT verified.
        :raises KeyError: if the token lacks an "iss" or "aud" claim
        """

        self.api_url = api_url
        self.offline_token = offline_token.strip()
        self.public_key = public_key.strip() if public_key is not None else None

        # Extract info from the offline token. The signature is only checked
        # when a public key was supplied. "verify_aud" is disabled because the
        # audience is what we are trying to learn from the token, so there is
        # no expected value to validate it against.
        ot_decoded = jwt.decode(
            self.offline_token,
            self.public_key or "",
            algorithms=["RS256"],
            options={
                "verify_signature": self.public_key is not None,
                "verify_aud": False,
            },
        )
        self.iss_url = ot_decoded["iss"]
        self.client_id = ot_decoded["aud"]

    @staticmethod
    def _raise_on_error(response) -> None:
        """
        Raise an Exception describing the response unless its status is 200

        :param response: the requests response object to inspect
        :raises Exception: if the HTTP status code is not 200
        """
        if response.status_code != 200:
            raise Exception(
                "HTTP Status Code {} ({})".format(
                    response.status_code, response.content
                )
            )

    def __get_access_token(self) -> str:
        """
        Obtain a short-lived access token from the SSO service

        :returns: (str) a short-lived access token
        :raises Exception: if the SSO service does not answer with HTTP 200
        """
        response = requests.post(
            "{}/protocol/openid-connect/token".format(self.iss_url),
            data={
                "grant_type": "refresh_token",
                "client_id": self.client_id,
                "refresh_token": self.offline_token,
            },
            headers={"accept": "application/json"},
            timeout=self.request_timeout,
        )
        # Fail with the HTTP status instead of an opaque KeyError below
        self._raise_on_error(response)
        return response.json()["access_token"]

    def search_clusters(self, query: str) -> dict:
        """
        Query a list of clusters from the UHC HTTP API.

        A fresh access token is requested for every call.

        :param query: (str) Specifies the search criteria. This syntax of
            this parameter is similar to the syntax of the WHERE clause of
            an SQL statement, but using the names of the attributes of the
            cluster instead of the names of the columns of a table.
        :returns: (dict) the response from the API in dict format
        :raises Exception: if the API does not answer with HTTP 200
        """
        # NOTE(review): only a single request is made; if the API paginates
        # its results, later pages are not fetched -- confirm against the
        # UHC API docs.
        response = requests.get(
            "{}/api/clusters_mgmt/v1/clusters".format(self.api_url),
            verify=True,
            headers={
                "accept": "application/json",
                "Authorization": "Bearer " + self.__get_access_token(),
            },
            params={"search": query},
            timeout=self.request_timeout,
        )
        self._raise_on_error(response)
        return response.json()

In [4]:
class SLAReporter:
    """
    Generate formatted reports on SLA compliance
    """

    # How far above its SLA (in percentage points, since both values are
    # scaled by 100 before comparison) an SLI may sit and still be flagged
    # "caution" rather than "success".
    caution_threshold = 0.01
    default_css = """<style>
            .danger {color: red; font-weight: bold;}
            .caution {color: darkorange; font-weight: bold;}
            .success {color: green;}
        </style>"""

    def __init__(self, config: dict):
        """
        Instantiate a SLAReporter object

        :param config: (dict) the "settings" for this class, including URLs
            and rules. Usually originates from a YAML file. Must provide
            config["api"]["telemeter"] and config["api"]["uhc"], each with
            "url" and "token" entries.
        :raises Exception: if the Telemeter-LTS URL cannot be reached over
            a trusted SSL connection
        """
        self.config = config

        # Connect to Telemeter-LTS
        telemeter = self.config["api"]["telemeter"]
        if not self.__check_ssl_certs(telemeter["url"]):
            raise Exception("Can't connect to Telemeter-LTS")
        self.pc = prometheus_api_client.prometheus_connect.PrometheusConnect(
            url=telemeter["url"],
            headers={"Authorization": "bearer " + telemeter["token"]},
            disable_ssl=False,
        )

        # Connect to UHC
        uhc = self.config["api"]["uhc"]
        self.uhc = UnifiedHybridClient(uhc["url"], uhc["token"])

    def get_cluster_ids(self, search_query: str) -> Dict[str, str]:
        """
        Gets the names and ID selectors of all clusters matching
        a search query from the UHC API

        Clusters without an "external_id" field are left out.

        :param search_query: (str) a UHC search string (see UHC API docs)
        :returns: (dict) a dict with selected cluster names as keys and
            label selectors of the form ``_id='<external_id>'`` as values
        """
        clist = self.uhc.search_clusters(search_query)
        return {
            cluster["name"]: "_id='{}'".format(cluster["external_id"])
            for cluster in clist.get("items", [])
            if "external_id" in cluster
        }

    def generate_report(
        self, cluster_ids: Dict[str, str], fmt: str = "html"
    ) -> List[List[str]]:
        """
        Generate a raw SLA report by running each configured query
        against the provided list of cluster IDs

        Each rule's "query" is a string.Template; it is filled in with the
        rule's other fields plus ``sel`` (the cluster's selector). A query
        that fails or returns no data is reported on stdout and leaves the
        corresponding performance cell empty.

        :param cluster_ids: (dict) a dict with selected cluster names as
            keys and their ID selectors as values
        :param fmt: (str) specifies what kind of formatting padding should
            be included. For example, specifying "html" will wrap numbers
            in <span> tags, "plain" wraps with BASH color specifiers, etc.
            Providing None disables all wrapping.
        :returns: (list) a report in form of a table (i.e. list of lists)
        """
        percent = "&#37;" if fmt == "html" else "%"
        table = []
        for name, selector in cluster_ids.items():
            row = [name]
            for rule in self.config["rules"]:
                query_params = {k: v for k, v in rule.items() if k != "query"}
                query_params["sel"] = selector
                query = Template(rule["query"]).substitute(**query_params)
                # Computed before the query so that a failed query still
                # reports THIS rule's SLA (not a stale or unbound value).
                sla = float(rule["sla"]) * 100
                try:
                    query_res = self.pc.custom_query(query)
                    sli = round(float(query_res[0]["value"][1]) * 100, 4)
                    sli_cell = self.__format_sli(sli, sla, fmt)
                except Exception:
                    # Best effort: one bad query must not abort the report
                    print("Query failed:" + str(query))
                    sli_cell = ""
                row += [str(sla) + percent, sli_cell]
            table.append(row)

        return table

    def generate_headers(self) -> List[str]:
        """
        Generate the header row of the report based on the configured rules

        :returns: a single list representing the header row: "Cluster"
            followed by an "SLA" and a "Perf." column per rule
        """
        headers = ["Cluster"]
        for rule in self.config["rules"]:
            headers += [rule["name"] + " SLA", rule["name"] + " Perf."]
        return headers

    @classmethod
    def format_report(
        cls,
        headers: List[str],
        table: List[List[str]],
        fmt: str = "html",
        css: str = None,
    ) -> str:
        """
        Format a pre-generated report using tabulate and print to string

        Basically, this wraps around tabulate, but does the work of adding
        CSS styles for you. Optionally, you can provide your own CSS style.
        The default CSS is only added for HTML table formats; a custom
        ``css`` is always prepended.

        :param headers: (list) the header row of the report
        :param table: (list) the contents of the report
        :param fmt: (str) passed to tabulate as the "tablefmt" param
        :param css: (str) optional custom CSS <style> block
        :returns: (str) the rendered table, preceded by the CSS block
            where applicable
        """
        # NOTE(review): newer tabulate releases may HTML-escape cell text for
        # tablefmt="html" -- confirm the <span> markup survives with the
        # installed version.
        rendered = tabulate(table, headers, tablefmt=fmt, stralign="center")
        if not css and fmt not in ("html", "unsafehtml"):
            # A <style> block is meaningless in plain-text output
            return rendered
        return (css or cls.default_css) + rendered

    @staticmethod
    def __check_ssl_certs(
        url: str,
        ca_bundle_path: str = "RHCertBundle.pem",
        retries: int = 3,
        timeout: float = 30,
    ) -> bool:
        """
        Checks if the Red Hat SSL CA certs are installed by connecting
        to a URL that uses them.

        On the first SSL failure the custom CA bundle is appended to the
        certifi CA file, then the connection is retried.

        :param url: (str) an HTTPS URL utilizing Red Hat-signed certificates
        :param ca_bundle_path: (str) path of the PEM bundle to append to
            the certifi CA file when the connection is not trusted
        :param retries: (int) maximum number of connection attempts
        :param timeout: (float) seconds to wait for each attempt
        :returns: (bool) true if we could successfully connect to the URL
        """
        bundle_appended = False
        for _ in range(retries):
            try:
                requests.get(url, timeout=timeout)
                return True
            except requests.exceptions.SSLError:
                if bundle_appended:
                    # Appending the same bundle again cannot help; just retry
                    continue
                with open(ca_bundle_path, "rb") as infile:
                    customca = infile.read()
                with open(certifi.where(), "ab") as outfile:
                    # Leading newline guards against a CA file that does not
                    # end with one, which would fuse two PEM blocks together
                    outfile.write(b"\n" + customca)
                bundle_appended = True

        return False

    @classmethod
    def __format_sli(cls, sli: float, sla: float, fmt: str = "html") -> str:
        """
        Adds color formatting to the value of an SLI based on whether
        or not it complies with SLA

        Below the SLA is "danger"; less than ``caution_threshold`` above it
        is "caution"; anything else is "success".

        :param sli: (float) the current value of the SLI
        :param sla: (float) the minimum "good" value of the SLI
        :param fmt: (str) What kind of formatting to apply. Options
            include "html", "plain", "simple", "grid", "fancy_grid".
            Any other value returns the bare number.
        :returns: (str) the SLI wrapped in an HTML <span>, in ANSI color
            codes, or as a plain string, depending on ``fmt``
        """
        margin = sli - sla
        if margin < 0:
            css_class, ansi_code = "danger", "1;31"
        elif margin < cls.caution_threshold:
            css_class, ansi_code = "caution", "1;33"
        else:
            css_class, ansi_code = "success", "0;32"

        if fmt in ("plain", "simple", "grid", "fancy_grid"):
            return "\033[{}m{}%\033[0m".format(ansi_code, sli)
        if fmt == "html":
            return "<span class='{}'>{}&#37;</span>".format(css_class, sli)
        return str(sli)

## Demo

In [5]:
# Load config file
# Read the reporter settings from the YAML config file
with open("sla_report_config.yml") as config_file:
    config = yaml.safe_load(config_file)

# Build the reporter from those settings
slar = SLAReporter(config)

In [6]:
# Get all clusters with a managed flag
# Merge the matches of every configured UHC search into one name -> selector dict
clids = dict()
for uhc_query in config["clusters"]:
    matches = slar.get_cluster_ids(uhc_query)
    clids.update(matches)
clids

{'osd-v4stg-aws': "_id='c03103eb-1571-498d-b1fd-70587b445faa'",
 'osd-v4prod-aws': "_id='18e66bcf-3090-4519-a188-4ffb63fb6104'"}

In [9]:
# Run every configured rule against every selected cluster (this may take a while...)
rep = slar.generate_report(cluster_ids=clids, fmt="html")
rep

[['osd-v4stg-aws',
  '99.5&#37;',
  "<span class='success'>99.9206&#37;</span>",
  '99.9&#37;',
  "<span class='success'>99.9999&#37;</span>",
  '99.9&#37;',
  "<span class='success'>100.0&#37;</span>",
  '99.5&#37;',
  "<span class='success'>99.9206&#37;</span>",
  '99.0&#37;',
  "<span class='success'>99.9504&#37;</span>",
  '99.5&#37;',
  "<span class='danger'>17.1131&#37;</span>",
  '99.0&#37;',
  "<span class='danger'>9.3254&#37;</span>",
  '99.99&#37;',
  "<span class='success'>100.0&#37;</span>"],
 ['osd-v4prod-aws',
  '99.5&#37;',
  "<span class='success'>99.8214&#37;</span>",
  '99.9&#37;',
  "<span class='danger'>98.5457&#37;</span>",
  '99.9&#37;',
  "<span class='success'>100.0&#37;</span>",
  '99.5&#37;',
  "<span class='success'>99.8214&#37;</span>",
  '99.0&#37;',
  "<span class='success'>99.6032&#37;</span>",
  '99.5&#37;',
  "<span class='danger'>75.6052&#37;</span>",
  '99.0&#37;',
  "<span class='danger'>0.4067&#37;</span>",
  '99.99&#37;',
  "<span class='success'>1

In [10]:
# `display` and `HTML` are imported from the public IPython.display module;
# importing them from IPython.core.display is deprecated.
from IPython.display import HTML, display

# Table styles plus the danger/caution/success classes used by the report cells
pure_css = """
<style>
.pure-table{border-collapse:collapse;border-spacing:0;empty-cells:show;border:1px solid #cbcbcb;font-family:Sans-serif}.pure-table td,.pure-table th{border-left:1px solid #cbcbcb;border-width:0 0 0 1px;font-size:inherit;margin:0;overflow:visible;padding:.5em 1em}.pure-table thead{background-color:#e0e0e0;color:#000;text-align:left;vertical-align:bottom}.pure-table td{background-color:transparent}.pure-table-odd td{background-color:#f2f2f2}.pure-table-striped tr:nth-child(2n-1) td{background-color:#f2f2f2}.pure-table-bordered td{border-bottom:1px solid #cbcbcb}.pure-table-bordered tbody > tr:last-child > td{border-bottom-width:0}.pure-table-horizontal td,.pure-table-horizontal th{border-width:0 0 1px;border-bottom:1px solid #cbcbcb}.pure-table-horizontal tbody > tr:last-child > td{border-bottom-width:0}.danger{color:red;font-weight:700}.caution{color:#ff8c00;font-weight:700}.success{color:green}
</style>
"""
# Format the report as HTML, attach the table classes to the <table> tag, and display it
htm = slar.format_report(slar.generate_headers(), rep, css=pure_css)
htm = htm.replace(
    "<table>",
    "<table class='pure-table pure-table-striped pure-table-horizontal'>",
)
display(HTML(htm))
#print(slar.format_report(slar.generate_headers(), rep, fmt="plain"))

Cluster,CtrlPlane General SLA,CtrlPlane General Perf.,CtrlPlane API SLA,CtrlPlane API Perf.,CtrlPlane etcd SLA,CtrlPlane etcd Perf.,CtrlPlane Latency SLA,CtrlPlane Latency Perf.,Registry General SLA,Registry General Perf.,Compute General SLA,Compute General Perf.,Compute Resiliency SLA,Compute Resiliency Perf.,Support Monitoring SLA,Support Monitoring Perf.
osd-v4stg-aws,99.5%,99.9206%,99.9%,99.9999%,99.9%,100.0%,99.5%,99.9206%,99.0%,99.9504%,99.5%,17.1131%,99.0%,9.3254%,99.99%,100.0%
osd-v4prod-aws,99.5%,99.8214%,99.9%,98.5457%,99.9%,100.0%,99.5%,99.8214%,99.0%,99.6032%,99.5%,75.6052%,99.0%,0.4067%,99.99%,100.0%
