# SLA Investigation
1. Run all cells (click on Menu > Cell > Run All Cells).
2. View the report at the bottom.

In [None]:
# Inputs for this investigation; the cells below read these names directly.
# triggerTime: sent to the SLA query as 'TriggerTime' after akn.to_kusto_datetime().
triggerTime = "2019-10-15T20:21:54.0330000Z"
# scaleUnit: wrapped in double quotes and sent as the 'ScaleUnit' query parameter.
scaleUnit = "pipelines-ghub-eus2-2"
# service: wrapped in double quotes and sent as the 'Service' query parameter.
service = "pipelines"
# lookback: sent as 'Lookback' after akn.to_kusto_timespan().
lookback = "1h"
# region: wrapped in double quotes and sent as the 'Region' query parameter.
# NOTE(review): an empty string presumably means "no region filter" -- confirm
# against SLADurationAnalysis.csl.
region = ""

In [None]:
%%capture 

# install packages, setup workspace root
!pip install --upgrade pip azure-kusto-notebooks
import os
import sys
import datetime
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
pd.options.display.html.table_schema = True
import concurrent.futures
from azure.kusto.notebooks import utils as akn

# query files are addressed by paths that start with 'devops-pipelines', so when
# the notebook is started from inside that folder, step up to its parent
launched_from = os.path.basename(os.getcwd())
if launched_from == 'devops-pipelines':
    os.chdir(os.pardir)

In [None]:
# authenticate kusto client
# you will need to copy the token into a browser window for AAD auth.
# the resulting client is reused by every query issued in the cells below.
client = akn.get_client('https://vso.kusto.windows.net')

In [None]:
# find orchestrations that violate SLA
# the time window goes through the akn converters; plain text values are
# wrapped in double quotes before being handed to the query
params = {
    'TriggerTime': akn.to_kusto_datetime(triggerTime),
    'Lookback': akn.to_kusto_timespan(lookback),
}
text_values = {'Service': service, 'Region': region, 'ScaleUnit': scaleUnit}
params.update({key: '"' + text + '"' for key, text in text_values.items()})

sla_query_parts = ('devops-pipelines', 'queries', 'sla', 'SLADurationAnalysis.csl')
query = os.path.join(*sla_query_parts)
violations = akn.execute_file(client, database='VSO', path=query, params=params)
# violations

In [None]:
# collect problematic orchestration ids
def _column_ordinal(columns, wanted_name):
    """Return the ordinal of the first column named *wanted_name*, or None."""
    for column in columns:
        if column.column_name == wanted_name:
            return column.ordinal
    return None


result = violations.primary_results[0]
oid_column_index = _column_ordinal(result.columns, 'OrchestrationId')
su_column_index = _column_ordinal(result.columns, 'ScaleUnit')

# group orchestration ids under the scale unit they ran on
by_su = {}
for row in result.rows:
    scale_unit, orchestration_id = row[su_column_index], row[oid_column_index]
    by_su.setdefault(scale_unit, []).append(orchestration_id)

# the scale unit(s) holding the largest number of problems, in sorted order
max_problems = max((len(oids) for oids in by_su.values()), default=0)
max_scale_units = sorted(
    scale_unit for scale_unit, oids in by_su.items() if len(oids) == max_problems
)

# for su, oids in by_su.items():
#     print(su)
#     for oid in oids:
#         print('   ', oid)

In [None]:
# collect visualization data sets
def _has_required_phases(frame, phases):
    """Return True when every name in *phases* occurs in frame['PhaseName']."""
    recorded = frame['PhaseName'].values
    return all(phase in recorded for phase in phases)


query = os.path.join('devops-pipelines', 'queries', 'sla', 'SLAVisualization.csl')

# one query per violating row, fanned out over a thread pool; results are
# gathered in completion order
with concurrent.futures.ThreadPoolExecutor() as executor:
    hfs = []
    for row in result.rows:
        row_params = {
            'ScaleUnit': '"' + row[su_column_index] + '"',
            'OrchestrationId': '"' + row[oid_column_index] + '"',
        }
        hfs.append(executor.submit(akn.execute_file, client, 'VSO', query, row_params))
    histories = [finished.result() for finished in concurrent.futures.as_completed(hfs)]

# convert to data frames
primary_results = [history.primary_results[0] for history in histories]
dataframes = None
with concurrent.futures.ThreadPoolExecutor() as executor:
    dataframe_futures = []
    for table in primary_results:
        dataframe_futures.append(executor.submit(akn.to_dataframe, table))
    dataframes = [finished.result() for finished in concurrent.futures.as_completed(dataframe_futures)]
# drop the reference to the raw responses now that they have been converted
histories = None

# try to filter out false positives? at least a certain number of phases must have been recorded.
required_phases = ('RunAgentJob.SendJob', 'RunAgentJob.JobCompleted')
filtered_dataframes = [
    frame for frame in dataframes if _has_required_phases(frame, required_phases)
]
number_of_false_positives = len(dataframes) - len(filtered_dataframes)
dataframes = filtered_dataframes
number_of_violations = len(dataframes)
plans_out_of_sla = [frame['PlanId'].iat[0] for frame in dataframes]

In [None]:
# Summary values consumed by the report cell. Every name gets a default so it
# is defined even when no dataframe survived filtering or a lookup comes back
# empty.
worst_phaseName = ''
worst_count = 0
worst_team = ''
plan_with_most_violations = ''
plan_with_most_violations_count = 0

if dataframes:
    # what was the worst phase?
    # count, per phase name, the rows whose 'Level' equals 2 and keep the most frequent one
    # NOTE(review): Level == 2 presumably marks the slowest phase of a plan -- confirm
    # against SLAVisualization.csl.
    combined = pd.concat(dataframes, ignore_index=True)
    df = combined.loc[combined['Level'] == 2].groupby(['PhaseName']).size().to_frame('Count').nlargest(1, 'Count')
    if len(df.index) > 0:
        worst_phaseName = df.index[0]
        worst_count = df.iat[0, 0]
        # the text before the first '.' of the phase name is used as the team to route to
        worst_team = worst_phaseName.split('.')[0]

    # what was the worst plan?
    violations_df = akn.to_dataframe(violations.primary_results[0])
    df = violations_df.groupby(['PlanId']).size().to_frame('Count').nlargest(1, 'Count')
    # same guard as for the worst phase: an empty grouping keeps the defaults
    if len(df.index) > 0:
        plan_with_most_violations = df.index[0]
        plan_with_most_violations_count = df.iat[0, 0]

In [None]:
# report: data-quality note first, then the findings and a conclusion
if number_of_false_positives:
    print(number_of_false_positives, 'plans are likely missing kusto data and were ignored.')

if number_of_violations > 0:
    for su in max_scale_units:
        print(max_problems, 'of the problems were in', su)

    # singular/plural wording depends on how many plans are out of SLA
    several = number_of_violations > 1
    print(number_of_violations,
          'plans' if several else 'plan',
          'had no apparent data problems and',
          'are' if several else 'is',
          'out of SLA.')

    if plan_with_most_violations in plans_out_of_sla:
        print(plan_with_most_violations, 'had the most violations with', plan_with_most_violations_count)

    if worst_phaseName:
        quoted_phase = '"' + worst_phaseName + '"'
        print(quoted_phase, 'was the slowest phase in', worst_count,
              'of the', number_of_violations, 'SLA violations.')

    print('\nConclusion:')
    if number_of_violations <= 5:
        print('Too much uncertainty -- do not open any ICMs.')

        # many ignored plans relative to the busiest scale unit hints at a data problem there
        if number_of_false_positives and float(number_of_false_positives) / float(max_problems) > .5:
            for su in max_scale_units:
                print(su, 'might be unhealthy based on the number of plans missing kusto data.')
    else:
        print('This is likely a real problem. Open icm against scale units:', max_scale_units)
        print('Initially route it to:       ', worst_team)
else:
    print('no problems detected')

In [None]:
%matplotlib inline
plt.rcdefaults()

if dataframes:
    # one stacked chart per plan, capped at 25 charts
    number_of_graphs = min(25, len(dataframes))
    fig, axes = plt.subplots(
        nrows=number_of_graphs,
        ncols=1,
        figsize=(8, 6 * number_of_graphs),
        constrained_layout=True,
    )
    # with a single row, `axes` is used directly instead of being indexed
    axes_by_row = axes if number_of_graphs > 1 else [axes]

    for ax, df in zip(axes_by_row, dataframes):
        ax.axhline(0, color='k')

        phase_names = df['PhaseName']
        bar_positions = np.arange(len(phase_names))
        percent_difference = df['PercentDifference']

        # the title combines this plan's own data with its row in the violations table
        plan_id = df['PlanId'].iloc[0]
        violation_row = violations_df.loc[violations_df['PlanId'] == plan_id]
        title_lines = [
            'plan id:' + plan_id,
            'scale unit:' + str(violation_row['ScaleUnit'].iloc[0]),
            'definition:' + str(df['DefinitionName'].iloc[0]),
            'plan duration: ' + str(violation_row['PlanDuration'].iloc[0]),
            'sla duration: ' + str(violation_row['TotalSLADuration'].iloc[0]),
        ]
        ax.title.set_text('\n'.join(title_lines))

        ax.bar(x=bar_positions, height=percent_difference)
        ax.set_xticks(bar_positions)
        ax.set_xticklabels(phase_names, rotation=45, ha="right")

# output_filename = 'analysis.svg'
# plt.savefig(output_filename, format='svg')