# Impact Investigation
1. Run all cells.
1. View report at the bottom.

In [None]:
# These are just defaults; they will be overwritten if you use https://github.com/yaananth/nimport
# All six values are collected into the `params` dict that is handed to the query templates.
su="tfs-weu-3"  # also used by the report cell for the Jarvis 'ScaleUnit' override
start="2019-07-16T13:30:00.0000000Z"  # window start; the report pads it by -10 minutes for the Jarvis link
end="2019-07-16T13:44:00.0000000Z"  # window end; the report pads it by +10 minutes for the Jarvis link
url="https://notebooksv2.azure.com/yaananth/projects/06OasuNRs6rK/delays.ipynb"
baseUrl="https://notebooksv2.azure.com/yaananth/projects/06OasuNRs6rK"
service="tfs"  # also used for the Jarvis 'Service' override and the database-name prefix hint

In [None]:
# Import the things we use

# Note you can also use kql https://docs.microsoft.com/en-us/azure/data-explorer/kqlmagic
# %kql is single line magic
# %%kql is cell magic

# https://nbviewer.jupyter.org/github/ipython/ipython/blob/4.0.x/examples/IPython%20Kernel/Rich%20Output.ipynb#HTML
# https://ipython.readthedocs.io/en/stable/interactive/magics.html
from IPython.display import display, HTML, Markdown, Javascript, clear_output

# http://pandas-docs.github.io/pandas-docs-travis/user_guide/reshaping.html
import pandas as pd
from pandas import Series, DataFrame
from datetime import datetime, timedelta, timezone
from urllib.parse import urlencode, quote_plus
from requests.utils import requote_uri
import time
import numpy as np
from matplotlib import pyplot as plt
from nimport.utils import tokenize
import json
import os
import calendar as cal

In [None]:
# Bundle the notebook inputs into one mapping; it is passed to every query template.
params = dict(
    su=su,
    start=start,
    end=end,
    url=url,
    baseUrl=baseUrl,
    service=service,
)

# Folder prefix for locating the 'queries' directory: empty when the working
# directory is already named 'devops-pipelines', otherwise that folder name.
if os.path.basename(os.getcwd()) == 'devops-pipelines':
    root = ''
else:
    root = 'devops-pipelines'
    

In [None]:
# This isn't needed if you are bootstrapping
#!pip install Kqlmagic --no-cache-dir --upgrade
#!pip install nimport --no-cache-dir --upgrade

In [None]:
# kusto/python utilities
def getTime(timestamp, d):
    """Convert a timestamp string to epoch milliseconds, shifted by `d` minutes.

    `timestamp` is parsed with getDateTime and its time tuple is interpreted
    as UTC by calendar.timegm; `d` may be negative to move backwards in time.
    """
    epochSeconds = cal.timegm(getDateTime(timestamp).timetuple())
    offsetSeconds = d * 60
    return int((epochSeconds + offsetSeconds) * 1000)
                
def getDateTime(timestamp):
    """Parse a timestamp string into a naive datetime.

    Accepted shapes: date and time separated by either a space or 'T', with
    or without fractional seconds, with or without a trailing 'Z'. Fractional
    digits beyond the third are dropped, so 7-digit fractions such as
    "2019-07-16T13:30:00.0000000Z" fit within what %f can parse.

    Returns None when the string matches none of the accepted shapes.
    """
    # Strip the zone marker first so short inputs never end up as "...ZZ",
    # then keep at most "YYYY-MM-DDTHH:MM:SS.fff" (23 chars = 3 decimals).
    s = timestamp.rstrip('Z')[:23]
    formats = (
        "%Y-%m-%d %H:%M:%S.%f",
        "%Y-%m-%dT%H:%M:%S.%f",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M:%S",
    )
    for f in formats:
        try:
            return datetime.strptime(s, f)
        except ValueError:
            # Not this format; try the next one.
            continue
    return None
           
def getKustoQuery(csl_filename, params):
    """Build the query text for a .csl file under the 'queries' folder.

    The path is resolved relative to `root` and handed, together with
    `params`, to nimport's `tokenize` (presumably it substitutes the params
    into the template -- confirm against nimport).
    """
    queryPath = os.path.join(root, 'queries', csl_filename)
    return tokenize(queryPath, params)
                
# Known state names; each key maps to itself.
states = {name: name for name in ("ja", "healthagent")}

# Accumulated state string; the report cell appends to it.
state = ""

In [None]:
# Load the Kqlmagic IPython extension; the %kql line magics in the cells below depend on it.
%load_ext Kqlmagic

In [None]:
# Active incidents?
# ActiveIncidents.csl
# Connects to the 'Icmcluster' cluster / 'IcMDataWarehouse' database, runs the
# ActiveIncidents.csl template built from `params`, and keeps the rows as a
# DataFrame. The report cell reads its Title, IncidentId and Severity columns.
%kql AzureDataExplorer://tenant="Microsoft.com";code;cluster='Icmcluster';database='IcMDataWarehouse' 
q_activeIncidents = getKustoQuery("ActiveIncidents.csl", params)
q_activeIncidentsResult = %kql -query q_activeIncidents
q_activeIncidentsResultDf = q_activeIncidentsResult.to_dataframe()

In [None]:
# Connect to the 'VSO' cluster / 'VSO' database.
# NOTE(review): presumably the queries in the following cells run against this connection -- confirm.
# Note: KQL has a bug where it copies the wrong code (the previous cluster's auth code instead of the current one) when we try to auth to multiple clusters.
# Copy the code manually instead; if you already closed the dialog, copy the code and go to https://microsoft.com/devicelogin
%kql AzureDataExplorer://tenant="Microsoft.com";code;cluster='VSO';database='VSO'

In [None]:
# LocationName.csl
# Runs the LocationName.csl template, takes the 'Tenant' field of the first
# result row, and stores it in params as "locationName". The report cell
# reads that key when it prints the database-name prefix hint.
# NOTE(review): assumes the query returns at least one row; indexing [0] on an
# empty result would presumably fail -- confirm.
q_loc = getKustoQuery("LocationName.csl", params)
locationNameResult = %kql -query q_loc
locationName = locationNameResult[0]['Tenant']
params["locationName"] = locationName

In [None]:
# WhatChanged.csl
# Runs the WhatChanged.csl template and keeps the rows as a DataFrame.
# The report cell reads its title, TIMESTAMP and buildNumber columns.
q_whatChanged = getKustoQuery("WhatChanged.csl", params)
q_whatChanged_r = %kql -query q_whatChanged
q_whatChanged_df = q_whatChanged_r.to_dataframe();

In [None]:
# impact/CommandsReason.csl
# Runs the template and keeps the rows as a DataFrame. The report cell reads
# the Reason column (matching "failed" / "slow") and the Frequency column.
q_commands = getKustoQuery("impact/CommandsReason.csl", params)
q_commands_r = %kql -query q_commands
q_commands_df = q_commands_r.to_dataframe();

In [None]:
# impact/CommandsAT.csl
# Runs the template and keeps the rows as a DataFrame. The report cell uses
# its Reason and Frequency columns for the "because of AT?" section.
q_commandsAT = getKustoQuery("impact/CommandsAT.csl", params)
q_commandsAT_r = %kql -query q_commandsAT
q_commandsAT_df = q_commandsAT_r.to_dataframe();

In [None]:
# impact/CommandsDb.csl
# Runs the template and keeps the rows as a DataFrame. The report cell uses
# its Reason and Frequency columns for the "because of Db?" section.
q_commandsDb = getKustoQuery("impact/CommandsDb.csl", params)
q_commandsDb_r = %kql -query q_commandsDb
q_commandsDb_df = q_commandsDb_r.to_dataframe();

In [None]:
# Banner marking the start of the report output.
rule = '=' * 50
print(rule)
print('Report!')
print(rule, '\n\n')

# Dashboard window: 10 minutes before `start` to 10 minutes after `end`,
# expressed as epoch milliseconds by getTime.
jarvisParams = dict(
    su=su,
    start=getTime(start, -10),
    end=getTime(end, 10),
    service=service,
)

# jarvis
# The template is filled from jarvisParams by name; requote_uri then
# percent-encodes the characters that are not valid in a URL.
jarvisTemplate = (
    """https://jarvis-west.dc.ad.msft.net/dashboard/VSO-ServiceInsights/DevOpsReports/TFS DevOpsReports"""
    """?overrides=[{"query":"//*[id='Service']","key":"value","replacement":"%(service)s"},"""
    """{"query":"//*[id='RoleInstance']","key":"value","replacement":""},"""
    """{"query":"//*[id='ScaleUnit']","key":"value","replacement":"%(su)s"}]"""
    """&globalStartTime=%(start)s&globalEndTime=%(end)s&pinGlobalTimeRange=true"""
)
jarvisLink = jarvisTemplate % jarvisParams
print('Jarvis dashboard link:\n', requote_uri(jarvisLink), '\n')

# slow failed reason analysis
print()
print('Is it slow commands or failed commands? =============================')


def _firstFrequency(df, reasonName):
    """Return Frequency of the first row whose Reason equals `reasonName`, or 0 if there is none."""
    matches = df.loc[df["Reason"] == reasonName, "Frequency"]
    return matches.iloc[0] if len(matches) else 0


freq = q_commands_df["Frequency"]
# Coefficient of variation (std / mean). When it is NaN the comparison below
# is False, so the "too close" branch is taken.
coefficientOfVariance = freq.std()/freq.mean()
# A reason with no row counts as 0 instead of raising IndexError.
failedCount = _firstFrequency(q_commands_df, "failed")
slowCount = _firstFrequency(q_commands_df, "slow")
# Default label when neither reason clearly dominates; the AT/Db sections
# then look at all rows instead of one reason.
reason = "failed or slow"
if coefficientOfVariance > 0.5:
    reason = "failed" if failedCount > slowCount else "slow"
else:
    print("Slow and failed commands are too close, both might be contributing...")
# `reason` is never empty, so the summary line is always printed.
print("Probably due to %s commands; Failed - %s, Slow - %s" % (reason, failedCount, slowCount))

def _findVariance(commandsDf, reason, label):
    """Print the per-`label` verdict for `reason` commands.

    Prints the section header, narrows `commandsDf` to the rows whose Reason
    equals `reason` when it is "failed" or "slow" (otherwise all rows are
    used), and compares the coefficient of variation of Frequency to 0.5.

    Returns the narrowed rows when variance was found, so the caller can print
    details; returns None after printing the "same across" line otherwise.
    """
    print()
    print('Is it %s because of %s? =============================' % (reason, label))
    if reason in ("failed", "slow"):
        data = commandsDf[commandsDf["Reason"] == reason]
    else:
        data = commandsDf

    frequency = data["Frequency"]
    coefficientOfVariance = frequency.std()/frequency.mean()

    if coefficientOfVariance > 0.5:
        print("Found variance in %s's for %s commands" % (label, reason))
        return data
    print("Seems be same across %s's for %s commands" % (label, reason))
    return None


# slow failed reason for AT?
atVariance = _findVariance(q_commandsAT_df, reason, "AT")
if atVariance is not None:
    print(atVariance.head(30))

# slow failed reason for Db?
dbVariance = _findVariance(q_commandsDb_df, reason, "Db")
if dbVariance is not None:
    # Hints for turning the short names in the table into full names.
    print("Suffix '%s' to database server name" % (".database.windows.net"))
    print("Prefix '%s' to database name" % (params["service"] + "_" + params["locationName"] + "_"))
    print(dbVariance.head(30))
    
# what changed? analysis
print()
print('What changed? =============================')
if len(q_whatChanged_df.index) == 0:
    print("No relevant changes found...")
else:
    up_prefix = ""
    mit_prefix = ""
    text = ""
    for index, row in q_whatChanged_df.iterrows():
        # str() keeps a missing / non-string title from crashing .lower().
        title = str(row["title"]).lower()
        # One line per matching change; the trailing newline keeps consecutive
        # entries (upgrades and mitigations alike) on separate lines.
        line = """%s %s %s \n""" % (row["TIMESTAMP"], row["title"], row["buildNumber"])
        if 'upgrade' in title:
            if not up_prefix:
                up_prefix = "Looks like, there's upgrade in progress...\n"
            text += line
        if 'mitigation' in title:
            if not mit_prefix:
                mit_prefix = "Looks like, there are some mitigations by health agent...\n"
                # Record the state once, even if this cell is re-run.
                if states["healthagent"] not in state:
                    state += states["healthagent"]
            text += line

    if text:
        print(up_prefix + mit_prefix + text)
    else:
        # Changes exist but none mention an upgrade or mitigation: show them all.
        print(q_whatChanged_df)
        
# active incidents?
print()
print('Active incidents? =============================')
# Count incidents whose title does not mention the impact monitor. str() keeps
# missing / non-string titles from crashing, and .get() tolerates a result
# frame that has no Title column at all.
otherIncidentsCount = sum(
    1
    for title in q_activeIncidentsResultDf.get("Title", ())
    if "TFS Customer Impact Monitor" not in str(title)
)

if otherIncidentsCount > 0:
    print("We found some incidents during the time period, check if they are related...")
    # styling
    def make_clickable(url, text):
        """Return the link for `url`.

        NOTE(review): `text` is accepted but ignored; only the bare URL is returned.
        """
        return '{0}'.format(url)

    # NOTE(review): the URL column is added here but is not part of the table printed below.
    newDf = q_activeIncidentsResultDf.assign(URL=[
        make_clickable("""https://icm.ad.msft.net/imp/v3/incidents/details/%s/home""" % (incidentId), "ICMLink")
        for incidentId in q_activeIncidentsResultDf.IncidentId
    ])
    print("ICM link to copy - " + "https://icm.ad.msft.net/imp/v3/incidents/details/INCIDENTID/home")
    print(newDf[['IncidentId','Severity','Title']])
else:
    print("No active incidents that could be related are found...")