# SQL Investigation
1. Run all cells.
1. View report at the bottom.

In [None]:
# Default parameters for a standalone run. They are overwritten when the
# notebook is opened through the nimport pip package.
# db is the database under investigation; start and end bound the time window.
db = "Tfs_tfsprodcus2_37253a68-972a-4bf4-8c5f-a259ba4d42cd"
start = "2019-07-31T17:30:00.0000000Z"
end = "2019-07-31T18:30:36.0000000Z"
url = "https://notebooksv2.azure.com/yaananth/projects/06OasuNRs6rK/delays.ipynb"
baseUrl = "https://notebooksv2.azure.com/yaananth/projects/06OasuNRs6rK"

In [None]:
%%capture
!pip install nimport azure-kusto-notebooks

In [None]:
# Import the things we use

# Note you can also use kql https://docs.microsoft.com/en-us/azure/data-explorer/kqlmagic
# %kql is single line magic
# %%kql is cell magic

# https://nbviewer.jupyter.org/github/ipython/ipython/blob/4.0.x/examples/IPython%20Kernel/Rich%20Output.ipynb#HTML
# https://ipython.readthedocs.io/en/stable/interactive/magics.html
from IPython.display import display, HTML, Markdown, Javascript, clear_output

# http://pandas-docs.github.io/pandas-docs-travis/user_guide/reshaping.html
import pandas as pd
pd.options.display.html.table_schema = True
from pandas import Series, DataFrame
from datetime import datetime, timedelta, timezone
from urllib.parse import urlencode, quote_plus
from requests.utils import requote_uri
import time
import numpy as np
from matplotlib import pyplot as plt
from nimport.utils import tokenize, open_nb
import json
import os
import calendar as cal
import concurrent.futures
from azure.kusto.notebooks import utils as akn

In [None]:
# Bundle the notebook inputs into the parameter set handed to every query.
params = dict(db=db, start=start, end=end, url=url, baseUrl=baseUrl)

# Query files sit in a 'queries' folder. When the working directory is not
# already 'devops-pipelines', that folder name is prepended to the path.
if os.path.basename(os.getcwd()) == 'devops-pipelines':
    root = ''
else:
    root = 'devops-pipelines'
queryPath = os.path.join(root, 'queries')
    

In [None]:
# Authenticate the Kusto client for the cluster at vso.kusto.windows.net.
# Authentication is interactive: you will need to copy the token into a
# browser window to complete AAD auth.
client = akn.get_client('https://vso.kusto.windows.net')

In [None]:
# Locations of the SQL investigation query files.
sqlPath = os.path.join(queryPath, 'sql')
q_data = os.path.join(sqlPath, "GetData.csl")
q_whatsSlow = os.path.join(sqlPath, "WhatsSlow.csl")
with concurrent.futures.ThreadPoolExecutor() as executor:
    # materialize so that we have all information we might need
    p1 = executor.submit(akn.execute_file, client, 'VSO', q_data, params)
    # The first result is converted right away: the row labelled 0 supplies
    # the "service" and "su" entries that must be in params before the
    # second query is submitted.
    q_data_df = akn.to_dataframe_from_future(p1)
    params["service"] = q_data_df["Service"][0]
    params["su"] =q_data_df["ScaleUnit"][0]
    
    p2 = executor.submit(akn.execute_file, client, 'VSO', q_whatsSlow, params)

# Leaving the with-block waits for the pending query; convert its result.
q_whatsSlow_df = akn.to_dataframe_from_future(p2)  


In [None]:
# Initialize for further analysis later
q_cpuTop_df = None
q_cpuXEvent_df = None
q_cpuJob_df = None
q_cpuActivity_df = None

In [None]:
def cpuAnalysis():
    """Run the top-CPU and CPU XEvent queries and store their results.

    The CpuTop.csl and CpuXevent.csl files under ``sqlPath`` are both
    submitted to a thread pool before either result is read. The resulting
    DataFrames are written to the module-level ``q_cpuTop_df`` and
    ``q_cpuXEvent_df``.

    ``q_cpuXEvent_df`` additionally gains a ``CpuTimeDiff`` column holding
    each row's ``sum_CpuTime`` divided by the largest ``sum_CpuTime`` in the
    result, so the heaviest row scores 1.0.
    """
    global q_cpuTop_df
    global q_cpuXEvent_df
    q_cpuTop = os.path.join(sqlPath, "CpuTop.csl")
    q_cpuXEvent = os.path.join(sqlPath, "CpuXevent.csl")
    with concurrent.futures.ThreadPoolExecutor() as executor:
        p1 = executor.submit(akn.execute_file, client, 'VSO', q_cpuTop, params)
        p2 = executor.submit(akn.execute_file, client, 'VSO', q_cpuXEvent, params)

    # The with-block has waited for both queries, so both futures are done.
    q_cpuTop_df = akn.to_dataframe_from_future(p1)
    q_cpuXEvent_df = akn.to_dataframe_from_future(p2)

    # Vectorised division instead of a per-row Python lambda.
    maxTime = q_cpuXEvent_df["sum_CpuTime"].max()
    q_cpuXEvent_df['CpuTimeDiff'] = q_cpuXEvent_df["sum_CpuTime"] / maxTime

def cpuAnalysisJob():
    """Run the CpuJob.csl query and store its result in ``q_cpuJob_df``."""
    global q_cpuJob_df
    job_query_file = os.path.join(sqlPath, "CpuJob.csl")
    pool = concurrent.futures.ThreadPoolExecutor()
    with pool:
        pending = pool.submit(
            akn.execute_file, client, 'VSO', job_query_file, params
        )

    # The pool has shut down here, so the submitted query has finished.
    q_cpuJob_df = akn.to_dataframe_from_future(pending)

def cpuAnalysisActivity():
    """Run the CpuActivity.csl query and store its result in ``q_cpuActivity_df``."""
    global q_cpuActivity_df
    activity_query_file = os.path.join(sqlPath, "CpuActivity.csl")
    pool = concurrent.futures.ThreadPoolExecutor()
    with pool:
        pending = pool.submit(
            akn.execute_file, client, 'VSO', activity_query_file, params
        )

    # The pool has shut down here, so the submitted query has finished.
    q_cpuActivity_df = akn.to_dataframe_from_future(pending)

In [None]:
# Report banner.
print('=' * 50)
print('Report!')
print('=' * 50, '\n\n')

# Values substituted into the Jarvis dashboard link. The start and end
# times go through akn.get_time with offsets of -10 and +10.
jarvisParams = dict(
    su=params["su"],
    start=akn.get_time(start, -10),
    end=akn.get_time(end, 10),
    service=params["service"],
    db=db,
)

# The adjacent literals form one template; % is applied to the whole of it.
jaJarvisLink = (
    """https://jarvis-west.dc.ad.msft.net/dashboard/VSO-ServiceInsights/PlatformViews/SQLAzureDatabase"""
    """?overrides=[{"query":"//*[id='Service']","key":"value","replacement":"%(service)s"},"""
    """{"query":"//*[id='ScaleUnit']","key":"value","replacement":"%(su)s"},"""
    """{"query":"//*[id='__DatabaseName']","key":"value","replacement":"%(db)s"}]"""
    """&globalStartTime=%(start)s&globalEndTime=%(end)s&pinGlobalTimeRange=true"""
) % jarvisParams
print('Jarvis dashboard link for sql:\n', requote_uri(jaJarvisLink), '\n')

print()
print("Parameters used:")
display(params)

print()

## Where is the database at?
print("Database is at: ")
so = q_whatsSlow_df["ServiceObjective"].unique()
# More than one distinct value means the objective changed within the data.
if len(so) > 1:
    print("We found different service objectives..looks like db was changed?")
print(so)

print()

## What's slow?
# Resource metric columns from the WhatsSlow query result.
cpu = q_whatsSlow_df["avg_AverageCpuPercentage"]
memory = q_whatsSlow_df["avg_AverageMemoryUsagePercentage"]
logWrite = q_whatsSlow_df["avg_AverageLogWriteUtilizationPercentage"]
worker = q_whatsSlow_df["max_MaximumWorkerPercentage"]

# Coefficient of variation (std / mean) of each metric: how much it
# fluctuated relative to its own average across the returned rows.
cpu_coefficientOfVariance = cpu.std() / cpu.mean()
memory_coefficientOfVariance = memory.std() / memory.mean()
logWrite_coefficientOfVariance = logWrite.std() / logWrite.mean()
worker_coefficientOfVariance = worker.std() / worker.mean()
maxVar = 0.5

# A metric is a suspect when its variation reaches maxVar. Suspects are
# collected first so the message carries no trailing separator, and an empty
# result is stated explicitly rather than printed as a dangling prefix.
_suspects = [
    "%s (max: %s)" % (_label, _metric.max())
    for _label, _metric, _variation in (
        ("cpu", cpu, cpu_coefficientOfVariance),
        ("memory", memory, memory_coefficientOfVariance),
        ("logwrite", logWrite, logWrite_coefficientOfVariance),
        ("worker", worker, worker_coefficientOfVariance),
    )
    if _variation >= maxVar
]
if _suspects:
    reasons = "Possibly due to: " + ", ".join(_suspects)
else:
    reasons = "Possibly due to: nothing obvious (no metric varied by %s or more)" % maxVar
print(reasons)

# Drill into CPU only when some row reached 80% average CPU or more.
if cpu.max() >= 80:
    print("We found high CPU, let's start with CPU analysis...")

    cpuAnalysis()

    # cpuAnalysis() also fills q_cpuTop_df (top CPU commands); call
    # display(q_cpuTop_df) here to include it in the report.

    print()
    print("Who's causing these commands?:")
    # Keep the commands whose CPU time is at least half that of the
    # heaviest command (CpuTimeDiff is the ratio to the maximum).
    commandsToConsider = q_cpuXEvent_df[q_cpuXEvent_df["CpuTimeDiff"] >= 0.5]

    # na=False: a missing TypeName would otherwise put NaN into the mask,
    # which boolean indexing rejects.
    jobCommand = commandsToConsider[
        commandsToConsider["TypeName"].str.contains('Job', na=False)
    ]
    if len(jobCommand) >= 1:
        print("Possibly due to a job...")
        display(jobCommand)
        cpuAnalysisJob()

        print()
        display(q_cpuJob_df)

    activityCommand = commandsToConsider[
        commandsToConsider["TypeName"].str.contains('Activity', na=False)
    ]
    # .iloc[0] reads the first matching row by position. The filtered frame
    # keeps the row labels of the full result, so a lookup by label 0 fails
    # whenever that particular row was filtered out.
    if len(activityCommand) >= 1 and activityCommand["ObjectName"].iloc[0]:
        print("Possibly due to user activity...")
        display(activityCommand)
        cpuAnalysisActivity()

        print()
        display(q_cpuActivity_df)
        