In [None]:
%%javascript
// Override the notebook's output-area scroll check so it always answers "no",
// regardless of how many lines an output contains.
IPython.OutputArea.prototype._should_scroll = (lines) => false;

In [None]:
# Adding system path
# Make the directory two levels above the current working directory importable
# (presumably the repository root that holds the `optiml` package - confirm).
import sys, pathlib

_repo_root = str(pathlib.Path.cwd().parent.parent)
# Guard the append so re-running this cell does not pile up duplicate entries.
if _repo_root not in sys.path:
    sys.path.append(_repo_root)

In [None]:
# Set to show warnings only once
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Setting up displays: imports first, then notebook/pandas display options.
import math

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display, HTML
from plotly import tools
from plotly.subplots import make_subplots
from tabulate import tabulate

# Inject CSS that stretches the notebook's `.container` element to full width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Let pandas render up to 500 rows and 500 columns before truncating a frame.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# Named colours, presumably a shared palette for later plots
# (not referenced in the cells visible here - confirm before removing).
color_scheme = [
    "red", "blue", "green", "orange", "purple",
    "brown", "pink", "gray", "olive", "cyan",
    "darkviolet", "goldenrod", "darkgreen", "chocolate", "lawngreen",
]

In [None]:
# Setting up autoreload for libs
# Load IPython's autoreload extension so edits to project modules are picked up
# without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all imported modules before running each cell's code.
%autoreload 2
# Import the warehouse profile module through autoreload's %aimport magic.
%aimport optiml.backend.warehouse_profile

In [None]:
# Initialize connection to Snowflake and set analysis date
import os

from optiml.connection import SnowflakeConnConfig
from optiml.backend.warehouse_profile import WarehouseProfile
from optiml.backend.cost_profile import CostProfile, get_previous_dates

# Open the Snowflake connection used by both query libraries below.
connection = SnowflakeConnConfig(accountname='jg84276.us-central1.gcp',warehousename="XSMALL_WH").create_connection()

# Initialize local environment
# Directory under the user's home that is handed to the query libraries as their cache location.
cache_dir = os.path.expanduser('~/data/kiva')

# Analysis setup
sdate = '2022-09-28'        # start of the analysis date range
edate = '2022-10-12'        # end of the analysis date range
delta = 'hour'              # passed as `delta` to the load query; presumably the time-bucket size - confirm
company = 'KIV'             # company identifier passed to both query libraries
account_type = "standard"   # passed to CostProfile only

# Initialize query library
wqlib = WarehouseProfile(connection, company, cache_dir)
cqlib = CostProfile(connection, company, cache_dir, account_type)

# Fixed typo in the user-facing message ("carried our" -> "carried out").
print(f"The analysis is carried out for date range {sdate} to {edate}")

### Warehouse load analysis

In [None]:
# Fetch warehouse cost data for the analysis date range. The plotting cell below
# reads the `warehouse_name`, `hourly_start_time` and `credits` columns of this frame.
wh_credits = cqlib.cost_by_wh_ts(sdate, edate)

In [None]:
# Distinct warehouse names taken from the warehouse configuration table.
wh_configs = wqlib.warehouse_config()
wh_name = wh_configs["name"].unique().tolist()

# Map each warehouse name to its queued/running load time series over the
# analysis date range, bucketed by `delta`.
wh_load = {
    n: wqlib.wh_queued_load_ts(start_date=sdate, end_date=edate, wh_name=n, delta=delta)
    for n in wh_name
}

In [None]:
# One chart per warehouse: queued and running load as stacked bars on the
# primary y-axis, credits consumed as a line on the secondary y-axis.
for n in wh_name:
    # Warehouses without any load rows get no chart.
    if wh_load[n].empty:
        continue

    df_load = wh_load[n]
    # Credit rows for this warehouse, ordered by hour and re-indexed from 0.
    df_credits = (
        wh_credits[wh_credits["warehouse_name"] == n]
        .sort_values(by=["hourly_start_time"])
        .reset_index(drop=True)
    )

    trace1 = go.Bar(
        name='Queued Load',
        x=df_load["hourly_start_time"],
        y=df_load["avg_queued_load"],
        marker={"color": 'rgb(222,0,0)'},
    )
    trace2 = go.Bar(
        name='Running load',
        x=df_load["hourly_start_time"],
        y=df_load["avg_running_load"],
        marker={"color": 'rgb(0,0,255)'},
    )
    trace3 = go.Scatter(
        name='Credits consumed',
        x=df_credits['hourly_start_time'],
        y=df_credits['credits'],
    )

    # A single subplot with a secondary y-axis so load and credits can use separate scales.
    f = make_subplots(specs=[[{"secondary_y": True}]])
    f.add_trace(trace1, secondary_y=False)
    f.add_trace(trace2, secondary_y=False)
    f.add_trace(trace3, secondary_y=True)

    f.update_layout(
        barmode='stack',
        xaxis_title="Hourly start time (UTC)",
        title=n,
        xaxis={"showgrid": False},
        yaxis={"showgrid": False},
    )
    f.update_yaxes(title_text="Query Load", secondary_y=False)
    # The credits axis is anchored at zero.
    f.update_yaxes(title_text="Credits", rangemode="tozero", secondary_y=True)

    f.show()

In [None]:
## Scaling policy for DEV_WH and dither queries to other places when WH query load is low
## Analyze which queries are triggering a queue in PROD_WH.
## Analyze if there is opportunity to dither queries between ML_WH, DAILY_REFRESH_WH and DEV_WH
## Is there a specific user who is triggering a queue in PROD_WH or DEV_WH?
## Is there a specific user whose queries are going to DEV_WH during quiet times?