In [None]:
%%javascript
// Stop output areas from collapsing long outputs into a scroll box, so the
// tables and charts below are always shown at full height.
// The `IPython` global is not defined in every notebook front end, so the
// override is applied only when it is available instead of raising a
// ReferenceError.
if (typeof IPython !== 'undefined' && IPython.OutputArea) {
    IPython.OutputArea.prototype._should_scroll = function(lines) {
        return false;
    };
}

In [None]:
from IPython.core.display import HTML as Center

# Inject a CSS rule that centers elements with the `.output_png` class.
# Being the last expression of the cell, the HTML object is rendered as the
# cell's output, which is what applies the style.
Center(""" <style>
.output_png {
    display: table-cell;
    text-align: center;
    vertical-align: middle;
}
</style> """)


In [None]:
# Append the directory two levels above the current working directory to the
# module search path (presumably so the project package imported further down
# can be resolved — TODO confirm).
import sys, pathlib
sys.path.append(str(pathlib.Path.cwd().parent.parent))

In [None]:
# Suppress all warnings for the rest of the session.
# NOTE(review): this comment used to say "show warnings only once", which would
# be filterwarnings('once'); 'ignore' hides warnings entirely — confirm intent.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Setting up displays
from IPython.display import display, HTML
# Stretch the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))
from dash import Dash,html,dcc,Input,Output
# Dash application object (not referenced by the cells visible in this section).
app = Dash(__name__)
import pandas as pd
# Let pandas display up to 500 rows / 500 columns before truncating.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tabulate import tabulate
# Shared colour sequence passed to every pie and area chart below.
color_scheme=["red","blue","green","orange","purple","brown","pink","gray","olive","cyan","darkviolet","goldenrod","darkgreen","chocolate","lawngreen"]

In [None]:
# Initialize connection to Snowflake and set analysis date
# NOTE(review): the account identifier and warehouse name are hard-coded here;
# consider reading them from configuration or environment variables.
from optiml.connection import SnowflakeConnConfig
connection = SnowflakeConnConfig(accountname='jg84276.us-central1.gcp',warehousename="XSMALL_WH").create_connection()
# Initialize local environment
import os
# Directory handed to CostProfile below (presumably a local query cache — TODO confirm).
cache_dir = os.path.expanduser('~/data/kiva')
# Initialize query library
from optiml.backend.cost_profile import CostProfile,get_previous_dates
cqlib = CostProfile(connection, 'KIV', cache_dir)
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import interact
# Default analysis window as 'YYYY-MM-DD' strings; read by the plotting cells below.
sdate = '2022-09-12'
edate = '2022-10-12'

### Cost by usage category

In [None]:

output = widgets.Output()


@output.capture(clear_output=True, wait=True)
def usage_category_plots(start_date, end_date):
    """Show cost by usage category for the selected period.

    Prints a table of credits and dollars per usage category next to the
    figures for the preceding period (with percent change in dollars), then
    displays two pie charts (dollars and credits) and an hourly area chart
    of dollars per category.

    Args:
        start_date: Start of the analysis window, as delivered by the date
            picker; passed unchanged to ``cqlib.total_cost_breakdown_ts``.
        end_date: End of the analysis window, handled like ``start_date``.
    """
    if start_date is None or end_date is None:
        # A cleared date picker delivers None; there is nothing to query yet.
        print('Select both a start date and an end date.')
        return

    def _label_unassigned(df_period):
        """Replace missing category names with 'Unassigned'."""
        return df_period.assign(
            category_name=df_period["category_name"].fillna('Unassigned'))

    def _totals_by_category(df_period):
        """Sum the numeric columns per category and append a 'Total' row."""
        totals = (
            df_period.groupby("category_name")
            .sum(numeric_only=True)
            .reset_index()
            .round(2)
        )
        # Column sums are taken in column order, so the row always lines up.
        totals.loc[len(totals.index)] = (
            ['Total'] + [totals[col].sum() for col in totals.columns[1:]])
        return totals

    df = _label_unassigned(cqlib.total_cost_breakdown_ts(start_date, end_date))
    df_by_usage_category = _totals_by_category(df)

    # The comparison period must follow the *selected* dates, not the notebook
    # defaults. Dates are formatted as 'YYYY-MM-DD', the same format as the
    # notebook-level sdate/edate strings this helper was called with before.
    p1_sdate, p1_edate = get_previous_dates(
        pd.to_datetime(start_date).strftime('%Y-%m-%d'),
        pd.to_datetime(end_date).strftime('%Y-%m-%d'),
        1,
    )
    df_prev = _label_unassigned(cqlib.total_cost_breakdown_ts(p1_sdate, p1_edate))

    current = df_by_usage_category.rename(
        columns={"credits": "current_month_credits", "dollars": "current_month_dollars"})
    previous = _totals_by_category(df_prev).rename(
        columns={"credits": "previous_month_credits", "dollars": "previous_month_dollars"})
    df_category = pd.merge(current, previous, on="category_name", how="outer")
    df_category["percent_change"] = (
        (df_category["current_month_dollars"] - df_category["previous_month_dollars"])
        / df_category["previous_month_dollars"] * 100
    ).round(2)
    # Categories present in only one of the two periods show up as 0.
    df_category = df_category.fillna(0)

    print('Credit and dollar by usage category')
    print('----------------------------------------------------')
    print(tabulate(df_category, headers='keys', tablefmt='rounded_outline', showindex=False))

    # The pies show the individual categories only, so leave out the 'Total' row.
    df_categories = df_by_usage_category.iloc[:-1]
    category_labels = df_categories['category_name'].tolist()
    fig1 = make_subplots(
        rows=1, cols=2,
        specs=[[{"type": "pie"}, {"type": "pie"}]],
        subplot_titles=("Dollars", "Credits"),
    )
    fig1.add_trace(
        go.Pie(labels=category_labels, values=df_categories['dollars'].tolist(),
               name="Dollars", rotation=45, marker_colors=color_scheme),
        row=1, col=1)
    fig1.add_trace(
        go.Pie(labels=category_labels, values=df_categories['credits'].tolist(),
               name='Credits', rotation=45, marker_colors=color_scheme),
        row=1, col=2)
    fig1.update_layout(
        title={
            'text': "Breakdown of total cost by usage category",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'})
    display(fig1)

    df_by_category_ts = (
        df.groupby(['category_name', 'hourly_start_time'])
        .sum(numeric_only=True)
        .reset_index()
    )
    fig = px.area(df_by_category_ts, x="hourly_start_time", y="dollars",
                  color="category_name", color_discrete_sequence=color_scheme)
    fig.update_layout(
        title={
            'text': "Timeseries of cost by usage category",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hourly start time (UTC)",
        yaxis_title="US Dollars")
    display(fig)


# The pickers start at the notebook's default analysis window.
interact(
    usage_category_plots,
    start_date=widgets.DatePicker(value=pd.to_datetime(sdate)),
    end_date=widgets.DatePicker(value=pd.to_datetime(edate)),
)
output






### Cost by user

In [None]:
start_date = widgets.DatePicker(value=pd.to_datetime(sdate))
end_date = widgets.DatePicker(value=pd.to_datetime(edate))

output = widgets.Output()


@output.capture(clear_output=True, wait=True)
def user_plots(start_date, end_date):
    """Show approximate credit usage by user for the selected period.

    Prints a table of credits per user next to the figures for the preceding
    period (with percent change), then displays a pie chart in which users
    below 1 % of the total are merged into one "Low_usage_users" slice, and
    an hourly area chart restricted to the remaining users.

    Args:
        start_date: Start of the analysis window, as delivered by the date
            picker; passed unchanged to ``cqlib.cost_by_user_ts``.
        end_date: End of the analysis window, handled like ``start_date``.
    """
    if start_date is None or end_date is None:
        # A cleared date picker delivers None; there is nothing to query yet.
        print('Select both a start date and an end date.')
        return

    credits_col = "approximate_credits_used"

    def _label_unassigned(df_period):
        """Replace missing user names with 'Unassigned'."""
        return df_period.assign(user_name=df_period["user_name"].fillna('Unassigned'))

    def _credits_by_user(df_period):
        """Sum the numeric columns per user and append a 'Total' row."""
        totals = (
            df_period.groupby(['user_name'])
            .sum(numeric_only=True)
            .reset_index()
            .round(2)
        )
        totals.loc[len(totals.index)] = (
            ['Total'] + [totals[col].sum() for col in totals.columns[1:]])
        return totals

    # Both periods get the same 'Unassigned' label so their rows and totals
    # are comparable.
    df = _label_unassigned(cqlib.cost_by_user_ts(start_date, end_date))
    df_by_user = _credits_by_user(df)

    # The comparison period must follow the *selected* dates, not the notebook
    # defaults. Dates are formatted as 'YYYY-MM-DD', the same format as the
    # notebook-level sdate/edate strings this helper was called with before.
    p1_sdate, p1_edate = get_previous_dates(
        pd.to_datetime(start_date).strftime('%Y-%m-%d'),
        pd.to_datetime(end_date).strftime('%Y-%m-%d'),
        1,
    )
    df_prev = _label_unassigned(cqlib.cost_by_user_ts(p1_sdate, p1_edate))

    current = df_by_user.rename(columns={credits_col: "current_month_credits"})
    previous = _credits_by_user(df_prev).rename(columns={credits_col: "previous_month_credits"})
    df_user = pd.merge(current, previous, on="user_name", how="outer")
    df_user["percent_change"] = (
        (df_user["current_month_credits"] - df_user["previous_month_credits"])
        / df_user["previous_month_credits"] * 100
    ).round(2)
    # Users present in only one of the two periods show up as 0.
    df_user = df_user.fillna(0)

    print('Credits by user')
    print('----------------------------------------------------')
    print(tabulate(df_user, headers='keys', tablefmt='rounded_outline', showindex=False))

    # Split users into those at or above 1 % of the total and the rest. Using
    # the complement of "< 1" keeps users at exactly 1 % in the chart instead
    # of dropping them from both groups.
    df_users = df_by_user.iloc[:-1]
    total_credits = df_by_user[credits_col].iloc[-1]
    percent_usage = (df_users[credits_col] / total_credits * 100).round(3)
    is_low_usage = percent_usage < 1.00
    low_usage_names = df_users.loc[is_low_usage, "user_name"].tolist()

    df_pie = df_users.loc[~is_low_usage, ["user_name", credits_col]]
    if is_low_usage.any():
        low_usage_row = pd.DataFrame({
            "user_name": ["Low_usage_users"],
            credits_col: [df_users.loc[is_low_usage, credits_col].sum()],
        })
        df_pie = pd.concat([df_pie, low_usage_row], ignore_index=True)

    fig1 = make_subplots(
        rows=1, cols=1,
        specs=[[{"type": "pie"}]],
        # Must be a sequence of titles; a bare string is indexed per character.
        subplot_titles=("Credits",),
    )
    fig1.add_trace(
        go.Pie(labels=df_pie['user_name'].tolist(), values=df_pie[credits_col].tolist(),
               name="Credits", rotation=270, marker_colors=color_scheme),
        row=1, col=1)
    fig1.update_layout(
        title={
            'text': "Breakdown of total cost by user",
            'y': 0.1,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'bottom'})
    display(fig1)

    df_by_user_ts = (
        df.groupby(['user_name', 'hourly_start_time'])
        .sum(numeric_only=True)
        .reset_index()
    )
    # Low-usage users are left out of the time series.
    df_by_user_ts = df_by_user_ts[
        ~df_by_user_ts["user_name"].isin(low_usage_names)
    ].reset_index(drop=True)
    fig = px.area(df_by_user_ts, x="hourly_start_time", y=credits_col,
                  color="user_name", color_discrete_sequence=color_scheme)
    fig.update_layout(
        title={
            'text': "Timeseries of cost by user",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hourly start time (UTC)",
        yaxis_title="Credits used (approx.)",
    )
    display(fig)


interact(user_plots, start_date=start_date, end_date=end_date)
output
      


### Cost by warehouse

In [None]:

output = widgets.Output()


@output.capture(clear_output=True, wait=True)
def warehouse_plots(start_date, end_date):
    """Show cost by warehouse for the selected period.

    Prints a table of credits and dollars per warehouse next to the figures
    for the preceding period (with percent change in dollars), then displays
    a pie chart of dollars per warehouse and an hourly area chart of credits
    per warehouse.

    Args:
        start_date: Start of the analysis window, as delivered by the date
            picker; passed unchanged to ``cqlib.cost_by_wh_ts``.
        end_date: End of the analysis window, handled like ``start_date``.
    """
    if start_date is None or end_date is None:
        # A cleared date picker delivers None; there is nothing to query yet.
        print('Select both a start date and an end date.')
        return

    def _label_unassigned(df_period):
        """Replace missing warehouse names with 'Unassigned'."""
        return df_period.assign(
            warehouse_name=df_period["warehouse_name"].fillna('Unassigned'))

    def _cost_by_warehouse(df_period):
        """Sum the numeric columns per warehouse and append a 'Total' row."""
        totals = (
            df_period.groupby(['warehouse_name'])
            .sum(numeric_only=True)
            .reset_index()
            .round(2)
        )
        # The new row is labelled with this frame's own length. Using the
        # length of a different frame can overwrite an existing row.
        totals.loc[len(totals.index)] = (
            ['Total'] + [totals[col].sum() for col in totals.columns[1:]])
        return totals

    # Only credits and dollars take part in the period comparison; the
    # cloud-services columns are left out of the table.
    comparison_cols = ["warehouse_name", "credits", "dollars"]

    # Both periods get the same 'Unassigned' label so their rows and totals
    # are comparable.
    df = _label_unassigned(cqlib.cost_by_wh_ts(start_date, end_date))
    df_by_wh = _cost_by_warehouse(df)

    # The comparison period must follow the *selected* dates, not the notebook
    # defaults. Dates are formatted as 'YYYY-MM-DD', the same format as the
    # notebook-level sdate/edate strings this helper was called with before.
    p1_sdate, p1_edate = get_previous_dates(
        pd.to_datetime(start_date).strftime('%Y-%m-%d'),
        pd.to_datetime(end_date).strftime('%Y-%m-%d'),
        1,
    )
    df_prev = _label_unassigned(cqlib.cost_by_wh_ts(p1_sdate, p1_edate))

    current = df_by_wh[comparison_cols].rename(
        columns={"credits": "current_month_credits", "dollars": "current_month_dollars"})
    previous = _cost_by_warehouse(df_prev)[comparison_cols].rename(
        columns={"credits": "previous_month_credits", "dollars": "previous_month_dollars"})
    df_wh = pd.merge(current, previous, on="warehouse_name", how="outer")
    df_wh["percent_change"] = (
        (df_wh["current_month_dollars"] - df_wh["previous_month_dollars"])
        / df_wh["previous_month_dollars"] * 100
    ).round(2)
    # Warehouses present in only one of the two periods show up as 0.
    df_wh = df_wh.fillna(0)

    print('Credit and dollar by warehouse')
    print('----------------------------------------------------')
    print(tabulate(df_wh, headers='keys', tablefmt='rounded_outline', showindex=False))

    # The pie shows the individual warehouses only, so leave out the 'Total' row.
    df_warehouses = df_by_wh.iloc[:-1]
    fig1 = make_subplots(
        rows=1, cols=1,
        specs=[[{"type": "pie"}]],
    )
    fig1.add_trace(
        go.Pie(labels=df_warehouses['warehouse_name'].tolist(),
               values=df_warehouses['dollars'].tolist(),
               name='dollars', marker_colors=color_scheme),
        row=1, col=1)
    fig1.update_layout(
        title={
            'text': "Breakdown of total cost by warehouse",
            'y': 0.1,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'})
    display(fig1)

    df_by_wh_ts = (
        df.groupby(['warehouse_name', 'hourly_start_time'])
        .sum(numeric_only=True)
        .reset_index()
    )
    fig = px.area(df_by_wh_ts, x="hourly_start_time", y="credits",
                  color="warehouse_name", color_discrete_sequence=color_scheme)
    fig.update_layout(
        title={
            'text': "Timeseries of cost by warehouse",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hourly start time (UTC)",
        yaxis_title="Credits used",
    )
    display(fig)


# The pickers start at the notebook's default analysis window.
interact(
    warehouse_plots,
    start_date=widgets.DatePicker(value=pd.to_datetime(sdate)),
    end_date=widgets.DatePicker(value=pd.to_datetime(edate)),
)
output
      
      




### Cost by partner tools

In [None]:

output = widgets.Output()


@output.capture(clear_output=True, wait=True)
def pt_plots(start_date, end_date):
    """Show approximate credit usage by partner tool for the selected period.

    Prints a table of credits per client application next to the figures for
    the preceding period (with percent change), then displays a pie chart and
    an hourly area chart of credits per client application.

    Args:
        start_date: Start of the analysis window, as delivered by the date
            picker; passed unchanged to ``cqlib.cost_by_partner_tool_ts``.
        end_date: End of the analysis window, handled like ``start_date``.
    """
    if start_date is None or end_date is None:
        # A cleared date picker delivers None; there is nothing to query yet.
        print('Select both a start date and an end date.')
        return

    credits_col = "approximate_credits_used"

    def _label_unassigned(df_period):
        """Replace missing client application names with 'Unassigned'."""
        return df_period.assign(
            client_application_name=df_period["client_application_name"].fillna('Unassigned'))

    def _credits_by_tool(df_period):
        """Sum the numeric columns per client application and append a 'Total' row."""
        totals = (
            df_period.groupby(['client_application_name'])
            .sum(numeric_only=True)
            .reset_index()
            .round(2)
        )
        # The new row is labelled with this frame's own length. Using the
        # length of a different frame can overwrite an existing row.
        totals.loc[len(totals.index)] = (
            ['Total'] + [totals[col].sum() for col in totals.columns[1:]])
        return totals

    # Both periods get the same 'Unassigned' label so their rows and totals
    # are comparable.
    df = _label_unassigned(cqlib.cost_by_partner_tool_ts(start_date, end_date))
    df_by_pt = _credits_by_tool(df)

    # The comparison period must follow the *selected* dates, not the notebook
    # defaults. Dates are formatted as 'YYYY-MM-DD', the same format as the
    # notebook-level sdate/edate strings this helper was called with before.
    p1_sdate, p1_edate = get_previous_dates(
        pd.to_datetime(start_date).strftime('%Y-%m-%d'),
        pd.to_datetime(end_date).strftime('%Y-%m-%d'),
        1,
    )
    df_prev = _label_unassigned(cqlib.cost_by_partner_tool_ts(p1_sdate, p1_edate))

    current = df_by_pt.rename(columns={credits_col: "current_month_credits"})
    previous = _credits_by_tool(df_prev).rename(columns={credits_col: "previous_month_credits"})
    df_pt = pd.merge(current, previous, on="client_application_name", how='outer')
    df_pt["percent_change"] = (
        (df_pt["current_month_credits"] - df_pt["previous_month_credits"])
        / df_pt["previous_month_credits"] * 100
    ).round(2)
    # Tools present in only one of the two periods show up as 0.
    df_pt = df_pt.fillna(0)

    print('Credits by partner tools')
    print('---------------------------------------------------')
    print(tabulate(df_pt, headers='keys', tablefmt='rounded_outline', showindex=False))

    # The pie shows the individual tools only, so leave out the 'Total' row.
    df_tools = df_by_pt.iloc[:-1]
    fig1 = make_subplots(
        rows=1, cols=1,
        specs=[[{"type": "pie"}]],
    )
    fig1.add_trace(
        go.Pie(labels=df_tools['client_application_name'].tolist(),
               values=df_tools[credits_col].tolist(),
               name='credits', marker_colors=color_scheme, rotation=45),
        row=1, col=1)
    fig1.update_layout(
        title={
            'text': "Breakdown of total cost by partner tools",
            'y': 0.1,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'})
    display(fig1)

    df_by_pt_ts = (
        df.groupby(['client_application_name', 'hourly_start_time'])
        .sum(numeric_only=True)
        .reset_index()
    )
    fig = px.area(df_by_pt_ts, x="hourly_start_time", y=credits_col,
                  color="client_application_name", color_discrete_sequence=color_scheme)
    fig.update_layout(
        title={
            'text': "Timeseries of cost by partner tools",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis_title="Hourly start time (UTC)",
        yaxis_title="Credits used (approx.)",
    )
    display(fig)


# The pickers start at the notebook's default analysis window.
interact(
    pt_plots,
    start_date=widgets.DatePicker(value=pd.to_datetime(sdate)),
    end_date=widgets.DatePicker(value=pd.to_datetime(edate)),
)
output
            