In [1]:
import pathlib
import sys

# Make the parent of the current working directory importable (presumably the
# repository root that holds the `optiml` package - confirm against the repo
# layout). The membership check keeps the cell idempotent: re-running it does
# not pile up duplicate entries on sys.path.
repo_root = str(pathlib.Path.cwd().parent)
if repo_root not in sys.path:
    sys.path.append(repo_root)

In [2]:
# Third-party libraries used by the cells below.
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import HTML, display
from plotly.subplots import make_subplots
from tabulate import tabulate

# Inject a CSS rule that sets the notebook's `.container` element to 100% width.
full_width_css = "<style>.container { width:100% !important; }</style>"
display(HTML(full_width_css))

In [3]:
# Enable IPython's autoreload extension so edits to imported modules are picked
# up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules (except those excluded via %aimport) before
# executing each cell.
%autoreload 2
# Import optiml.queries and explicitly mark it for automatic reloading.
%aimport optiml.queries

## Initialization

In [4]:
# Imports needed only for the initialization step.
import os
from optiml.connection import SnowflakeConnConfig
from optiml.queries import SNFLKQuery

# Build the Snowflake connection settings, then open the connection.
conn_config = SnowflakeConnConfig(
    accountname='jg84276.us-central1.gcp',
    warehousename="XSMALL_WH",
)
connection = conn_config.create_connection()

# Local directory handed to the query library as its third argument
# (presumably where query results are cached - confirm in SNFLKQuery).
cache_dir = os.path.expanduser('~/data/kiva')

# Query library used by every analysis cell below.
# NOTE(review): 'KIV' looks like a customer/schema identifier - confirm.
qlib = SNFLKQuery(connection, 'KIV', cache_dir)

# Start and end dates passed to every query in this notebook.
sdate = '2022-09-01'
edate = '2022-10-31'

/Users/skumar/.snowsql/config
Connecting...


# Total cost breakdown

## Visualization

## Breakdown of total cost by usage type

In [5]:
# Cost per usage category between sdate and edate, rounded to two decimals.
df = qlib.total_cost_breakdown(sdate, edate).round(2)

# Append a summary row after the last existing row; the plotting cell that
# follows removes it again before drawing.
credits_total = df['credits'].sum()
dollars_total = df['dollars'].sum()
df.loc[len(df.index)] = ['Total', credits_total, dollars_total]

print(tabulate(df, headers='keys', tablefmt='rounded_outline', showindex=False))


cost_category            credits    dollars
---------------------  ---------  ---------
Storage                     0        306.12
Compute                  3991.17    7982.33
Cloud Service             177.06     354.11
Autoclustering              0          0
Materialization Views       0          0
Replication                 0          0
Search Optimization         0          0
Snowpipe                    0          0
Total                    4168.23    8642.56


In [6]:
# Remove the 'Total' summary row before plotting. Filtering by label (instead
# of dropping the last positional row) keeps this cell idempotent: re-running
# it never removes a real cost category.
df = df[df['cost_category'] != 'Total']

categories = df['cost_category'].tolist()

# Two pies side by side: dollars on the left, credits on the right. The subplot
# titles make each pie identifiable without hovering.
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{"type": "pie"}, {"type": "pie"}]],
    subplot_titles=("Dollars", "Credits"),
)

fig.add_trace(go.Pie(labels=categories, values=df['dollars'].tolist(), name="Dollars"), row=1, col=1)
fig.add_trace(go.Pie(labels=categories, values=df['credits'].tolist(), name='Credits'), row=1, col=2)

# The figure title is anchored near the bottom of the figure, horizontally centred.
fig.update_layout(
    title={
        'text': "Breakdown of total cost by usage category",
        'y': 0.1,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'bottom'})
fig.show()

## Analysis
- Compute forms the overwhelming majority of the cost
- Cloud services are, in aggregate, less than 10% of compute (177.06 vs. 3991.17 credits), so they should not be an added expense
- Autoclustering used to be on in 2020 but isn't anymore
- We should explore whether there are any compute efficiencies to be gained from:
    - Autoclustering
    - Materialized views / caching
    - Search optimization
    - Query optimizations / reducing scans

* Credits are largely used by compute
* In 2020 auto clustering was on, but it was later turned off - why?
* Storage credits are at 0 since storage is charged as a flat rate - are the numbers consistent?
* We definitely did not use $80K worth of credits in October, since the total bill is $50K - so something is wrong!


## Breakdown of total cost by warehouse

In [7]:
# Cost per warehouse between sdate and edate, rounded to two decimals.
df = qlib.cost_by_wh(sdate, edate).round(2)

# Append a summary row after the last existing row; the plotting cell that
# follows removes it again before drawing.
credits_total = df['credits'].sum()
dollars_total = df['dollars'].sum()
df.loc[len(df.index)] = ['Total', credits_total, dollars_total]

print(tabulate(df, headers='keys', tablefmt='rounded_outline', showindex=False))

warehouse_name         credits    dollars
-------------------  ---------  ---------
PROD_WH                2164.28    4328.57
ML_WH                   113.63     227.27
DEV_WH                 1289.47    2578.94
DAILY_REFRESH_WH        600.83    1201.66
CLOUD_SERVICES_ONLY       0          0
Total                  4168.21    8336.44


In [8]:
# Remove the 'Total' summary row before plotting. Filtering by label (instead
# of dropping the last positional row) keeps this cell idempotent: re-running
# it never removes a real warehouse.
df = df[df['warehouse_name'] != 'Total']

warehouses = df['warehouse_name'].tolist()

# Two pies side by side: dollars on the left, credits on the right. The subplot
# titles make each pie identifiable without hovering.
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{"type": "pie"}, {"type": "pie"}]],
    subplot_titles=("Dollars", "Credits"),
)

fig.add_trace(go.Pie(labels=warehouses, values=df['dollars'].tolist(), name='dollars'), row=1, col=1)
fig.add_trace(go.Pie(labels=warehouses, values=df['credits'].tolist(), name='credits'), row=1, col=2)

# The figure title is anchored near the bottom of the figure, horizontally centred.
fig.update_layout(
    title={
        'text': "Breakdown of total cost by warehouse",
        'y': 0.1,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'bottom'})
fig.show()

## Analysis
- The sum of credits across all the warehouses is 4168.21, which is consistent with the total credits in the usage-type breakdown above (4168.23, compute plus cloud services). This is a sanity check to ensure we are not missing any compute or warehouse

## Breakdown of total cost by user (to do)

## Breakdown of total cost by partner tool (client application) and warehouse

In [9]:
# Approximate credits used per client application and warehouse between sdate
# and edate, rounded to two decimals.
partner_tool_costs = qlib.cost_by_partner_tool_ts(sdate, edate)
df = partner_tool_costs.round(2)

In [10]:
# The query runs in a separate cell, so this cell may be re-run against a frame
# that already carries a summary row. Strip any existing one first; otherwise a
# second 'Total' row would be appended and the old total double-counted.
is_total_row = (df['client_application_name'] == 'Total') & (df['warehouse_name'] == 'ALL WHs')
df = df[~is_total_row].reset_index(drop=True)

# Append the summary row as the last row (index label len(df)), which is where
# the following plotting cell expects to find it.
df.loc[len(df.index)] = ['Total', 'ALL WHs', df['approximate_credits_used'].sum()]
print(tabulate(df, headers='keys', tablefmt='rounded_outline', showindex=False))

client_application_name    warehouse_name      approximate_credits_used
-------------------------  ----------------  --------------------------
JDBC                       PROD_WH                              1789.19
Python                     DEV_WH                               1006.81
Python                     DAILY_REFRESH_WH                      571.81
JDBC                       DEV_WH                                242.51
ODBC                       PROD_WH                               205.23
Python                     PROD_WH                               166.52
Python                     ML_WH                                 113.53
ODBC                       DEV_WH                                 26.42
Go                         DEV_WH                                  8.52
Snowflake UI               DEV_WH                                  4.18
Go                         PROD_WH                                 1.18
Snowflake UI               PROD_WH                              

In [11]:
# Remove the 'Total' / 'ALL WHs' summary row before plotting. Filtering by label
# (instead of dropping the last positional row) keeps this cell idempotent:
# re-running it never removes a real data row.
is_total_row = (df['client_application_name'] == 'Total') & (df['warehouse_name'] == 'ALL WHs')
df = df[~is_total_row]

credits_used = df['approximate_credits_used'].tolist()

# Both pies plot the same credit values, grouped by a different column. The
# subplot titles say which is which, since both traces are named 'credits'.
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{"type": "pie"}, {"type": "pie"}]],
    subplot_titles=("By client application", "By warehouse"),
)

# Labels repeat (one row per application/warehouse pair); plotly's Pie sums the
# values of duplicated labels into a single sector.
fig.add_trace(go.Pie(labels=df['client_application_name'].tolist(), values=credits_used, name='credits'), row=1, col=1)
fig.add_trace(go.Pie(labels=df['warehouse_name'].tolist(), values=credits_used, name='credits'), row=1, col=2)

# The figure title is anchored near the bottom of the figure, horizontally
# centred. The text previously read "by warehouse" (copied from the warehouse
# figure), which did not describe this figure.
fig.update_layout(
    title={
        'text': "Breakdown of approximate credits used by client application and warehouse",
        'y': 0.1,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'bottom'})
fig.show()