In [1]:
# Make the parent of the current working directory importable, so that
# packages living one level above this notebook can be imported below.
import pathlib
import sys

_parent_dir = pathlib.Path.cwd().parent
sys.path.append(str(_parent_dir))

In [2]:
# Set to show warnings only once
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Inject a CSS rule that stretches the notebook's `.container` element to
# the full width of the browser window.
from IPython.display import HTML, display

_full_width_css = "<style>.container { width:100% !important; }</style>"
display(HTML(_full_width_css))

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tabulate import tabulate
# Shared colour sequence passed to every chart in this notebook
# (pie `marker_colors` and area `color_discrete_sequence`).
color_scheme = [
    "red", "blue", "green", "orange", "purple",
    "brown", "pink", "gray", "olive", "cyan",
    "darkviolet", "goldenrod", "darkgreen", "chocolate", "lawngreen",
]

In [4]:
# Setting up autoreload for libs, so that edits to project modules are picked
# up without restarting the kernel.
# Load IPython's autoreload extension.
%load_ext autoreload
# Mode 2: reload all modules before each cell execution.
%autoreload 2
# Import optiml.queries and register it with autoreload.
# NOTE(review): with mode 2 already active this looks redundant — confirm.
%aimport optiml.queries

In [5]:
# Initialize the Snowflake connection, the local cache directory, the query
# library and the analysis date range used by every cell below.
import os

from optiml.connection import SnowflakeConnConfig
from optiml.queries import SNFLKQuery

# Open the Snowflake connection for the configured account and warehouse.
connection = SnowflakeConnConfig(accountname='jg84276.us-central1.gcp',warehousename="XSMALL_WH").create_connection()

# Local directory (under the user's home) handed to the query library.
# NOTE(review): presumably used to cache query results — confirm in optiml.queries.
cache_dir = os.path.expanduser('~/data/kiva')

# Query library bound to the connection and cache directory.
# NOTE(review): 'KIV' looks like a customer/schema identifier — confirm.
qlib = SNFLKQuery(connection, 'KIV', cache_dir)

# Start and end of the analysis window, as YYYY-MM-DD strings passed to the queries.
sdate = '2022-10-01'
edate = '2022-10-31'
print(f"The analysis is carried out for date range {sdate} to {edate}")

Connecting...
The analysis is carried our for date range 2022-10-01 to 2022-10-31


# Total cost breakdown 

## Cost by usage category

In [6]:
# Cost records for the analysis window, with numeric values rounded to 2 decimals.
df = qlib.total_cost_breakdown_ts(sdate, edate).round(2)

# Sum the numeric columns per usage category.
# `numeric_only` is passed by keyword as a real bool: the bare string
# 'numeric_only' landed in that positional slot and only worked because a
# non-empty string is truthy.
df_by_usage_category = df.groupby("category_name").sum(numeric_only=True).reset_index()

# Append the grand-total row directly after the last category, labelled by
# the aggregated frame's own length so the index stays contiguous.
df_by_usage_category.loc[len(df_by_usage_category.index)] = ['Total', df['credits'].sum(), df['dollars'].sum()]
print(tabulate(df_by_usage_category, headers='keys', tablefmt='rounded_outline', showindex=False))

category_name      credits    dollars
---------------  ---------  ---------
Cloud services       46         92.34
Compute            3799.14    7597.82
Storage               0         76.66
Total              3845.14    7766.82


In [7]:
# Pie charts for total cost breakdown
# Remove the summary row by its label instead of by position. A positional
# drop removes one more (real) category each time the cell is re-run; the
# label filter always yields the same frame with a clean 0..n-1 index.
df_by_usage_category = df_by_usage_category[
    df_by_usage_category['category_name'] != 'Total'
].reset_index(drop=True)

fig = make_subplots(
    rows=1, cols=2,
    specs=[[{"type": "pie"},{"type": "pie"}]],
    subplot_titles=("Dollars", "Credits")
)

# One pie per cost measure: dollars in the left panel, credits in the right.
category_labels = df_by_usage_category['category_name'].tolist()
for pie_col, (measure, trace_name) in enumerate((('dollars', "Dollars"), ('credits', 'Credits')), start=1):
    fig.add_trace(
        go.Pie(
            labels=category_labels,
            values=df_by_usage_category[measure].tolist(),
            name=trace_name,
            rotation=45,
            marker_colors=color_scheme,
        ),
        row=1, col=pie_col,
    )

# The figure title is centred horizontally and anchored near the bottom edge.
fig.update_layout(
    title={
        'text': "Breakdown of total cost by usage category",
        'y':0.1,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'bottom'})
fig.show()


## Cost by usage category timeseries

In [9]:
# Sum the numeric columns per (usage category, hourly_start_time) pair.
# `numeric_only=True` replaces the bare string 'numeric_only', which was
# only accepted as that argument because a non-empty string is truthy.
df_by_category_ts = df.groupby(['category_name','hourly_start_time']).sum(numeric_only=True).reset_index()

# Stacked area chart of credits over time, one band per usage category.
fig = px.area(
    df_by_category_ts,
    x="hourly_start_time",
    y="credits",
    color="category_name",
    color_discrete_sequence=color_scheme,
    title="Credits by usage category over time",
)
fig.show()

## Cost by user

In [10]:
# Sum the numeric columns per user.
# `numeric_only=True` replaces the bare string 'numeric_only', which was
# only accepted as that argument because a non-empty string is truthy.
df_by_user = df.groupby(['user_name']).sum(numeric_only=True).reset_index()
print(tabulate(df_by_user, headers='keys', tablefmt='rounded_outline', showindex=False))

user_name      credits    dollars
-----------  ---------  ---------
DBT_DEV         390.86     781.7
DBT_PROD        748.51    1497.06
GREGORYW       1209.09    2418.02
ROBS             69.72     139.26
Snowflake        46        169
VERTEX_PROD     748.51    1497.06


In [11]:
# Side-by-side pies: each user's share of dollars (left) and credits (right).
user_labels = df_by_user['user_name'].tolist()

fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "pie"}, {"type": "pie"}]],
    subplot_titles=("Dollars", "Credits"),
)

# (column in df_by_user, trace name) for each panel, left to right.
pie_specs = (('dollars', "Dollars"), ('credits', 'Credits'))
for panel_col, (measure, trace_name) in enumerate(pie_specs, start=1):
    user_pie = go.Pie(
        labels=user_labels,
        values=df_by_user[measure].tolist(),
        name=trace_name,
        rotation=45,
        marker_colors=color_scheme,
    )
    fig.add_trace(user_pie, row=1, col=panel_col)

# The figure title is centred horizontally and anchored near the bottom edge.
title_settings = {
    'text': "Breakdown of total cost by user",
    'y': 0.1,
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'bottom',
}
fig.update_layout(title=title_settings)
fig.show()

In [12]:
# Sum the numeric columns per (user, hourly_start_time) pair.
# `numeric_only=True` replaces the bare string 'numeric_only', which was
# only accepted as that argument because a non-empty string is truthy.
df_by_user_ts = df.groupby(['user_name','hourly_start_time']).sum(numeric_only=True).reset_index()

# Stacked area chart of credits over time, one band per user.
fig = px.area(
    df_by_user_ts,
    x="hourly_start_time",
    y="credits",
    color="user_name",
    color_discrete_sequence=color_scheme,
    title="Credits by user over time",
)
fig.show()

## Cost by warehouse

In [13]:
# Load the per-warehouse cost time series for the analysis window and round
# the numeric values to 2 decimals.
# NOTE(review): this rebinds `df`, replacing the total-cost frame used by the
# cells above; re-running those cells afterwards will see this frame instead.
df = qlib.cost_by_wh_ts(sdate, edate).round(decimals=2)
df.head()

Unnamed: 0,warehouse_name,credits,dollars,cloud_services_credits,cloud_services_dollars,hourly_start_time
0,DAILY_REFRESH_WH,1.0,2.0,0.0,0.0,2022-10-10 15:00:00
1,DAILY_REFRESH_WH,1.0,2.0,0.0,0.0,2022-10-07 15:00:00
2,DAILY_REFRESH_WH,1.0,2.0,0.0,0.0,2022-10-08 15:00:00
3,DAILY_REFRESH_WH,1.0,2.0,0.0,0.0,2022-10-05 15:00:00
4,DAILY_REFRESH_WH,1.0,2.0,0.0,0.0,2022-10-09 15:00:00


In [14]:
# This frame is loaded without an appended totals row, so nothing may be
# removed by position: doing so discards the last real record, and one more
# on every re-run. Filtering on the label is a no-op when no 'Total' row is
# present and gives the same result however often the cell is executed.
df = df[df['warehouse_name'] != 'Total']

# Collapse the per-hour records to one row per warehouse so that labels,
# values and marker colours line up one-to-one in the pie.
dollars_by_warehouse = df.groupby('warehouse_name', as_index=False)['dollars'].sum()

fig = make_subplots(
    rows=1, cols=1,
    specs=[[{"type": "pie"}]],
)

fig.add_trace(
    go.Pie(
        labels=dollars_by_warehouse['warehouse_name'].tolist(),
        values=dollars_by_warehouse['dollars'].tolist(),
        name='dollars',
        marker_colors=color_scheme,
    ),
    row=1, col=1,
)

# The figure title is centred horizontally and placed near the bottom edge.
fig.update_layout(
    title={
        'text': "Breakdown of total cost by warehouse",
        'y':0.1,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [15]:
# Sum the numeric columns per (warehouse, hourly_start_time) pair.
# `numeric_only=True` replaces the bare string 'numeric_only', which was
# only accepted as that argument because a non-empty string is truthy.
df_by_warehouse_ts = df.groupby(['warehouse_name','hourly_start_time']).sum(numeric_only=True).reset_index()

# Stacked area chart of credits over time, one band per warehouse.
fig = px.area(
    df_by_warehouse_ts,
    x="hourly_start_time",
    y="credits",
    color="warehouse_name",
    color_discrete_sequence=color_scheme,
    title="Credits by warehouse over time",
)
fig.show()
