In [None]:
%load_ext autoreload
%autoreload 2

%reload_ext dotenv

%dotenv ../../env/.env

import warnings
import utils
from pandas import Timedelta
import time
from utils import logger, sql_to_df, run_sql, session, conn
import pandas as pd
warnings.filterwarnings('ignore')

try:
    %load_ext autotime
except:
    !pip install ipython-autotime
    %load_ext autotime

In [None]:
import logging

# Raise the project logger's threshold so that only warnings and more severe
# records are emitted while the analysis cells run.
logger.setLevel(logging.WARNING)

In [None]:
%%markdown
# Warehouse Profiles: Inefficient Idle Spend

In [None]:
import itables

# Fetch every warehouse_profile row, adding a display label (`name`) built from
# the warehouse name and id, and `dollars_used_active` as compute spend minus
# idle spend.
sql = """
select     warehouse_name || '-' || warehouse_id as name, *, dollars_used_compute - dollars_used_idle as dollars_used_active from warehouse_profile
"""
df_actual_profile = sql_to_df(sql)

# Banner followed by an interactive table of the full result.
print("WAREHOUSE PROFILES (ACTUAL)", "---------------------------", sep="\n")
itables.show(df_actual_profile)


In [None]:
import plotly.express as px

# Layout options shared by both horizontal bar charts in this cell.
bar_layout = dict(orientation='h', height=500)

# Chart 1: idle vs active dollars for the 20 warehouses with the largest idle
# spend (ascending sort + tail keeps the biggest values last).
top_idle_spend = df_actual_profile.sort_values('dollars_used_idle').tail(20)
fig = px.bar(
    top_idle_spend,
    x=["dollars_used_idle", 'dollars_used_active'],
    y='name',
    title="spend breakdown: querying vs idle",
    **bar_layout,
)
fig.show()

# Chart 2: the 20 warehouses with the highest pct_idle value.
top_idle_pct = df_actual_profile.sort_values('pct_idle').tail(20)
fig = px.bar(
    top_idle_pct,
    x=["pct_idle"],
    y="name",
    title="pct of uptime spent idle",
    **bar_layout,
)
fig.show()

In [None]:

# Join the measured suspend-lag statistics per warehouse with the configured
# "auto_suspend" value from `warehouses` and the compute spend from
# `warehouse_profile`.
sql = """
select 
    a."auto_suspend" as current_autosuspend_actual,
    o.*,
    wp.dollars_used_compute
from suspend_lag_by_wh o
inner join warehouses a 
on a."name" = o.warehouse_name
inner join warehouse_profile wp
on o.warehouse_id = wp.warehouse_id;
"""
df = sql_to_df(sql)
import itables

print("Autosuspend Stats")
print("---------------------------")
itables.show(df)

# Plot the 20 warehouses with the HIGHEST compute spend. The frame is sorted
# descending, so head(20) is required; tail(20) would pick the 20 lowest
# spenders instead.
fig = px.bar(
    df.sort_values('dollars_used_compute', ascending=False).head(20),
    y="warehouse_name",
    x=["current_autosuspend_actual",  "suspend_lag_avg", "suspend_lag_median"],
    title = "autosuspend: setting vs measured",
    orientation='h',
    barmode='group',
    height=500
)
# Reverse the y axis so the first (largest-spend) row is drawn at the top.
fig.update_yaxes(dict(autorange="reversed"))

fig.show()

In [None]:
%%markdown
# Simulation Results: Idle Spend + Savings w/Dynamic Autosuspend

In [None]:

import itables

# Load the simulated warehouse profiles in full.
sql = """
select * from warehouse_profile_sim
"""
df_sim_profile = sql_to_df(sql)

# Banner followed by an interactive table of the full result.
print("WAREHOUSE PROFILES (SIM)", "---------------------------", sep="\n")
itables.show(df_sim_profile)

In [None]:
# Compare each warehouse's actual profile with its simulated profile for the
# 'greedy_after_one_min' strategy: hours, idle percentages, idle dollars,
# percentage savings and annualized savings.
#
# Every ratio goes through div0() so that a warehouse with a zero denominator
# (no active hours, no idle hours, or no compute spend) yields 0 instead of
# failing the whole query with a division-by-zero error. This matches the
# existing div0() use for annualized_savings.
#
# NOTE(review): the WHERE clause filters on a sim-side column, so rows that
# exist only on the `actual` side of the full outer join are dropped.
sql = """
select 
    coalesce(actual.warehouse_id, sim.warehouse_id) warehouse_id,
    coalesce(actual.warehouse_name, sim.warehouse_name) warehouse_name,
    sim.strategy,
    actual.dollars_used_compute,
    actual."auto_suspend" actual_autosuspend,
    actual.max_cluster_number,
    actual.active_hours,
    actual.dollars_used_idle,
    actual.active_hours - actual.idle_hours as query_hours,
    actual.idle_hours,
    div0(100*actual.idle_hours, actual.active_hours) as pct_idle,
    sim.active_hours as active_hours_sim,
    sim.active_hours - sim.idle_hours as query_hours_sim,
    sim.idle_hours as idle_hours_sim,
    div0(100*sim.idle_hours, sim.active_hours) as pct_idle_sim,

    div0(100*(sim.active_hours - actual.active_hours), actual.active_hours) as active_hours_diff_pct,
    div0(100*(sim.idle_hours - actual.idle_hours), actual.idle_hours) as idle_hours_diff_pct,
    actual.dollars_used_idle as dollars_used_idle_actual,
    sim.dollars_used_idle as dollars_used_idle_sim,
    sim.dollars_used_idle - actual.dollars_used_idle as dollars_diff,
    div0(-100*dollars_diff, actual.dollars_used_compute) as pct_savings,
    timediff('day', actual.start_hour, actual.end_hour) as numdays,
    div0(-dollars_diff * 365, numdays)::float as annualized_savings,
    actual."auto_suspend" =  sim.strategy:autosuspend_sec::number and sim.strategy:type::text = 'snowflake' as simulates_reality
from warehouse_profile_sim sim
full outer join warehouse_profile actual
on actual.warehouse_id = sim.warehouse_id
-- and actual."auto_suspend" =  sim.strategy:autosuspend_sec::number
-- where sim.warehouse_name = 'DEMO'
where strategy:type::text = 'greedy_after_one_min'
"""
df = sql_to_df(sql)
import itables

itables.show(df)

In [None]:
# %conda install itables -c conda-forge

In [None]:
# `df` is the actual-vs-simulated comparison frame loaded by the comparison
# query cell; a later cell reassigns `df`, so run this cell in notebook order.
print(f"total annualized savings: {df.annualized_savings.sum()}")

# Idle DOLLARS (not hours) per warehouse, actual vs simulated, for the 20
# warehouses with the largest actual idle spend. The title says "spend" to
# match the dollars_used_idle_* columns being plotted.
fig = px.bar(
    df.sort_values('dollars_used_idle', ascending=True).tail(20),
    y="warehouse_name",
    x=["dollars_used_idle_sim", "dollars_used_idle_actual" ],
    title = "idle spend: actual vs simulated smart suspend",
    orientation='h',
    barmode='group',
    height=500
)
fig.show()

# The 20 warehouses with the largest annualized savings.
fig = px.bar(
    df.sort_values('annualized_savings', ascending=True).tail(20),
    y="warehouse_name",
    x=["annualized_savings"],
    title = "annualized savings",
    orientation='h',
    height=500
)
fig.show()

# The 20 warehouses with the largest percentage savings.
fig = px.bar(
    df.sort_values('pct_savings', ascending=True).tail(20),
    y="warehouse_name",
    x=["pct_savings"],
    title = "percent savings",
    orientation='h',
    height=500
)
fig.show()

In [None]:

# Pair the simulated suspend-lag statistics (greedy_after_one_min strategy)
# with the measured ones per warehouse, plus compute spend for ranking.
sql = """
select 
    sim.warehouse_id,
    sim.warehouse_name,
    actual.suspend_lag_avg actual_suspend_lag_avg,
    sim.suspend_lag_avg sim_suspend_lag_avg,
    actual.suspend_lag_median actual_suspend_lag_median,
    sim.suspend_lag_median sim_suspend_lag_median,
    wp.dollars_used_compute
from suspend_lag_by_wh_sim sim
left join suspend_lag_by_wh actual
on sim.warehouse_id = actual.warehouse_id
inner join warehouse_profile wp
on sim.warehouse_id = wp.warehouse_id
where strategy:type::text = 'greedy_after_one_min'
;
"""

df = sql_to_df(sql)
import itables

print("Autosuspend Stats")
print("---------------------------")
itables.show(df)

# Plot the 20 warehouses with the HIGHEST compute spend. The frame is sorted
# descending, so head(20) is required; tail(20) would pick the 20 lowest
# spenders instead.
fig = px.bar(
    df.sort_values('dollars_used_compute', ascending=False).head(20),
    y="warehouse_name",
    x=["actual_suspend_lag_avg",  "actual_suspend_lag_median", "sim_suspend_lag_avg", "sim_suspend_lag_median"],
    title = "observed suspension times: snowflake autosuspend (actual) vs smart suspend (sim)",
    orientation='h',
    barmode='group',
    height=1000
)
# Reverse the y axis so the first (largest-spend) row is drawn at the top.
fig.update_yaxes(dict(autorange="reversed"))

fig.show()

In [None]:
import plotly.offline

# Initialise plotly's offline notebook mode.
# NOTE(review): this runs after all figures were shown and just before the
# HTML export cell — presumably so the exported page can render them; confirm.
plotly.offline.init_notebook_mode()

In [None]:
%%sh 
# Convert smart-suspend-analysis.ipynb to HTML, omitting code inputs
# (--no-input) and writing the result under the output name "test".
jupyter nbconvert --to html smart-suspend-analysis.ipynb --no-input --output test