In [None]:
%load_ext autoreload
%autoreload 2
import sys, pathlib
sys.path.append(str(pathlib.Path.cwd().parent.parent))


In [None]:
%reload_ext dotenv

%dotenv ../../env/.env

import warnings
from pandas import Timedelta
# from optiml.utils import sf
import time
from optiml.utils.sf import logger, sql_to_df, run_sql, conn, session
import pandas as pd
warnings.filterwarnings('ignore')

try:
    %load_ext autotime
except:
    !pip install ipython-autotime
    %load_ext autotime


In [None]:
# DDL that (re)creates the `whp_enriched` view used by the later queries.
#   * `aero_sim`: hourly rows from `warehouse_profile_by_hour_sim`, restricted to
#     the python / polling_sec=1 / `greedy_after_one_min` strategy object; query
#     and idle time are converted from hours to minutes.
#   * `actual`: the same minute conversions on `warehouse_profile_by_hour`,
#     labelled with strategy 'original' and carrying the real dollars_used_compute.
#   * The two are inner-joined on (warehouse_id, hour_start), so hours without a
#     matching simulated row are dropped.
#   * sim_dollars_used_compute scales the actual hourly dollars by
#     (least(sim idle, actual idle) + actual query minutes)
#       / (actual idle + actual query minutes).
# NOTE(review): an hour whose actual idle + query minutes sum to 0 would divide
# by zero in that ratio -- confirm such rows cannot occur in the source table.
sql = """

create or replace view whp_enriched as
with aero_sim as (
    select 
        warehouse_id,
        warehouse_name,
        hour_start,
        strategy::text as strategy,
        (active_hours - idle_hours)*60 as query_minutes,
        idle_hours * 60 as idle_minutes,
        null as dollars_used_compute
    from warehouse_profile_by_hour_sim
    where 
    strategy in 
    (
        --{'autosuspend_sec': 60,   'engine': 'sql',   'type': 'snowflake' },
        {'engine': 'python',   'polling_sec': 1,   'type': 'greedy_after_one_min' }
    )
),
actual as (
    select 
        warehouse_id,
        warehouse_name,
        hour_start,
        'original' as strategy,
        (active_hours - idle_hours)*60 as query_minutes,
        idle_hours * 60 as idle_minutes,
        dollars_used_compute
    from warehouse_profile_by_hour
),
warehouse_profile_by_hour_enriched as 
(
    select
        a.*,
        b.strategy as sim_strategy,
        b.query_minutes as sim_query_minutes,
        b.idle_minutes as sim_idle_minutes,
        (least(sim_idle_minutes, a.idle_minutes) + a.query_minutes)/(a.idle_minutes + a.query_minutes) * a.dollars_used_compute as sim_dollars_used_compute
    from 
        actual a
    join aero_sim b
    on a.warehouse_id = b.warehouse_id
    and a.hour_start = b.hour_start
)
select * from warehouse_profile_by_hour_enriched
"""
# Run the DDL and show whatever sql_to_df returns for it (presumably a status
# row rather than the view's contents -- the statement is a CREATE VIEW).
df = sql_to_df(sql)
display(df)

# Per-warehouse roll-up of `whp_enriched`.
#   * `agg` groups by a "<warehouse_name>-<warehouse_id>" label and sums the
#     actual and simulated query/idle minutes and compute dollars.
#   * days_measured is the day difference between the min and max hour_start of
#     the whole view (not per warehouse).
#   * savings = actual dollars - simulated dollars; savings_annualized scales
#     that by 365 / days_measured; pct_savings is savings as a % of actual dollars.
#   * pct_idle / dollars_used_idle / dollars_used_active split actual spend by
#     the idle share of (query + idle) minutes.
# NOTE(review): sim_pct_idle divides by (query_minutes + sim_idle_minutes), i.e.
# the *actual* query minutes rather than sim_query_minutes -- confirm intended.
# NOTE(review): days_measured is 0 when the view spans less than one day
# boundary, which would make savings_annualized divide by zero.
spend_by_wh = """
-- select
--     *
-- from
--     warehouse_profile_by_hour_enriched
--     limit 10;
with agg as (
    select 
        --warehouse_id,
        warehouse_name || '-' || warehouse_id as warehouse_name,
        --date_trunc(day, hour_start) as day,
        -- hour_start,
        -- 100*abs((query_minutes - sim_query_minutes)/sim_query_minutes) < 0.01 as query_minutes_same, -- at most 0.01% deviation b/w query minutes
        count(*),
        sum(query_minutes) query_minutes,
        sum(sim_query_minutes) sim_query_minutes,
        sum(idle_minutes) as idle_minutes,
        sum(sim_idle_minutes) as sim_idle_minutes,
        sum(dollars_used_compute) as dollars_used_compute,
        sum(sim_dollars_used_compute) as sim_dollars_used_compute
        
    from whp_enriched
    group by all
)
select
    *,
    datediff(day, (select min(hour_start) from whp_enriched), (select max(hour_start) from whp_enriched)) as days_measured,
    
    dollars_used_compute - sim_dollars_used_compute  as savings,
    savings * 365/days_measured as savings_annualized,
    100 * savings/dollars_used_compute as pct_savings,
    
    
    100*idle_minutes/(query_minutes + idle_minutes) as pct_idle,
    dollars_used_compute * pct_idle / 100 as dollars_used_idle,
    dollars_used_compute - dollars_used_idle as dollars_used_active,
    
    100*sim_idle_minutes/(query_minutes + sim_idle_minutes) as sim_pct_idle,
    sim_dollars_used_compute * sim_pct_idle / 100 as sim_dollars_used_idle,
    
    idle_minutes / 60 as idle_hours,
    sim_idle_minutes / 60 as sim_idle_hours
    
    
from
    agg
order by 1 asc;
"""

# Materialise the per-warehouse result; the charts in later cells read this frame.
spend_by_wh_df = sql_to_df(spend_by_wh)
# itables is imported here at first use (and again further down in the notebook).
import itables

# Render the per-warehouse table with itables.
itables.show(spend_by_wh_df)


# Per-warehouse suspend-lag comparison.
#   * Starts from `suspend_lag_by_wh_sim`, keeping rows whose strategy object has
#     type 'greedy_after_one_min'.
#   * Left-joins `suspend_lag_by_wh` (the actual figures) on warehouse_id, so
#     a simulated warehouse without an actual row keeps NULL actual_* columns.
#   * Inner-joins `warehouse_profile` on warehouse_id to attach
#     dollars_used_compute; warehouses missing from that table are dropped.
# NOTE(review): `strategy` in the WHERE clause is unqualified -- presumably it
# resolves to the sim table; confirm no other joined table has that column.
suspension_lags_sql = """
select 
    --sim.warehouse_id,
    --sim.warehouse_name,
    sim.warehouse_name || '-' || sim.warehouse_id as warehouse_name,
    actual.suspend_lag_avg actual_suspend_lag_avg,
    sim.suspend_lag_avg sim_suspend_lag_avg,
    actual.suspend_lag_median actual_suspend_lag_median,
    sim.suspend_lag_median sim_suspend_lag_median,
    wp.dollars_used_compute
from suspend_lag_by_wh_sim sim
left join suspend_lag_by_wh actual
on sim.warehouse_id = actual.warehouse_id
inner join warehouse_profile wp
on sim.warehouse_id = wp.warehouse_id
where strategy:type::text = 'greedy_after_one_min'
;
"""

# Materialise the comparison; the suspension-lag chart in a later cell reads it.
suspension_lags_df = sql_to_df(suspension_lags_sql)
# Repeated import (itables is already imported earlier in this cell); harmless.
import itables


# Plain-text heading, then the table rendered with itables.
print("Autosuspend Stats")
print("---------------------------")
itables.show(suspension_lags_df)

In [None]:
import plotly.express as px

# Active vs idle spend per warehouse, as horizontal bars. An ascending sort
# followed by tail(20) keeps the last 20 rows, i.e. the largest idle spend.
idle_spend_top20 = spend_by_wh_df.sort_values('dollars_used_idle', ascending=True).tail(20)

fig = px.bar(
    idle_spend_top20,
    x=['dollars_used_active', "dollars_used_idle"],
    y='warehouse_name',
    orientation='h',
    title="spend breakdown: querying vs idle",
    height=500,
)
fig.show()

# Share of uptime spent idle, for the last 20 warehouses after an ascending
# sort on pct_idle (i.e. the highest idle percentages).
pct_idle_top20 = spend_by_wh_df.sort_values('pct_idle', ascending=True).tail(20)

fig = px.bar(
    pct_idle_top20,
    x=["pct_idle"],
    y="warehouse_name",
    orientation='h',
    title="pct of uptime spent idle",
    height=500,
)
fig.show()


In [None]:
%%markdown
# Impact Estimates

In [None]:
# Suspension lag (average and median): actual Snowflake autosuspend vs the
# simulated smart suspend, for the 20 warehouses with the highest compute spend.
#
# The frame is sorted by spend in DESCENDING order, so the top spenders are the
# first rows: head(20) selects them. (tail(20) on a descending sort returned the
# 20 lowest-spend warehouses instead.) The reversed y-axis below then draws the
# biggest spender at the top of the chart.
top_spend_lags = suspension_lags_df.sort_values('dollars_used_compute', ascending=False).head(20)

fig = px.bar(
    top_spend_lags,
    y="warehouse_name",
    x=["actual_suspend_lag_avg",  "actual_suspend_lag_median", "sim_suspend_lag_avg", "sim_suspend_lag_median"],
    title="suspension lags: snowflake autosuspend (actual) vs smart suspend (sim)",
    orientation='h',
    barmode='group',
    height=600
)
# Horizontal bars start from the bottom by default; reverse so the first row
# (largest spend) is at the top.
fig.update_yaxes(autorange="reversed")

fig.show()

# Idle spend in dollars, actual vs simulated, as grouped horizontal bars for
# the last 20 warehouses after an ascending sort on actual idle spend.
#
# The plotted columns are dollar amounts (dollars_used_idle and
# sim_dollars_used_idle), so the title says "idle spend"; it previously read
# "idle hours", which did not match the data shown.
# NOTE(review): spend_by_wh_df also carries idle_hours / sim_idle_hours. If an
# hours chart was the original intent, swap the x columns rather than the title.
fig = px.bar(
    spend_by_wh_df.sort_values('dollars_used_idle', ascending=True).tail(20),
    y="warehouse_name",
    x=["sim_dollars_used_idle", "dollars_used_idle"],
    title="idle spend: actual vs simulated smart suspend",
    orientation='h',
    barmode='group',
    height=500
)
fig.show()

# Annualized savings per warehouse: the last 20 rows after an ascending sort,
# i.e. the largest annualized savings.
annualized_top20 = spend_by_wh_df.sort_values('savings_annualized', ascending=True).tail(20)

fig = px.bar(
    annualized_top20,
    x=["savings_annualized"],
    y="warehouse_name",
    orientation='h',
    title="annualized savings",
    height=500,
)
fig.show()

# Savings as a percentage of actual compute spend: the last 20 rows after an
# ascending sort, i.e. the largest percentage savings.
pct_savings_top20 = spend_by_wh_df.sort_values('pct_savings', ascending=True).tail(20)

fig = px.bar(
    pct_savings_top20,
    x=["pct_savings"],
    y="warehouse_name",
    orientation='h',
    title="percent savings",
    height=500,
)
fig.show()

In [None]:
%%markdown
# Savings Estimates

In [None]:
# Per-day roll-up of `whp_enriched`: the same aggregation and derived columns as
# the per-warehouse query, but grouped by date_trunc(day, hour_start) instead of
# by warehouse.
#   * days_measured is the day difference between the min and max hour_start of
#     the whole view, so it has the same value on every row.
#   * savings = actual dollars - simulated dollars for that day.
# NOTE(review): savings_annualized here scales a single day's savings by
# 365 / days_measured, so it is not a per-day annual figure; the summary cell
# annualizes the summed savings instead.
# NOTE(review): sim_pct_idle divides by (query_minutes + sim_idle_minutes), i.e.
# the *actual* query minutes rather than sim_query_minutes -- confirm intended.
spend_by_day_sql = """
-- select
--     *
-- from
--     warehouse_profile_by_hour_enriched
--     limit 10;
with agg as (
    select 
        --warehouse_id,
        --warehouse_name || '-' || warehouse_id as warehouse_name,
        date_trunc(day, hour_start) as day,
        -- hour_start,
        -- 100*abs((query_minutes - sim_query_minutes)/sim_query_minutes) < 0.01 as query_minutes_same, -- at most 0.01% deviation b/w query minutes
        count(*),
        sum(query_minutes) query_minutes,
        sum(sim_query_minutes) sim_query_minutes,
        sum(idle_minutes) as idle_minutes,
        sum(sim_idle_minutes) as sim_idle_minutes,
        sum(dollars_used_compute) as dollars_used_compute,
        sum(sim_dollars_used_compute) as sim_dollars_used_compute
        
    from whp_enriched
    group by all
)
select
    *,
    datediff(day, (select min(hour_start) from whp_enriched), (select max(hour_start) from whp_enriched)) as days_measured,
    
    dollars_used_compute - sim_dollars_used_compute  as savings,
    savings * 365/days_measured as savings_annualized,
    100 * savings/dollars_used_compute as pct_savings,
    
    
    100*idle_minutes/(query_minutes + idle_minutes) as pct_idle,
    dollars_used_compute * pct_idle / 100 as dollars_used_idle,
    dollars_used_compute - dollars_used_idle as dollars_used_active,
    
    100*sim_idle_minutes/(query_minutes + sim_idle_minutes) as sim_pct_idle,
    sim_dollars_used_compute * sim_pct_idle / 100 as sim_dollars_used_idle,
    
    idle_minutes / 60 as idle_hours,
    sim_idle_minutes / 60 as sim_idle_hours
    
    
from
    agg
order by 1 asc;
"""

# Materialise the per-day result; the summary and daily chart cells read it.
spend_by_day_df = sql_to_df(spend_by_day_sql)
# NOTE(review): itables is imported but not used in this cell.
import itables

In [None]:
# Length of the measurement window. days_measured carries the same value on
# every row of the per-day frame, so the row labelled 0 is representative.
period_days = spend_by_day_df.days_measured[0]
print(f"period: {period_days} days")

# Total savings over the window, from the per-day roll-up and -- as a
# cross-check -- from the per-warehouse roll-up.
print(f"savings over period: {spend_by_day_df.savings.sum()}")
print(f"savings over period [check]: {spend_by_wh_df.savings.sum()}")

# Linear extrapolation of the window's total savings to a 365-day year.
annualized_savings = spend_by_day_df.savings.sum() * 365 / period_days
print(f"annualized savings: {annualized_savings}")

In [None]:
import plotly.express as px

# One bar per day with two series: simulated compute spend (gray) and savings
# (green). Because savings = actual - simulated dollars, the two series add up
# to the actual daily spend.
fig = px.bar(
    spend_by_day_df,
    x="day",
    y=["sim_dollars_used_compute", "savings"],
    title="Compute savings w/aero",
    color_discrete_sequence=['gray', 'green'],
    height=500,
)
fig.show()

In [None]:
%%sh 
# Render dynamic_optimization_estimates.ipynb to HTML with code inputs omitted
# (--no-input), writing the result under the same base name.
jupyter nbconvert --to html dynamic_optimization_estimates.ipynb --no-input --output dynamic_optimization_estimates