In [None]:
%load_ext autoreload
%autoreload 2
import sys, pathlib
sys.path.append(str(pathlib.Path.cwd().parent.parent))

In [None]:
%reload_ext dotenv

%dotenv ../../env/.env

import warnings
from pandas import Timedelta
# from optiml.utils import sf
import time
from optiml.utils.sf import logger, sql_to_df, run_sql, conn, session
import pandas as pd
warnings.filterwarnings('ignore')

try:
    %load_ext autotime
except:
    !pip install ipython-autotime
    %load_ext autotime


In [None]:
import uuid

# Names of the warehouse-management strategies under comparison; one test
# warehouse is created per entry further down in this cell.
strategies = [
    "control",
    "suspend_idle_after_one_minute",
    # "suspend_after_one_minute",  # currently disabled
    "minimize_clusters_if_no_queuing",
    "economy",
    "economy_up_standard_down",
]

import sched , time

def print_time(a='default'):
    """Print a marker line with the current epoch time and the label ``a``.

    Small debugging helper suitable as a scheduler callback.
    """
    now = time.time()
    print("From print_time", now, a)

def create_warehouse_sql(wh_name, auto_suspend=60, scaling_policy="STANDARD", max_cluster_count=4):
    """Build the ``create or replace warehouse`` statement for a test warehouse.

    Args:
        wh_name: Warehouse identifier, interpolated verbatim into the SQL.
        auto_suspend: Value for the ``auto_suspend`` property (default 60).
        scaling_policy: Value for ``SCALING_POLICY`` (default ``STANDARD``).
        max_cluster_count: Value for ``MAX_CLUSTER_COUNT`` (default 4).

    Returns:
        The SQL text. With the default arguments it is identical to the
        statement this helper produced before the settings were parameterized.

    The warehouse is always created with ``INITIALLY_SUSPENDED=true``.
    """
    return f"""
    create or replace warehouse {wh_name}
    auto_suspend = {auto_suspend}
    SCALING_POLICY={scaling_policy}
    INITIALLY_SUSPENDED=true
    MAX_CLUSTER_COUNT = {max_cluster_count};
    """

def test_query(table_name, multiplier):
    # return f"""
    # create or replace table {table_name} as 
    # SELECT seq4() col1 , uniform(1, 10, RANDOM(12)) col2 
    # FROM TABLE(GENERATOR(ROWCOUNT => {multiplier} * 10000000)) v 
    # ORDER BY 2;
    # """
    return f"""
    create or replace table {table_name} as 
    with a as (
    SELECT seq4() col1 , uniform(1, 1000000000, RANDOM(12)) col2 
    FROM TABLE(GENERATOR(ROWCOUNT => {multiplier} * 10000000)) v
    )
    select a1.*, a2.col2 as col3
    from a a1
    left join a a2
    on a1.col1 = a2.col2
    ORDER BY a1.col2;
    """
import uuid
def run_query(multiplier, warehouse=None):
    """Submit one benchmark statement, optionally switching warehouse first.

    Args:
        multiplier: Scale factor forwarded to ``test_query``.
        warehouse: When truthy, a ``use warehouse`` statement is issued
            (with ``wait=True``) before the benchmark statement.

    The benchmark statement targets a table with a fresh uuid4-based name and
    is passed to ``run_sql`` with ``wait=False``.
    """
    print("running query")
    if warehouse:
        run_sql(f"use warehouse {warehouse};", wait=True)
    # Unique table per submission so concurrent statements do not share a target.
    target_table = "test_table_" + str(uuid.uuid4()).replace("-", "_")
    run_sql(test_query(target_table, multiplier), wait=False)

    
# For every strategy: pick a unique warehouse name and prepare the matching
# create / drop statements. Nothing is executed here.
strategy_warehouses = {}   # strategy name -> warehouse name
warehouse_creates = []     # create statements, in strategy order
warehouse_drops = []       # drop statements, in strategy order
for strategy in strategies:
    # uuid4 suffix gives each run its own warehouses; dashes are replaced by
    # underscores (presumably so the name works as an unquoted identifier).
    # Named unique_suffix rather than `id` so the builtin id() is not shadowed
    # for the rest of the kernel session.
    unique_suffix = str(uuid.uuid4()).replace("-", "_")
    wh_name = f"test_{strategy}_{unique_suffix}"
    strategy_warehouses[strategy] = wh_name
    warehouse_creates.append(create_warehouse_sql(wh_name))
    warehouse_drops.append(f"drop warehouse {wh_name}")




In [None]:
# Display the generated warehouse names, one per strategy.
list(strategy_warehouses.values())

In [None]:
# CREATE WAREHOUSES
# Echo each prepared create statement, then execute it.
for create_stmt in warehouse_creates:
    print(create_stmt)
    run_sql(create_stmt)

In [None]:
# Switch the "economy" strategy's warehouse to the economy scaling policy.
# The name is looked up from this run's mapping: warehouse names carry a fresh
# uuid on every run, so a name pasted from an earlier run no longer matches.
economy = strategy_warehouses['economy']
run_sql(f'alter warehouse {economy} set scaling_policy = economy')
sql_to_df("show warehouses")

In [None]:
# Delay (seconds) added in front of every scheduled submission.
initial_offset = 5

# QUERY_LENGTH = 10
# How many times the whole set of submissions is scheduled.
NUM_PARALLEL_QUERIES = 1

# Number of one-second slots in the workload.
N = 60
numbers = list(range(1, N + 1))

# workload_start_times = [0, 10, 20, 30]
workload_start_times = list(range(1, N + 1, 1))
print(workload_start_times)

# Delays passed to enter() are relative to the moment enter() is called.
s = sched.scheduler(time.time, time.sleep)

for parallel_round in range(NUM_PARALLEL_QUERIES):
    for start_second in workload_start_times:
        delay = initial_offset + start_second
        # QUERY_WAIT = 0 if start_second < 30 else 10
        # Later slots get proportionally larger statements.
        multiplier = start_second / 5 + 1
        # Every warehouse receives the same submission at the same delay.
        for wh_name in strategy_warehouses.values():
            s.enter(delay, 1, run_query, argument=(multiplier, wh_name))

print(warehouse_creates)
# for job in s.queue:
#     print(job)

In [None]:
# Execute the scheduled submissions; run() blocks until the scheduler queue is empty.
s.run()

In [None]:
# Same listing as earlier in the notebook, repeated here after the workload cell.
list(strategy_warehouses.values())

In [None]:
# Drop every warehouse created for this run.
# The loop variable is deliberately not called `s`: that name holds the
# scheduler, and rebinding it to a string would break a later `s.run()`.
for drop_stmt in warehouse_drops:
    run_sql(drop_stmt)

In [None]:
# Unconditionally raises; presumably a deliberate stop so that "Run All" does
# not continue into the ad-hoc cells below — confirm before removing.
raise ValueError("done")

In [None]:
# Three interventions to try:
# 1. Economy up, standard down.
# 2. If running_count == max and nothing is queued, set max to min; if queries are queued again, set max back to max.
# 3. If num_active has stayed the same for more than 1 minute, set num_active to 0.

In [None]:
# run_sql(f"alter warehouse {wh_name} abort all queries")
# run_sql(f"alter warehouse {wh_name} suspend")
# Clean-up: list all warehouses, then drop every one whose name starts with
# 'TEST' (this matches any such warehouse, not only those from this run).
show_df = sql_to_df("show warehouses")
display(show_df)
test_warehouses = [name for name in show_df["name"].to_list() if name.startswith('TEST')]
for wh in test_warehouses:
    print(wh)
    run_sql(f"drop warehouse {wh}")

In [None]:
# NOTE(review): `qdf` is only assigned in a later cell, so this cell fails on a
# fresh kernel. The SELECT list of that later query does not output
# `exec_start_time`, so this column selection looks stale — confirm.
qdf[['warehouse_name', 'cluster_number', 'start_time', 'exec_start_time', 'end_time', 'query_text', 'query_id']]

In [None]:
# NOTE(review): `queries` is only assigned in a later cell; on a fresh kernel
# this cell raises NameError unless that cell has been run first.
print(queries)

In [None]:
import plotly.express as px

# Fixed analysis window for the query-history lookup.
# NOTE(review): these timestamps are pinned to one specific run, and `wh_name`
# is whatever an earlier cell last assigned — confirm both before re-running.
history_window_start = '2023-06-01 21:30'
history_window_end = '2023-06-02 11:00'

# Each finished query yields two rows: a 'queued' interval (start_time up to
# the derived exec_start_time) and a 'running' interval (exec_start_time up to
# end_time). exec_start_time is end_time minus execution_time milliseconds.
queries = f"""
with q as (
select 
    *,
      warehouse_name || ':' || cluster_number || ':' || query_id as warehouse_query,
    TIMESTAMPADD( millisecond , -execution_time,end_time) as exec_start_time
from table(information_schema.query_history_by_warehouse('{wh_name}'))
where execution_status != 'RUNNING'
and warehouse_size is not null
and start_time between '{history_window_start}' and '{history_window_end}'
order by start_time desc
limit 100
)
select
  'running' as status,
  warehouse_query,
  warehouse_name,
  cluster_number,
  exec_start_time as start_time,
  end_time,
  query_text,
  query_id
from q
union
select
  'queued' as status,
  warehouse_query,
  warehouse_name,
  cluster_number,
  start_time,
  exec_start_time as end_time,
  query_text,
  query_id
from q
"""

qdf = sql_to_df(queries)
display(qdf)

In [None]:
# Earliest interval start and latest interval end in qdf.
[qdf.start_time.min(),qdf.end_time.max()]

In [None]:
import plotly.express as px



# Replace the label column in place with a spaced "warehouse : cluster : query"
# form, which is used as the y-axis category of the timeline.
cluster_labels = qdf.cluster_number.map(str)
qdf['warehouse_query'] = qdf.warehouse_name + " : " + cluster_labels + " : " + qdf.query_id

# One bar per (query, status) interval, grouped by warehouse and cluster.
timeline_rows = qdf.sort_values(['warehouse_name', 'cluster_number', 'start_time'])
fig = px.timeline(
    timeline_rows,
    x_start="start_time",
    x_end="end_time",
    y="warehouse_query",
    color='status',
    hover_data=['query_text', 'query_id'],
)

fig.update_layout(height=1000)

# Pin the x-axis to the full span covered by the data.
time_span = [qdf.start_time.min(), qdf.end_time.max()]
fig.update_xaxes(range=time_span)

fig.show()


# figures = [
#         px.scatter(whe.sort_values('warehouse_name'), x="timestamp", y="warehouse_name", color='event_name'),
#         px.timeline(qdf.sort_values(['warehouse_name', 'start_time']), x_start="start_time", x_end="end_time", y="warehouse_query", hover_data=['query_text', 'query_id']),
#     ]

# fig = make_subplots(rows=len(figures), cols=1, shared_xaxes=True, vertical_spacing=0.05) 

# for i, figure in enumerate(figures):
#     for trace in range(len(figure["data"])):
#         fig.append_trace(figure["data"][trace], row=i+1, col=1)

# fig.update_xaxes(title_text="warehouse events", row=1, col=1)
# fig.update_xaxes(title_text="queries", row=2, col=1)
# fig.update_xaxes(showgrid=True,minor=dict(showgrid=True))
# fig.update_yaxes(showgrid=True,minor=dict(showgrid=True))
# fig.update_xaxes(autorange=True)
# fig.update_layout(
#     height=800,
# )
# fig.update_xaxes(type='date', autorange=True)

# fig.show()

# fig = px.bar(whm[whm.warehouse_name != 'OPS'], y='warehouse_name', x = ['credits_used_compute'], orientation = 'h', title='credits x warehouse')
# fig.show()
# fig = px.bar(whm, y='warehouse_name', x = ['credits_used_compute', 'credits_used_cloud_services'], orientation = 'h')
# fig.show()

In [None]:
# Number of distinct queries in qdf (each query can appear as both a 'running' and a 'queued' row).
qdf.query_id.nunique()

In [None]:

from optiml.utils.sf import snowconn

 
def monitor(target_wh, using_wh, continuous=False):
    """Print a snapshot of one warehouse: its SHOW WAREHOUSES row and recent queries.

    Args:
        target_wh: Warehouse to inspect. It is upper-cased before being
            compared with the ``name`` column of ``show warehouses``.
        using_wh: Warehouse selected via ``USE WAREHOUSE`` on a connection
            obtained from ``snowconn()``.
        continuous: Currently ignored; only a single snapshot is printed.
    """
    using_conn = snowconn()

    def show():
        # Print the warehouse row, then its ten most recent queries.
        run_sql(f"USE WAREHOUSE {using_wh}", ctx=using_conn)
        # NOTE(review): the sql_to_df calls below are not given using_conn, so
        # they presumably run on the default connection — confirm intent.

        # Fetched once; previously the same statement was issued twice and
        # the same row printed twice.
        show_df = sql_to_df("show warehouses")
        print(show_df[show_df.name == target_wh.upper()].to_string())

        queries = f"""
        select 
        *
    from table(information_schema.query_history_by_warehouse('{target_wh}'))
    order by start_time desc
    limit 10;
        """
        queries_df = sql_to_df(queries)
        print(queries_df.to_string())

    show()
    # if continuous:
    #     while(True):
            

monitor("test", "demo")

In [None]:
# Show the ten most recent metering rows (looking back 10 days) for each
# strategy's warehouse, using the `demo` warehouse to run the lookups.
run_sql("use warehouse demo")
for strategy in strategies:
    wh_name = strategy_warehouses[strategy]
    print(wh_name)
    # The warehouse name is upper-cased before being passed to the table function.
    wh_upper = wh_name.upper()
    cost_query = f"""
    select * 
from table(information_schema.WAREHOUSE_METERING_HISTORY( 
    date_range_start => dateadd('days',-10,current_date()),
    WAREHOUSE_NAME => '{wh_upper}' )
) 
order by start_time desc limit 10;
    """
    metering_df = sql_to_df(cost_query)
    display(metering_df)
    

In [None]:
# Print the SQL text left over from the last iteration of the cost loop.
print(cost_query)