In [None]:
# Import python packages
import streamlit as st
import pandas as pd
from croniter import croniter
from datetime import datetime, timedelta
import pytz
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import nbformat
import numpy as np
from snowflake.snowpark.context import get_active_session


# We can also use Snowpark for our analyses!
# Bind the notebook's active Snowpark session to `session`; the Python cells
# in this notebook run their SQL through `session.sql(...)`.
from snowflake.snowpark.context import get_active_session
session = get_active_session()

In [None]:
-- List every task in the account. SHOW output is not directly queryable, so the
-- statement's query ID is captured and read back through RESULT_SCAN.
show tasks in account;
-- Must run immediately after SHOW TASKS: LAST_QUERY_ID() refers to the most
-- recently executed statement.
set QUERY_ID_TASKS = last_query_id();

-- Display the captured SHOW TASKS result as a regular table.
select * FROM TABLE(RESULT_SCAN($QUERY_ID_TASKS));

In [None]:
/*
  Core task information.

  Reads the SHOW TASKS output captured in $QUERY_ID_TASKS and produces one row
  per task with upper-case column names, a fully qualified task name, a derived
  TASK_TYPE and the target completion interval converted to whole minutes.
  The statement's query ID is stored in $QUERY_ID_CORE_INFORMATION so that later
  cells can re-read this result through RESULT_SCAN.
*/
WITH high_level_details AS (
  SELECT
    "name" AS TASK_NAME,
    "id" AS TASK_ID,
    "database_name" AS DATABASE_NAME,
    "database_name" || '.' || "schema_name" || '.' || "name" AS FQ_TASK_NAME,
    "schema_name" AS SCHEMA_NAME,
    "owner" AS OWNER,
    "warehouse" AS WAREHOUSE,
    "schedule" AS SCHEDULE,
    "predecessors" AS PREDECESSORS,
    "definition" AS DEFINITION,
    "task_relations" AS TASK_RELATIONS,
    "scheduling_mode" AS SCHEDULING_MODE,
    "target_completion_interval" AS TARGET_COMPLETION_INTERVAL,
    /* A task with a warehouse is user-managed; otherwise it is FLEXIBLE when the
       scheduling mode says so, and SERVERLESS in every remaining case. */
    CASE
      WHEN NOT WAREHOUSE IS NULL THEN 'USER_MANAGED'
      WHEN SCHEDULING_MODE ILIKE '%FLEXIBLE%' THEN 'FLEXIBLE'
      ELSE 'SERVERLESS'
    END AS TASK_TYPE
  FROM
    TABLE(RESULT_SCAN($QUERY_ID_TASKS))
),
target_interval AS (
  /*
    Convert TARGET_COMPLETION_INTERVAL ('<n> <unit>') to minutes.
    REGEXP_LIKE anchors the pattern at both ends of the subject, so the unit
    alternatives must cover the whole value: both singular and plural spellings
    (e.g. '10 MINUTE' and '10 MINUTES') are accepted, and surrounding whitespace
    is tolerated. Values matching none of the patterns yield NULL.
  */
  SELECT
    FQ_TASK_NAME,
    CAST(
      CASE
        WHEN REGEXP_LIKE(
          UPPER(TARGET_COMPLETION_INTERVAL),
          '\\s*\\d+\\s*(MINUTES?|M)\\s*'
        ) THEN TO_NUMBER(
          REGEXP_SUBSTR(TARGET_COMPLETION_INTERVAL, '\\d+')
        )
        WHEN REGEXP_LIKE(
          UPPER(TARGET_COMPLETION_INTERVAL),
          '\\s*\\d+\\s*(HOURS?|H)\\s*'
        ) THEN TO_NUMBER(
          REGEXP_SUBSTR(TARGET_COMPLETION_INTERVAL, '\\d+')
        ) * 60
        WHEN REGEXP_LIKE(
          UPPER(TARGET_COMPLETION_INTERVAL),
          '\\s*\\d+\\s*(SECONDS?|S)\\s*'
        ) THEN TO_NUMBER(
          REGEXP_SUBSTR(TARGET_COMPLETION_INTERVAL, '\\d+')
        ) / 60
      END AS INT
    ) AS TARGET_COMPLETION_INTERVAL_MINS
  FROM
    high_level_details
)
SELECT
  hld.TASK_NAME,
  hld.FQ_TASK_NAME,
  hld.TASK_ID,
  hld.OWNER,
  hld.WAREHOUSE,
  hld.DATABASE_NAME,
  hld.SCHEMA_NAME,
  hld.SCHEDULE,
  /* Only FLEXIBLE scheduling modes are reported; everything else becomes 'NONE'. */
  CASE
    WHEN hld.SCHEDULING_MODE ILIKE '%FLEXIBLE%' THEN hld.SCHEDULING_MODE
    ELSE 'NONE'
  END AS SCHEDULING_MODE,
  hld.PREDECESSORS,
  hld.DEFINITION,
  t.TARGET_COMPLETION_INTERVAL_MINS,
  hld.TASK_RELATIONS,
  hld.TASK_TYPE
FROM
  target_interval AS t
  JOIN high_level_details AS hld ON t.FQ_TASK_NAME = hld.FQ_TASK_NAME;

/* Must directly follow the query above: LAST_QUERY_ID() refers to the most
   recently executed statement. */
SET
  QUERY_ID_CORE_INFORMATION = LAST_QUERY_ID();
  
SELECT
  *
FROM
  TABLE(RESULT_SCAN($QUERY_ID_CORE_INFORMATION));

In [None]:
/*
  Task hierarchy.

  Starting from the core task information ($QUERY_ID_CORE_INFORMATION), expands
  each task's predecessor list and walks the graph from tasks without
  predecessors downwards, so every task row carries the fully qualified name of
  the root it descends from (FQ_ROOT_TASK). The result's query ID is stored in
  $QUERY_ID_HIERARCHY for later cells.
*/
WITH show_tasks AS (
  SELECT
    *
  FROM
    TABLE(RESULT_SCAN($QUERY_ID_CORE_INFORMATION))
),
tasks
/* Flatten predecessors (including tasks without predecessors) */
/* First branch: one row per (task, predecessor). Second branch: one row with a
   NULL predecessor for tasks whose predecessor array is empty.
   NOTE(review): a task whose PREDECESSORS value is NULL or not parseable as JSON
   would match neither branch and drop out - confirm SHOW TASKS always returns an
   array here. */
AS (
  SELECT
    st.task_name,
    st.fq_task_name,
    st.task_id,
    st.owner,
    st.warehouse,
    st.database_name,
    st.schema_name,
    st.schedule,
    st.scheduling_mode,
    st.predecessors,
    st.definition,
    st.target_completion_interval_mins,
    st.task_relations,
    st.task_type,
    CAST(f.value AS TEXT) AS predecessor_task
  FROM
    show_tasks AS st,
    LATERAL FLATTEN(
      input => TRY_PARSE_JSON(CAST(st.predecessors AS VARIANT))
    ) AS f(SEQ, KEY, PATH, INDEX, VALUE, THIS)
  UNION ALL
  SELECT
    st.task_name,
    st.fq_task_name,
    st.task_id,
    st.owner,
    st.warehouse,
    st.database_name,
    st.schema_name,
    st.schedule,
    st.scheduling_mode,
    st.predecessors,
    st.definition,
    st.target_completion_interval_mins,
    st.task_relations,
    st.task_type,
    NULL AS predecessor_task
  FROM
    show_tasks AS st
  WHERE
    ARRAY_SIZE(TRY_PARSE_JSON(CAST(st.predecessors AS VARIANT))) = 0
),
recursive_roots
/* Recursive CTE to find root for each task */
/* Anchor member: tasks without a predecessor are their own root.
   Recursive member: a task inherits the root of the predecessor it joins to,
   matching its predecessor_task against the fully qualified name of an already
   resolved task. */
AS (
  SELECT
    task_name,
    fq_task_name,
    task_id,
    owner,
    warehouse,
    database_name,
    schema_name,
    schedule,
    scheduling_mode,
    predecessors,
    definition,
    target_completion_interval_mins,
    task_relations,
    task_type,
    predecessor_task,
    task_name AS root_task,
    fq_task_name AS fq_root_task
  FROM
    tasks
  WHERE
    predecessor_task IS NULL
  UNION ALL
  SELECT
    t.task_name,
    t.fq_task_name,
    t.task_id,
    t.owner,
    t.warehouse,
    t.database_name,
    t.schema_name,
    t.schedule,
    t.scheduling_mode,
    t.predecessors,
    t.definition,
    t.target_completion_interval_mins,
    t.task_relations,
    t.task_type,
    t.predecessor_task,
    r.root_task,
    r.fq_root_task
  FROM
    tasks AS t
    JOIN recursive_roots AS r ON t.predecessor_task = r.fq_task_name
)
/* Final output with all columns. predecessor_task is not projected and
   GROUP BY ALL collapses the duplicates that arise when a task reaches the same
   root through several predecessors. */
SELECT
  task_name
  /* Basic Task Identifiers */,
  fq_task_name,
  task_id,
  task_type,
  database_name
  /* Location and Ownership */,
  schema_name,
  owner,
  warehouse,
  schedule
  /* Scheduling and Execution Details */,
  scheduling_mode,
  target_completion_interval_mins,
  definition,
  predecessors
  /* Task Dependencies and Relations */,
  task_relations,
  fq_root_task
FROM
  recursive_roots
GROUP BY
  ALL
ORDER BY
  fq_task_name;

/* Must directly follow the query above: LAST_QUERY_ID() refers to the most
   recently executed statement. */
SET QUERY_ID_HIERARCHY = last_query_id();

SELECT * FROM TABLE(RESULT_SCAN($QUERY_ID_HIERARCHY));

In [None]:
# Import required packages
import pandas as pd
import streamlit as st
import plotly.express as px
from snowflake.snowpark.context import get_active_session


def _or_na(value, fallback='N/A'):
    """Return ``value`` unless it is missing (None/NaN/NA) or empty, else ``fallback``."""
    if value is None or pd.isna(value) or value == '':
        return fallback
    return value


# Read the task hierarchy computed by the SQL cell that sets $QUERY_ID_HIERARCHY
# instead of re-running a copy of that query here; the two copies could
# otherwise drift apart. That cell must have been executed in this session.
session = get_active_session()
df = session.sql("""
SELECT
  *
FROM
  TABLE(RESULT_SCAN($QUERY_ID_HIERARCHY))
ORDER BY
  fq_task_name
""").to_pandas()

if df.empty:
    # Without rows there is nothing to select; indexing the first match below
    # would raise an IndexError.
    st.warning("No tasks were found in this account.")
else:
    # Create database filter
    databases = sorted(df['DATABASE_NAME'].unique())
    selected_database = st.selectbox('Select Database', options=databases)

    # Filter tasks by selected database
    filtered_df = df[df['DATABASE_NAME'] == selected_database]

    # Create task selector
    tasks = sorted(filtered_df['TASK_NAME'].unique())
    selected_task = st.selectbox('Select Task', options=tasks)

    # Get task details (first matching row when the name occurs more than once)
    task_details = filtered_df[filtered_df['TASK_NAME'] == selected_task].iloc[0]

    # Display task details in an organized layout
    st.markdown("### Task Details", help="Detailed information about the selected task")

    # Create a container for better spacing
    with st.container():
        # Basic Information and Location in one row
        col1, col2, col3, col4, col5 = st.columns(5)
        with col1:
            st.metric("Type", task_details['TASK_TYPE'], label_visibility="visible", help="Task execution type")
        with col2:
            st.metric("Owner", task_details['OWNER'], label_visibility="visible", help="Task owner")
        with col3:
            st.metric("Warehouse", _or_na(task_details['WAREHOUSE']), label_visibility="visible", help="Associated warehouse")
        with col4:
            st.metric("Database", task_details['DATABASE_NAME'], label_visibility="visible", help="Database name")
        with col5:
            st.metric("Schema", task_details['SCHEMA_NAME'], label_visibility="visible", help="Schema name")

        # Add custom CSS to reduce metric size
        st.markdown("""
        <style>
            [data-testid="stMetricValue"] {
                font-size: 1rem;
            }
            [data-testid="stMetricLabel"] {
                font-size: 0.8rem;
            }
        </style>
    """, unsafe_allow_html=True)

        # Scheduling Information
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Schedule", _or_na(task_details['SCHEDULE']), label_visibility="visible", help="Task schedule")
        with col2:
            st.metric("Mode", task_details['SCHEDULING_MODE'], label_visibility="visible", help="Scheduling mode")
        with col3:
            # A missing interval arrives as NaN, which is truthy; test for
            # missingness explicitly so "nan" is never rendered. A zero interval
            # stays hidden, as before.
            target_interval_mins = task_details['TARGET_COMPLETION_INTERVAL_MINS']
            if pd.notna(target_interval_mins) and target_interval_mins:
                st.metric("Target Interval (mins)", f"{target_interval_mins:.0f}", label_visibility="visible", help="Target completion interval")

        # Dependencies and Root Task
        with st.expander("Dependencies and Full Task Name"):
            st.text("Full Task Name:")
            st.code(task_details['FQ_TASK_NAME'], language=None)

            st.text("Root Task:")
            st.code(task_details['FQ_ROOT_TASK'], language=None)

            st.text("Predecessors:")
            if task_details['PREDECESSORS']:
                st.json(task_details['PREDECESSORS'])
            else:
                st.text("No predecessors")

        # Task Definition
        with st.expander("Task Definition"):
            st.code(task_details['DEFINITION'])

In [None]:
/* Get task history for the last 30 days */
/* The raw rows are kept addressable through $QUERY_ID_TASK_HISTORY so later
   cells can reuse them via RESULT_SCAN without querying ACCOUNT_USAGE again. */
SELECT
  *
FROM
  snowflake.account_usage.task_history
WHERE
  query_start_time > DATEADD(DAY, -30, CURRENT_TIMESTAMP());
SET
  QUERY_ID_TASK_HISTORY = LAST_QUERY_ID();
  /* Get execution times from task_history for all tasks */
  /* One row per task per scheduled day: total and average duration plus run count.
     NOTE(review): durations are measured from SCHEDULED_TIME to COMPLETED_TIME,
     so they include any delay between the scheduled time and the actual start -
     confirm that is the intended definition of "execution" time. */
SELECT
  DATE(SCHEDULED_TIME) AS DATE,
  NAME AS TASK_NAME,
  DATABASE_NAME || '.' || SCHEMA_NAME || '.' || NAME AS FQ_TASK_NAME,
  SUM(DATEDIFF(SECOND, SCHEDULED_TIME, COMPLETED_TIME)) AS TOTAL_EXECUTION_SECONDS,
  AVG(DATEDIFF(SECOND, SCHEDULED_TIME, COMPLETED_TIME)) AS AVG_EXECUTION_SECONDS,
  COUNT(*) AS RUNS
FROM
  TABLE(RESULT_SCAN($QUERY_ID_TASK_HISTORY))
GROUP BY
  ALL
/* Positional ordering: by task name (column 2), then by date (column 1). */
ORDER BY 2, 1 ;
SET
  QUERY_ID_RUN_TIMES = LAST_QUERY_ID();
/* Display the per-day run times captured above. */
SELECT
  *
FROM
  TABLE(RESULT_SCAN($QUERY_ID_RUN_TIMES));

In [None]:
# Import required packages
import streamlit as st
import plotly.express as px
from snowflake.snowpark.context import get_active_session

# Daily average task duration for the trailing 30 days, queried directly from
# ACCOUNT_USAGE.TASK_HISTORY; tasks whose name starts with 'CIS' are excluded.
avg_runtime_sql = """
SELECT
  DATE(SCHEDULED_TIME) AS DATE,
  NAME AS TASK_NAME,
  DATABASE_NAME || '.' || SCHEMA_NAME || '.' || NAME AS FQ_TASK_NAME,
  AVG(DATEDIFF(SECOND, SCHEDULED_TIME, COMPLETED_TIME)) AS AVG_EXECUTION_SECONDS
FROM
  snowflake.account_usage.task_history
WHERE
  query_start_time > DATEADD(DAY, -30, CURRENT_TIMESTAMP())
  and task_name not like 'CIS%'
GROUP BY
  DATE,
  NAME,
  DATABASE_NAME || '.' || SCHEMA_NAME || '.' || NAME
ORDER BY 
  DATE, TASK_NAME
"""
session = get_active_session()
df = session.sql(avg_runtime_sql).to_pandas()

# Human-readable names for the axes and the legend.
avg_axis_labels = {
    'DATE': 'Date',
    'AVG_EXECUTION_SECONDS': 'Average Execution Time (seconds)',
    'TASK_NAME': 'Task Name',
}

# Legend sits just outside the plot area, aligned with its top edge.
avg_legend_position = {"yanchor": "top", "y": 0.99, "xanchor": "left", "x": 1.02}

# One line per task name.
fig = px.line(
    df,
    x='DATE',
    y='AVG_EXECUTION_SECONDS',
    color='TASK_NAME',
    title='Average Task Execution Time by Day',
    labels=avg_axis_labels,
)
fig.update_layout(
    height=600,
    showlegend=True,
    legend=avg_legend_position,
    hovermode='x unified',
)

# Render the figure at the full width of its container.
st.plotly_chart(fig, use_container_width=True)


In [None]:
# Import required packages
import streamlit as st
import plotly.express as px
from snowflake.snowpark.context import get_active_session

# Daily total task duration for the trailing 30 days, queried directly from
# ACCOUNT_USAGE.TASK_HISTORY; tasks whose name starts with 'CIS' are excluded.
total_runtime_sql = """
SELECT
  DATE(SCHEDULED_TIME) AS DATE,
  NAME AS TASK_NAME,
  DATABASE_NAME || '.' || SCHEMA_NAME || '.' || NAME AS FQ_TASK_NAME,
  SUM(DATEDIFF(SECOND, SCHEDULED_TIME, COMPLETED_TIME)) AS TOTAL_EXECUTION_SECONDS
FROM
  snowflake.account_usage.task_history
WHERE
  query_start_time > DATEADD(DAY, -30, CURRENT_TIMESTAMP())
  and task_name not like 'CIS%'
GROUP BY
  DATE,
  NAME,
  DATABASE_NAME || '.' || SCHEMA_NAME || '.' || NAME
ORDER BY 
  DATE, TASK_NAME
"""
session = get_active_session()
df = session.sql(total_runtime_sql).to_pandas()

# Human-readable names for the axes and the legend.
total_axis_labels = {
    'DATE': 'Date',
    'TOTAL_EXECUTION_SECONDS': 'Total Execution Time (seconds)',
    'TASK_NAME': 'Task Name',
}

# Legend sits just outside the plot area, aligned with its top edge.
total_legend_position = {"yanchor": "top", "y": 0.99, "xanchor": "left", "x": 1.02}

# One line per task name.
fig = px.line(
    df,
    x='DATE',
    y='TOTAL_EXECUTION_SECONDS',
    color='TASK_NAME',
    title='Total Task Execution Time by Day',
    labels=total_axis_labels,
)
fig.update_layout(
    height=600,
    showlegend=True,
    legend=total_legend_position,
    hovermode='x unified',
)

# Render the figure at the full width of its container.
st.plotly_chart(fig, use_container_width=True)


In [None]:
-- WITH CORE_INFORMATION AS (
--   SELECT
--     *
--   FROM
--     TABLE(RESULT_SCAN($QUERY_ID_CORE_INFORMATION))
-- ),
-- HIERARCHY AS (
--   SELECT
--     *
--   FROM
--     TABLE(RESULT_SCAN($QUERY_ID_HIERARCHY))
-- ),
-- RUN_TIMES AS (
--   SELECT
--     *
--   FROM
--     TABLE(RESULT_SCAN($QUERY_ID_RUN_TIMES))
-- )
-- SELECT
--   CI.TASK_NAME,
--   CI.FQ_TASK_NAME,
--   CI.OWNER,
--   CI.WAREHOUSE,
--   CI.DEFINITION,
--   CI.SCHEDULE,
--   CI.SCHEDULING_MODE,
--   CI.TARGET_COMPLETION_INTERVAL_MINS,
--   CI.TASK_TYPE,
--   H.FQ_ROOT_TASK,
--   RT.TOTAL_EXECUTION_SECONDS,
--   RT.AVG_EXECUTION_SECONDS,
--   RT.RUNS
-- FROM
--   CORE_INFORMATION AS CI
--   JOIN HIERARCHY AS H ON CI.FQ_TASK_NAME = H.FQ_TASK_NAME
--   LEFT JOIN RUN_TIMES AS RT ON RT.FQ_TASK_NAME = CI.FQ_TASK_NAME;

In [None]:
/*
  Combined task overview.

  Joins the three captured results: core task information, the task hierarchy
  (root task) and the per-day run times. RUN_TIMES holds one row per task per
  day, so the day is projected as RUN_DATE; without it the repeated rows of a
  task cannot be told apart. Tasks without any run in the captured history keep
  a single row with NULL statistics (LEFT JOIN).
*/
WITH CORE_INFORMATION AS (
  SELECT
    *
  FROM
    TABLE(RESULT_SCAN($QUERY_ID_CORE_INFORMATION))
),
HIERARCHY AS (
  SELECT
    *
  FROM
    TABLE(RESULT_SCAN($QUERY_ID_HIERARCHY))
),
RUN_TIMES AS (
  SELECT
    *
  FROM
    TABLE(RESULT_SCAN($QUERY_ID_RUN_TIMES))
)
SELECT
  CI.TASK_NAME
  /* Basic Task Identifiers */,
  CI.FQ_TASK_NAME,
  CI.TASK_ID,
  CI.TASK_TYPE,
  CI.DATABASE_NAME
  /* Location and Ownership */,
  CI.SCHEMA_NAME,
  CI.OWNER,
  CI.WAREHOUSE,
  CI.SCHEDULE
  /* Task Configuration */,
  CI.SCHEDULING_MODE,
  CI.TARGET_COMPLETION_INTERVAL_MINS,
  CI.DEFINITION,
  CI.PREDECESSORS
  /* Task Dependencies */,
  CI.TASK_RELATIONS,
  H.FQ_ROOT_TASK,
  RT.DATE AS RUN_DATE
  /* Execution Statistics (per RUN_DATE) */,
  RT.TOTAL_EXECUTION_SECONDS,
  RT.AVG_EXECUTION_SECONDS,
  RT.RUNS
FROM
  CORE_INFORMATION AS CI
  JOIN HIERARCHY AS H ON CI.FQ_TASK_NAME = H.FQ_TASK_NAME
  LEFT JOIN RUN_TIMES AS RT ON RT.FQ_TASK_NAME = CI.FQ_TASK_NAME
ORDER BY
  CI.FQ_TASK_NAME,
  RUN_DATE;

In [None]:
/*
  Estimated warehouse credits per task and day.

  Matches the captured task history ($QUERY_ID_TASK_HISTORY) to QUERY_HISTORY by
  query ID and estimates credits as elapsed hours x credits-per-hour of the
  warehouse size. This is an estimate derived from elapsed time, not billed usage.
*/
WITH TASK_HISTORY AS (
  SELECT
    DISTINCT QUERY_ID,
    NAME AS TASK_NAME,
    STATE,
    DATABASE_NAME || '.' || SCHEMA_NAME || '.' || NAME AS FQ_TASK_NAME
  FROM
    TABLE(RESULT_SCAN($QUERY_ID_TASK_HISTORY))
),
QUERY_COST_DETAIL AS (
  SELECT
    TH.TASK_NAME,
    FQ_TASK_NAME,
    QH.QUERY_ID,
    QH.START_TIME,
    QH.DATABASE_NAME,
    QH.SCHEMA_NAME,
    QH.WAREHOUSE_NAME,
    QH.WAREHOUSE_SIZE,
    /* TOTAL_ELAPSED_TIME is in milliseconds. */
    TOTAL_ELAPSED_TIME / 3600000.0 AS TOTAL_ELAPSED_HOURS,
    /* Credits per hour double with each size step; 5X-LARGE and 6X-LARGE are
       included so that queries on those warehouses are not left without an
       estimate. */
    CASE
      UPPER(QH.WAREHOUSE_SIZE)
      WHEN 'X-SMALL' THEN 1
      WHEN 'SMALL' THEN 2
      WHEN 'MEDIUM' THEN 4
      WHEN 'LARGE' THEN 8
      WHEN 'X-LARGE' THEN 16
      WHEN '2X-LARGE' THEN 32
      WHEN '3X-LARGE' THEN 64
      WHEN '4X-LARGE' THEN 128
      WHEN '5X-LARGE' THEN 256
      WHEN '6X-LARGE' THEN 512
      ELSE NULL
    END AS CREDITS_PER_HOUR,
    TOTAL_ELAPSED_HOURS * CREDITS_PER_HOUR AS ESTIMATED_CREDITS_USED
  FROM
    SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY AS QH
    JOIN TASK_HISTORY AS TH ON TH.QUERY_ID = QH.QUERY_ID
  WHERE
    QH.START_TIME > DATEADD(DAY, -30, CURRENT_DATE)
    AND NOT QH.WAREHOUSE_NAME IS NULL
    AND NOT QH.DATABASE_NAME IS NULL
    /* NOTE(review): restricts the result to a single database named
       SERVERLESS_TASKS - looks environment-specific; confirm it is intended. */
    AND QH.DATABASE_NAME ILIKE 'SERVERLESS_TASKS'
    AND NOT QH.WAREHOUSE_SIZE IS NULL
    AND STATE = 'SUCCEEDED'
)
SELECT
  TO_DATE(START_TIME) AS DATE,
  TASK_NAME,
  FQ_TASK_NAME,
  WAREHOUSE_NAME,
  UPPER(WAREHOUSE_SIZE) AS WAREHOUSE_SIZE,
  COUNT(*) AS TASK_EXECUTION_COUNT,
  SUM(TOTAL_ELAPSED_HOURS) AS TOTAL_ELAPSED_HOURS,
  SUM(ESTIMATED_CREDITS_USED) AS QUERY_CREDITS
FROM
  QUERY_COST_DETAIL
GROUP BY
  TO_DATE(START_TIME),
  TASK_NAME,
  FQ_TASK_NAME,
  WAREHOUSE_NAME,
  WAREHOUSE_SIZE
ORDER BY
  TASK_NAME,
  DATE;

In [None]:
# Import required packages
import streamlit as st
import plotly.express as px
from snowflake.snowpark.context import get_active_session

# Estimated warehouse credits per task and day: the captured task history
# ($QUERY_ID_TASK_HISTORY) is matched to QUERY_HISTORY by query ID and each
# query's elapsed hours are multiplied by the credits-per-hour of its warehouse
# size. Tasks whose name contains 'CIS' are excluded.
session = get_active_session()
df = session.sql("""
WITH TASK_HISTORY AS (
  SELECT
    DISTINCT QUERY_ID,
    NAME AS TASK_NAME,
    STATE,
    DATABASE_NAME || '.' || SCHEMA_NAME || '.' || NAME AS FQ_TASK_NAME
  FROM
    TABLE(RESULT_SCAN($QUERY_ID_TASK_HISTORY))
),
QUERY_COST_DETAIL AS (
  SELECT
    TH.TASK_NAME,
    FQ_TASK_NAME,
    QH.QUERY_ID,
    QH.START_TIME,
    QH.DATABASE_NAME,
    QH.SCHEMA_NAME,
    QH.WAREHOUSE_NAME,
    QH.WAREHOUSE_SIZE,
    TOTAL_ELAPSED_TIME / 3600000.0 AS TOTAL_ELAPSED_HOURS,
    /* 5X-LARGE and 6X-LARGE are included so queries on those warehouses are
       not left without a credit estimate. */
    CASE
      UPPER(QH.WAREHOUSE_SIZE)
      WHEN 'X-SMALL' THEN 1
      WHEN 'SMALL' THEN 2
      WHEN 'MEDIUM' THEN 4
      WHEN 'LARGE' THEN 8
      WHEN 'X-LARGE' THEN 16
      WHEN '2X-LARGE' THEN 32
      WHEN '3X-LARGE' THEN 64
      WHEN '4X-LARGE' THEN 128
      WHEN '5X-LARGE' THEN 256
      WHEN '6X-LARGE' THEN 512
      ELSE NULL
    END AS CREDITS_PER_HOUR,
    TOTAL_ELAPSED_HOURS * CREDITS_PER_HOUR AS ESTIMATED_CREDITS_USED
  FROM
    SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY AS QH
    JOIN TASK_HISTORY AS TH ON TH.QUERY_ID = QH.QUERY_ID
  WHERE
    QH.START_TIME > DATEADD(DAY, -30, CURRENT_DATE)
    AND NOT QH.WAREHOUSE_NAME IS NULL
    AND NOT QH.DATABASE_NAME IS NULL
    AND TH.TASK_NAME NOT LIKE '%CIS%'
    AND NOT QH.WAREHOUSE_SIZE IS NULL
    AND STATE = 'SUCCEEDED'
)
SELECT
  TO_DATE(START_TIME) AS DATE,
  TASK_NAME,
  FQ_TASK_NAME,
  WAREHOUSE_NAME,
  UPPER(WAREHOUSE_SIZE) AS WAREHOUSE_SIZE,
  COUNT(*) AS TASK_EXECUTION_COUNT,
  SUM(TOTAL_ELAPSED_HOURS) AS TOTAL_ELAPSED_HOURS,
  SUM(ESTIMATED_CREDITS_USED) AS QUERY_CREDITS
FROM
  QUERY_COST_DETAIL
GROUP BY
  TO_DATE(START_TIME),
  TASK_NAME,
  FQ_TASK_NAME,
  WAREHOUSE_NAME,
  WAREHOUSE_SIZE
ORDER BY
  TASK_NAME,
  DATE
""").to_pandas()

# Create the line chart with custom hover data (one line per task name;
# hours and credits are shown with two decimals in the hover box)
fig = px.line(df, 
              x='DATE', 
              y='QUERY_CREDITS',
              color='TASK_NAME',
              title='Query Credits Usage by Task and Day',
              labels={
                  'DATE': 'Date',
                  'QUERY_CREDITS': 'Query Credits Used',
                  'TASK_NAME': 'Task Name',
                  'TOTAL_ELAPSED_HOURS': 'Total Hours'
              },
              hover_data={
                  'TOTAL_ELAPSED_HOURS': ':.2f',
                  'QUERY_CREDITS': ':.2f',
                  'TASK_NAME': True,
                  'DATE': True
              })

# Customize the layout: legend placed just outside the plot area
fig.update_layout(
    height=600,
    showlegend=True,
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=1.02
    ),
    hovermode='x unified'
)

# Display the chart
st.plotly_chart(fig, use_container_width=True)


In [None]:
-- Serverless task credits per task and day for the trailing 30 days, with the
-- number of history rows and the average credits per row.
-- NOTE(review): TASK_EXECUTION_COUNT is COUNT(*) over SERVERLESS_TASK_HISTORY
-- rows - confirm that one row corresponds to one task run.
SELECT
  TO_DATE(START_TIME)                                         AS DATE,
  TASK_NAME,
  DATABASE_NAME || '.' || SCHEMA_NAME || '.' || TASK_NAME     AS FQ_TASK_NAME,
  COUNT(*)                                                    AS TASK_EXECUTION_COUNT,
  SUM(CREDITS_USED)                                           AS TOTAL_SERVERLESS_CREDITS,
  ROUND(TOTAL_SERVERLESS_CREDITS / TASK_EXECUTION_COUNT, 5)   AS AVG_SERVERLESS_CREDITS_PER_RUN
FROM SNOWFLAKE.ACCOUNT_USAGE.SERVERLESS_TASK_HISTORY
WHERE START_TIME > DATEADD(DAY, -30, CURRENT_TIMESTAMP())
-- Positions 1-3 are the three non-aggregated expressions of the select list.
GROUP BY 1, 2, 3
ORDER BY TASK_NAME, DATE;

In [None]:
# Import required packages
import streamlit as st
import plotly.express as px
from snowflake.snowpark.context import get_active_session

# Serverless task credits per task and day for the trailing 30 days.
serverless_credits_sql = """
SELECT
  TO_DATE(START_TIME) AS DATE,
  TASK_NAME,
  DATABASE_NAME || '.' || SCHEMA_NAME || '.' || TASK_NAME AS FQ_TASK_NAME,
  COUNT(*) AS TASK_EXECUTION_COUNT,  
  SUM(CREDITS_USED) AS TOTAL_SERVERLESS_CREDITS,
  ROUND(TOTAL_SERVERLESS_CREDITS / TASK_EXECUTION_COUNT, 5) AS AVG_SERVERLESS_CREDITS_PER_RUN
FROM
  SNOWFLAKE.ACCOUNT_USAGE.SERVERLESS_TASK_HISTORY
WHERE
  START_TIME > DATEADD(DAY, -30, CURRENT_TIMESTAMP())
GROUP BY
  TO_DATE(START_TIME),
  TASK_NAME,
  DATABASE_NAME || '.' || SCHEMA_NAME || '.' || TASK_NAME
ORDER BY
  TASK_NAME,
  DATE
"""
session = get_active_session()
df = session.sql(serverless_credits_sql).to_pandas()

# Display names for axes, legend and hover entries.
serverless_labels = {
    'DATE': 'Date',
    'TOTAL_SERVERLESS_CREDITS': 'Total Serverless Credits',
    'TASK_NAME': 'Task Name',
    'AVG_SERVERLESS_CREDITS_PER_RUN': 'Avg Credits per Run',
}

# Columns shown in the hover box; strings are number formats, True shows the raw value.
serverless_hover = {
    'AVG_SERVERLESS_CREDITS_PER_RUN': ':.5f',
    'TOTAL_SERVERLESS_CREDITS': ':.2f',
    'TASK_NAME': True,
    'DATE': True,
    'TASK_EXECUTION_COUNT': True,
}

# Legend sits just outside the plot area, aligned with its top edge.
serverless_legend_position = {"yanchor": "top", "y": 0.99, "xanchor": "left", "x": 1.02}

# One line per task name.
fig = px.line(
    df,
    x='DATE',
    y='TOTAL_SERVERLESS_CREDITS',
    color='TASK_NAME',
    title='Serverless Credits Usage by Task and Day',
    labels=serverless_labels,
    hover_data=serverless_hover,
)
fig.update_layout(
    height=600,
    showlegend=True,
    legend=serverless_legend_position,
    hovermode='x unified',
)

# Render the figure at the full width of its container.
st.plotly_chart(fig, use_container_width=True)


In [None]:
-- Capture SHOW WAREHOUSES so its output (name, size, auto_suspend) can be read
-- through RESULT_SCAN. The SET must directly follow the SHOW statement because
-- LAST_QUERY_ID() refers to the most recently executed statement.
show warehouses;
SET QUERY_ID_WAREHOUSES = last_query_id();

/*
  Estimated idle ("auto-resume") credits per warehouse and day.

  For every warehouse event whose query ID belongs to an error-free task run,
  the warehouse is charged one full auto-suspend window:
  (auto_suspend seconds / 3600) x credits-per-hour of its size.

  NOTE(review): every event type joined through QUERY_ID is counted, not only
  resume events, and no time filter is applied - confirm this is intended.
  NOTE(review): INFORMATION_SCHEMA.TASK_HISTORY() is called without arguments,
  so it relies on that table function's default time range and row limit -
  confirm these cover the period of interest.
*/
WITH AUTO_SUSPEND_COST_LOOKUP AS (
  SELECT
    "name" AS WAREHOUSE_NAME,
    "auto_suspend" AS AUTO_SUSPEND,
    "size" AS SIZE,
    /* Credits per hour double with each size step. 5X-LARGE and 6X-LARGE are
       listed explicitly; previously they fell through to ELSE 0 and were
       reported as costing nothing. */
    CASE
      UPPER("size")
      WHEN 'X-SMALL' THEN 1
      WHEN 'SMALL' THEN 2
      WHEN 'MEDIUM' THEN 4
      WHEN 'LARGE' THEN 8
      WHEN 'X-LARGE' THEN 16
      WHEN '2X-LARGE' THEN 32
      WHEN '3X-LARGE' THEN 64
      WHEN '4X-LARGE' THEN 128
      WHEN '5X-LARGE' THEN 256
      WHEN '6X-LARGE' THEN 512
      ELSE 0
    END AS CREDIT_MULTIPLIER,
    (AUTO_SUSPEND / 3600.0) * CREDIT_MULTIPLIER AS TOTAL_AUTO_RESUME_WH_CREDITS
  FROM
    TABLE(RESULT_SCAN($QUERY_ID_WAREHOUSES))
),
TASK_QUERIES AS (
  SELECT
    DISTINCT (QUERY_ID)
  FROM
    TABLE(INFORMATION_SCHEMA.TASK_HISTORY())
  WHERE
    ERROR_CODE IS NULL
)
SELECT
  TO_DATE(TIMESTAMP) AS DATE,
  WEH.WAREHOUSE_NAME,
  UPPER(ASCL.SIZE) AS WAREHOUSE_SIZE,
  ASCL.AUTO_SUSPEND,
  ASCL.CREDIT_MULTIPLIER as wh_credit_multiplier,
  COUNT(*) AS EVENT_COUNT,
  SUM(TOTAL_AUTO_RESUME_WH_CREDITS) AS DAILY_AUTO_RESUME_CREDITS
FROM
  SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_EVENTS_HISTORY AS WEH
  JOIN AUTO_SUSPEND_COST_LOOKUP AS ASCL ON ASCL.WAREHOUSE_NAME = WEH.WAREHOUSE_NAME
  JOIN TASK_QUERIES AS TQ ON TQ.QUERY_ID = WEH.QUERY_ID
GROUP BY
  DATE,
  WEH.WAREHOUSE_NAME,
  ASCL.SIZE,
  ASCL.AUTO_SUSPEND,
  ASCL.CREDIT_MULTIPLIER
ORDER BY
  DATE,
  WEH.WAREHOUSE_NAME;

In [None]:
# Import required packages
import streamlit as st
import plotly.express as px
from snowflake.snowpark.context import get_active_session

# Estimated idle ("auto-resume") credits per warehouse and day. Each warehouse
# event whose query ID belongs to an error-free task run is charged one full
# auto-suspend window: (auto_suspend seconds / 3600) x credits-per-hour.
# Requires $QUERY_ID_WAREHOUSES, set by the SQL cell that runs SHOW WAREHOUSES.
session = get_active_session()
df = session.sql("""
WITH AUTO_SUSPEND_COST_LOOKUP AS (
  SELECT
    "name" AS WAREHOUSE_NAME,
    "auto_suspend" AS AUTO_SUSPEND,
    "size" AS SIZE,
    /* 5X-LARGE and 6X-LARGE are listed explicitly; previously they fell
       through to ELSE 0 and were reported as costing nothing. */
    CASE
      UPPER("size")
      WHEN 'X-SMALL' THEN 1
      WHEN 'SMALL' THEN 2
      WHEN 'MEDIUM' THEN 4
      WHEN 'LARGE' THEN 8
      WHEN 'X-LARGE' THEN 16
      WHEN '2X-LARGE' THEN 32
      WHEN '3X-LARGE' THEN 64
      WHEN '4X-LARGE' THEN 128
      WHEN '5X-LARGE' THEN 256
      WHEN '6X-LARGE' THEN 512
      ELSE 0
    END AS CREDIT_MULTIPLIER,
    (AUTO_SUSPEND / 3600.0) * CREDIT_MULTIPLIER AS TOTAL_AUTO_RESUME_WH_CREDITS
  FROM
    TABLE(RESULT_SCAN($QUERY_ID_WAREHOUSES))
),
TASK_QUERIES AS (
  SELECT
    DISTINCT (QUERY_ID)
  FROM
    TABLE(INFORMATION_SCHEMA.TASK_HISTORY())
  WHERE
    ERROR_CODE IS NULL
)
SELECT
  TO_DATE(TIMESTAMP) AS DATE,
  WEH.WAREHOUSE_NAME,
  UPPER(ASCL.SIZE) AS WAREHOUSE_SIZE,
  ASCL.AUTO_SUSPEND,
  ASCL.CREDIT_MULTIPLIER as WH_CREDIT_MULTIPLIER,
  COUNT(*) AS EVENT_COUNT,
  SUM(TOTAL_AUTO_RESUME_WH_CREDITS) AS DAILY_AUTO_RESUME_CREDITS
FROM
  SNOWFLAKE.ACCOUNT_USAGE.WAREHOUSE_EVENTS_HISTORY AS WEH
  JOIN AUTO_SUSPEND_COST_LOOKUP AS ASCL ON ASCL.WAREHOUSE_NAME = WEH.WAREHOUSE_NAME
  JOIN TASK_QUERIES AS TQ ON TQ.QUERY_ID = WEH.QUERY_ID
GROUP BY
  DATE,
  WEH.WAREHOUSE_NAME,
  ASCL.SIZE,
  ASCL.AUTO_SUSPEND,
  ASCL.CREDIT_MULTIPLIER
ORDER BY
  DATE,
  WEH.WAREHOUSE_NAME
""").to_pandas()

# Create the line chart with custom hover data (one line per warehouse)
fig = px.line(df, 
              x='DATE', 
              y='DAILY_AUTO_RESUME_CREDITS',
              color='WAREHOUSE_NAME',
              title='Daily Auto-Resume Credits by Warehouse',
              labels={
                  'DATE': 'Date',
                  'DAILY_AUTO_RESUME_CREDITS': 'Auto-Resume Credits',
                  'WAREHOUSE_NAME': 'Warehouse Name',
                  'WAREHOUSE_SIZE': 'Warehouse Size',
                  'EVENT_COUNT': 'Resume Events'
              },
              hover_data={
                  'WAREHOUSE_SIZE': True,
                  'EVENT_COUNT': True,
                  'AUTO_SUSPEND': True,
                  'DAILY_AUTO_RESUME_CREDITS': ':.2f',
                  'WAREHOUSE_NAME': True,
                  'DATE': True
              })

# Customize the layout: legend placed just outside the plot area
fig.update_layout(
    height=600,
    showlegend=True,
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=1.02
    ),
    hovermode='x unified'
)

# Display the chart
st.plotly_chart(fig, use_container_width=True)


In [None]:
# Import required packages
import streamlit as st
import plotly.express as px
from snowflake.snowpark.context import get_active_session

# Get the data combining both serverless and warehouse credits.
# SERVERLESS_COSTS sums CREDITS_USED from SERVERLESS_TASK_HISTORY;
# WAREHOUSE_COSTS estimates credits for succeeded task runs as elapsed hours x
# credits-per-hour of the warehouse size. Both cover the trailing 30 days and
# are combined per (database, task) with a FULL OUTER JOIN.
session = get_active_session()
df = session.sql("""
WITH SERVERLESS_COSTS AS (
    SELECT
        COALESCE(DATABASE_NAME, 'Unknown') AS DATABASE_NAME,
        COALESCE(TASK_NAME, 'Unknown') AS TASK_NAME,
        SUM(TOTAL_SERVERLESS_CREDITS) AS SERVERLESS_CREDITS
    FROM (
        SELECT
            DATABASE_NAME,
            TASK_NAME,
            SUM(CREDITS_USED) AS TOTAL_SERVERLESS_CREDITS
        FROM SNOWFLAKE.ACCOUNT_USAGE.SERVERLESS_TASK_HISTORY
        WHERE START_TIME > DATEADD(DAY, -30, CURRENT_TIMESTAMP())
        GROUP BY DATABASE_NAME, TASK_NAME
    )
    GROUP BY DATABASE_NAME, TASK_NAME
),
WAREHOUSE_COSTS AS (
    SELECT 
        COALESCE(DATABASE_NAME, 'Unknown') AS DATABASE_NAME,
        COALESCE(TASK_NAME, 'Unknown') AS TASK_NAME,
        SUM(QUERY_CREDITS) AS WAREHOUSE_CREDITS
    FROM (
        WITH TASK_HISTORY AS (
            SELECT DISTINCT 
                QUERY_ID,
                DATABASE_NAME,
                NAME AS TASK_NAME,
                STATE
            FROM TABLE(RESULT_SCAN($QUERY_ID_TASK_HISTORY))
            WHERE STATE = 'SUCCEEDED'
        )
        SELECT
            TH.DATABASE_NAME,
            TH.TASK_NAME,
            /* 5X-LARGE and 6X-LARGE are listed explicitly; previously they
               fell through to ELSE 0 and were reported as costing nothing. */
            (QH.TOTAL_ELAPSED_TIME / 3600000.0) * 
            CASE UPPER(QH.WAREHOUSE_SIZE)
                WHEN 'X-SMALL' THEN 1
                WHEN 'SMALL' THEN 2
                WHEN 'MEDIUM' THEN 4
                WHEN 'LARGE' THEN 8
                WHEN 'X-LARGE' THEN 16
                WHEN '2X-LARGE' THEN 32
                WHEN '3X-LARGE' THEN 64
                WHEN '4X-LARGE' THEN 128
                WHEN '5X-LARGE' THEN 256
                WHEN '6X-LARGE' THEN 512
                ELSE 0
            END AS QUERY_CREDITS
        FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY QH
        JOIN TASK_HISTORY TH ON TH.QUERY_ID = QH.QUERY_ID
        WHERE QH.START_TIME > DATEADD(DAY, -30, CURRENT_DATE)
    )
    GROUP BY DATABASE_NAME, TASK_NAME
)
SELECT 
    COALESCE(s.DATABASE_NAME, w.DATABASE_NAME) AS DATABASE_NAME,
    COALESCE(s.TASK_NAME, w.TASK_NAME) AS TASK_NAME,
    COALESCE(s.SERVERLESS_CREDITS, 0) AS SERVERLESS_CREDITS,
    COALESCE(w.WAREHOUSE_CREDITS, 0) AS WAREHOUSE_CREDITS,
    /* Sum the explicitly coalesced operands: a task present on only one side
       of the FULL OUTER JOIN must not end up with a NULL total, however the
       clash between the output aliases and the joined column names resolves. */
    COALESCE(s.SERVERLESS_CREDITS, 0) + COALESCE(w.WAREHOUSE_CREDITS, 0) AS TOTAL_CREDITS
FROM SERVERLESS_COSTS s
FULL OUTER JOIN WAREHOUSE_COSTS w 
    ON s.DATABASE_NAME = w.DATABASE_NAME 
    AND s.TASK_NAME = w.TASK_NAME
WHERE COALESCE(s.DATABASE_NAME, w.DATABASE_NAME) IS NOT NULL
  AND COALESCE(s.TASK_NAME, w.TASK_NAME) IS NOT NULL
ORDER BY DATABASE_NAME, TASK_NAME
""").to_pandas()

# Handle empty dataframe case
if df.empty:
    st.warning("No task data available for the selected period.")
else:
    # Add database filter
    databases = sorted(df['DATABASE_NAME'].unique())
    selected_database = st.selectbox(
        'Select Database',
        options=databases,
        key='database_selector'
    )

    # Filter tasks by selected database
    db_filtered_df = df[df['DATABASE_NAME'] == selected_database]

    # Add task selector
    tasks = sorted(db_filtered_df['TASK_NAME'].unique())
    selected_task = st.selectbox(
        'Select Task',
        options=tasks,
        key='task_selector'
    )

    # Filter data based on task selection
    filtered_df = db_filtered_df[db_filtered_df['TASK_NAME'] == selected_task]

    # Create summary by task
    task_summary = filtered_df.groupby('TASK_NAME').agg({
        'SERVERLESS_CREDITS': 'sum',
        'WAREHOUSE_CREDITS': 'sum',
        'TOTAL_CREDITS': 'sum'
    }).reset_index()

    # Sort by total credits
    task_summary = task_summary.sort_values('TOTAL_CREDITS', ascending=True)

    # Create the horizontal bar chart (serverless and warehouse credits stacked)
    fig = px.bar(task_summary, 
                 y='TASK_NAME',
                 x=['SERVERLESS_CREDITS', 'WAREHOUSE_CREDITS'],
                 orientation='h',
                 title=f'Credits Usage for Task: {selected_task}',
                 labels={
                     'value': 'Credits Used',
                     'TASK_NAME': 'Task Name',
                     'variable': 'Credit Type'
                 },
                 color_discrete_map={
                     'SERVERLESS_CREDITS': '#1f77b4',
                     'WAREHOUSE_CREDITS': '#ff7f0e'
                 })

    # Customize the layout; height grows with the number of bars, minimum 400
    fig.update_layout(
        height=max(400, len(task_summary) * 30),
        barmode='stack',
        showlegend=True,
        legend_title_text='Credit Type',
        xaxis_title='Total Credits',
        yaxis={'categoryorder': 'total ascending'}
    )

    # Display the chart
    st.plotly_chart(fig, use_container_width=True)

    # Display summary metrics
    st.markdown("### Summary Statistics")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Total Credits", f"{task_summary['TOTAL_CREDITS'].sum():.2f}")
    with col2:
        st.metric("Serverless Credits", f"{task_summary['SERVERLESS_CREDITS'].sum():.2f}")
    with col3:
        st.metric("Warehouse Credits", f"{task_summary['WAREHOUSE_CREDITS'].sum():.2f}")

In [None]:
# Import required packages
import streamlit as st
import plotly.express as px
from snowflake.snowpark.context import get_active_session

# Daily credits per task over the last 30 days, from two sources joined on
# (DATE, TASK_NAME):
#   * SERVERLESS_COSTS - CREDITS_USED summed from
#     SNOWFLAKE.ACCOUNT_USAGE.SERVERLESS_TASK_HISTORY
#   * WAREHOUSE_COSTS  - an estimate for successful task queries: elapsed time
#     (TOTAL_ELAPSED_TIME / 3600000.0) multiplied by a per-warehouse-size rate;
#     sizes that are not listed in the CASE contribute 0
# The query reads the $QUERY_ID_TASK_HISTORY session variable, presumably set by
# an earlier cell - it has to exist in this session before this cell runs.
session = get_active_session()
df = session.sql("""
WITH SERVERLESS_COSTS AS (
    SELECT
        DATE,
        TASK_NAME,
        SUM(TOTAL_SERVERLESS_CREDITS) AS SERVERLESS_CREDITS
    FROM (
        SELECT
            TO_DATE(START_TIME) AS DATE,
            TASK_NAME,
            SUM(CREDITS_USED) AS TOTAL_SERVERLESS_CREDITS
        FROM SNOWFLAKE.ACCOUNT_USAGE.SERVERLESS_TASK_HISTORY
        WHERE START_TIME > DATEADD(DAY, -30, CURRENT_TIMESTAMP())
        GROUP BY DATE, TASK_NAME
    )
    GROUP BY DATE, TASK_NAME
),
WAREHOUSE_COSTS AS (
    SELECT 
        DATE,
        TASK_NAME,
        SUM(QUERY_CREDITS) AS WAREHOUSE_CREDITS
    FROM (
        WITH TASK_HISTORY AS (
            SELECT DISTINCT 
                QUERY_ID,
                NAME AS TASK_NAME,
                STATE
            FROM TABLE(RESULT_SCAN($QUERY_ID_TASK_HISTORY))
            WHERE STATE = 'SUCCEEDED'
        )
        SELECT
            TO_DATE(QH.START_TIME) AS DATE,
            TH.TASK_NAME,
            (QH.TOTAL_ELAPSED_TIME / 3600000.0) * 
            CASE UPPER(QH.WAREHOUSE_SIZE)
                WHEN 'X-SMALL' THEN 1
                WHEN 'SMALL' THEN 2
                WHEN 'MEDIUM' THEN 4
                WHEN 'LARGE' THEN 8
                WHEN 'X-LARGE' THEN 16
                WHEN '2X-LARGE' THEN 32
                WHEN '3X-LARGE' THEN 64
                WHEN '4X-LARGE' THEN 128
                ELSE 0
            END AS QUERY_CREDITS
        FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY QH
        JOIN TASK_HISTORY TH ON TH.QUERY_ID = QH.QUERY_ID
        WHERE QH.START_TIME > DATEADD(DAY, -30, CURRENT_DATE)
    )
    GROUP BY DATE, TASK_NAME
)
SELECT 
    COALESCE(s.DATE, w.DATE) AS DATE,
    COALESCE(s.TASK_NAME, w.TASK_NAME) AS TASK_NAME,
    COALESCE(s.SERVERLESS_CREDITS, 0) AS SERVERLESS_CREDITS,
    COALESCE(w.WAREHOUSE_CREDITS, 0) AS WAREHOUSE_CREDITS,
    -- The total is built from the COALESCEd operands explicitly. The bare names
    -- SERVERLESS_CREDITS / WAREHOUSE_CREDITS also match the nullable source
    -- columns of s and w; after the FULL OUTER JOIN one side is NULL for every
    -- task that only has one kind of cost, which would turn the sum into NULL.
    COALESCE(s.SERVERLESS_CREDITS, 0) + COALESCE(w.WAREHOUSE_CREDITS, 0) AS TOTAL_CREDITS
FROM SERVERLESS_COSTS s
FULL OUTER JOIN WAREHOUSE_COSTS w 
    ON s.DATE = w.DATE 
    AND s.TASK_NAME = w.TASK_NAME
ORDER BY DATE, TASK_NAME
""").to_pandas()

# Offer every task present in the query result, alphabetically
all_tasks = sorted(df['TASK_NAME'].unique())
selected_task = st.selectbox(
    'Select Task to Display',
    options=all_tasks,
    key='single_task_selector'
)

# Keep only the rows that belong to the chosen task
task_df = df.loc[df['TASK_NAME'] == selected_task]

# One stacked bar per day: serverless and warehouse credits on top of each other
credit_series = ['SERVERLESS_CREDITS', 'WAREHOUSE_CREDITS']
series_colors = {
    'SERVERLESS_CREDITS': '#1f77b4',
    'WAREHOUSE_CREDITS': '#ff7f0e'
}
axis_labels = {
    'DATE': 'Date',
    'value': 'Credits Used',
    'variable': 'Credit Type'
}
fig = px.bar(
    task_df,
    x='DATE',
    y=credit_series,
    title=f'Daily Credits Usage for Task: {selected_task}',
    labels=axis_labels,
    color_discrete_map=series_colors,
)

# Layout tweaks: fixed height, stacked bars, explicit legend and axis titles
fig.update_layout(
    height=500,
    showlegend=True,
    legend_title_text='Credit Type',
    xaxis_title='Date',
    yaxis_title='Credits',
    barmode='stack',
)

# Render the chart at full container width
st.plotly_chart(fig, use_container_width=True)

# Totals for the selected task, rounded to two decimals
st.markdown("### Summary Statistics")
summed_fields = ['SERVERLESS_CREDITS', 'WAREHOUSE_CREDITS', 'TOTAL_CREDITS']
task_summary = task_df.agg(dict.fromkeys(summed_fields, 'sum')).round(2)

# One metric tile per column: total first, then the two components
col1, col2, col3 = st.columns(3)
metric_tiles = (
    (col1, "Total Credits", 'TOTAL_CREDITS'),
    (col2, "Serverless Credits", 'SERVERLESS_CREDITS'),
    (col3, "Warehouse Credits", 'WAREHOUSE_CREDITS'),
)
for tile_column, tile_label, summary_field in metric_tiles:
    with tile_column:
        st.metric(tile_label, f"{task_summary[summary_field]:.2f}")


In [None]:
# Import required packages
import streamlit as st
import plotly.express as px
from snowflake.snowpark.context import get_active_session
from datetime import datetime, timedelta

# Add date range selector
date_range = st.selectbox(
    'Select Time Period',
    options=['Last 7 Days', 'Last 30 Days', 'Last 90 Days', 'All Time'],
    key='date_range_selector'
)

# Resolve the selected label to a look-back in days on the Python side, so that
# only an integer is interpolated into the SQL text (never the label string).
lookback_days = {
    'Last 7 Days': 7,
    'Last 30 Days': 30,
    'Last 90 Days': 90,
}.get(date_range, 36500)  # any other choice, i.e. "All Time": ~100 years

# Top 20 tasks by combined credits in the selected window. Serverless credits
# come from ACCOUNT_USAGE.SERVERLESS_TASK_HISTORY; warehouse credits are an
# estimate (elapsed query time multiplied by a per-warehouse-size rate, sizes
# not listed in the CASE contribute 0). The query reads the
# $QUERY_ID_TASK_HISTORY session variable, presumably set by an earlier cell.
session = get_active_session()
df = session.sql(f"""
WITH SERVERLESS_COSTS AS (
    SELECT
        TASK_NAME,
        SUM(TOTAL_SERVERLESS_CREDITS) AS SERVERLESS_CREDITS
    FROM (
        SELECT
            TASK_NAME,
            SUM(CREDITS_USED) AS TOTAL_SERVERLESS_CREDITS
        FROM SNOWFLAKE.ACCOUNT_USAGE.SERVERLESS_TASK_HISTORY
        WHERE START_TIME > DATEADD(DAY, -{lookback_days}, CURRENT_TIMESTAMP())
        GROUP BY TASK_NAME
    )
    GROUP BY TASK_NAME
),
WAREHOUSE_COSTS AS (
    SELECT 
        TASK_NAME,
        SUM(QUERY_CREDITS) AS WAREHOUSE_CREDITS
    FROM (
        WITH TASK_HISTORY AS (
            SELECT DISTINCT 
                QUERY_ID,
                NAME AS TASK_NAME,
                STATE
            FROM TABLE(RESULT_SCAN($QUERY_ID_TASK_HISTORY))
            WHERE STATE = 'SUCCEEDED'
        )
        SELECT
            TH.TASK_NAME,
            (QH.TOTAL_ELAPSED_TIME / 3600000.0) * 
            CASE UPPER(QH.WAREHOUSE_SIZE)
                WHEN 'X-SMALL' THEN 1
                WHEN 'SMALL' THEN 2
                WHEN 'MEDIUM' THEN 4
                WHEN 'LARGE' THEN 8
                WHEN 'X-LARGE' THEN 16
                WHEN '2X-LARGE' THEN 32
                WHEN '3X-LARGE' THEN 64
                WHEN '4X-LARGE' THEN 128
                ELSE 0
            END AS QUERY_CREDITS
        FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY QH
        JOIN TASK_HISTORY TH ON TH.QUERY_ID = QH.QUERY_ID
        WHERE QH.START_TIME > DATEADD(DAY, -{lookback_days}, CURRENT_DATE)
    )
    GROUP BY TASK_NAME
)
SELECT 
    COALESCE(s.TASK_NAME, w.TASK_NAME) AS TASK_NAME,
    COALESCE(s.SERVERLESS_CREDITS, 0) AS SERVERLESS_CREDITS,
    COALESCE(w.WAREHOUSE_CREDITS, 0) AS WAREHOUSE_CREDITS,
    -- The total is built from the COALESCEd operands explicitly. The bare names
    -- SERVERLESS_CREDITS / WAREHOUSE_CREDITS also match the nullable source
    -- columns of s and w; after the FULL OUTER JOIN one side is NULL for every
    -- task that only has one kind of cost. A NULL total would also sort first
    -- under ORDER BY ... DESC and crowd real spenders out of the top 20.
    COALESCE(s.SERVERLESS_CREDITS, 0) + COALESCE(w.WAREHOUSE_CREDITS, 0) AS TOTAL_CREDITS
FROM SERVERLESS_COSTS s
FULL OUTER JOIN WAREHOUSE_COSTS w ON s.TASK_NAME = w.TASK_NAME
QUALIFY ROW_NUMBER() OVER (ORDER BY TOTAL_CREDITS DESC) <= 20
ORDER BY TOTAL_CREDITS DESC
""").to_pandas()

# Horizontal stacked bars: one row per task, split by credit type
credit_series = ['SERVERLESS_CREDITS', 'WAREHOUSE_CREDITS']
series_colors = {
    'SERVERLESS_CREDITS': '#1f77b4',
    'WAREHOUSE_CREDITS': '#ff7f0e'
}
axis_labels = {
    'value': 'Credits Used',
    'TASK_NAME': 'Task Name',
    'variable': 'Credit Type'
}
fig = px.bar(
    df,
    y='TASK_NAME',
    x=credit_series,
    orientation='h',
    title=f'Top 20 Tasks by Credit Usage ({date_range})',
    labels=axis_labels,
    color_discrete_map=series_colors,
)

# Layout tweaks; the y axis is ordered by each task's stacked total
fig.update_layout(
    height=600,
    barmode='stack',
    showlegend=True,
    legend_title_text='Credit Type',
    xaxis_title='Total Credits',
    yaxis={'categoryorder': 'total ascending'},
)

# Render the chart at full container width
st.plotly_chart(fig, use_container_width=True)

# Column sums over the rows returned by the query
st.markdown("### Summary Statistics")
col1, col2, col3 = st.columns(3)
metric_tiles = (
    (col1, "Total Credits", 'TOTAL_CREDITS'),
    (col2, "Serverless Credits", 'SERVERLESS_CREDITS'),
    (col3, "Warehouse Credits", 'WAREHOUSE_CREDITS'),
)
for tile_column, tile_label, credit_field in metric_tiles:
    with tile_column:
        st.metric(tile_label, f"{df[credit_field].sum():.2f}")


In [None]:
def get_cron_times(schedule, sample_runs=100):
    """
    Sample upcoming run times of a task schedule as (weekday, minute-of-day) pairs.

    Two schedule shapes are handled:

    * Interval schedules containing ``<n> MINUTE(S)``, ``<n> HOUR(S)`` or
      ``<n> SECOND(S)``. Runs are generated from the current UTC time at that
      spacing; sampling stops after ``sample_runs`` entries or once a full week
      has been covered, whichever comes first.
    * CRON schedules: a cron expression, optionally prefixed with ``USING CRON``
      and optionally followed by a time zone name, for example
      ``USING CRON 0 9 * * 1-5 America/Los_Angeles``. The next ``sample_runs``
      firings are computed in that zone (UTC when none is given or the name is
      an unknown 3-4 letter abbreviation) and reported in UTC.

    Args:
        schedule (str): Schedule string (CRON or interval).
        sample_runs (int): How many executions to sample.

    Returns:
        List[Tuple[str, int]]: (weekday name, minute of day) per sampled run,
        expressed in UTC. Empty when ``schedule`` is not a non-blank string or
        when the CRON expression cannot be parsed (a Streamlit warning is shown
        in the latter case).
    """
    # Imported here because the notebook's top-level ``import re`` lives in a
    # later cell than this definition and its first call; without it a fresh
    # "Run All" hits a NameError on the first use of ``re``.
    import re

    # None, NaN or other non-string values carry no schedule to sample
    if not isinstance(schedule, str) or not schedule.strip():
        return []

    # Handle interval-based schedules
    interval_match = re.search(r'(\d+)\s*(MINUTE|MINUTES|HOUR|HOURS|SECOND|SECONDS)', schedule.upper())
    if interval_match:
        number, unit = interval_match.groups()
        number = int(number)

        # Convert to minutes
        if 'HOUR' in unit:
            interval_minutes = number * 60
        elif 'SECOND' in unit:
            interval_minutes = number / 60
        else:
            interval_minutes = number

        # Walk forward from "now" (UTC) in fixed steps
        times = []
        start_time = datetime.now(pytz.utc)
        current_time = start_time

        for _ in range(sample_runs):
            weekday = current_time.strftime("%A")
            minute_of_day = current_time.hour * 60 + current_time.minute
            times.append((weekday, minute_of_day))
            current_time += timedelta(minutes=interval_minutes)

            # Stop if we've gone beyond a week
            if (current_time - start_time).days >= 7:
                break

        return times

    # Handle CRON expressions
    cron_expr = schedule.strip()
    try:
        # Drop the optional "USING CRON" prefix (matched case-insensitively)
        prefix_match = re.search(r'USING\s+CRON\s+([^"]+)', cron_expr, flags=re.IGNORECASE)
        if prefix_match:
            cron_expr = prefix_match.group(1).strip()

        # A sixth (or later) trailing token is treated as a time zone name.
        # Five-field expressions are left alone so a day-of-week field is never
        # mistaken for a zone.
        tz = pytz.UTC
        fields = cron_expr.split()
        if len(fields) > 5:
            candidate = fields[-1]
            try:
                tz = pytz.timezone(candidate)
                cron_expr = ' '.join(fields[:-1])
            except pytz.UnknownTimeZoneError:
                # Unknown 3-4 letter abbreviations are dropped and evaluated in
                # UTC; anything else stays in the expression for croniter to
                # accept or reject.
                if re.fullmatch(r'[A-Z]{3,4}', candidate):
                    cron_expr = ' '.join(fields[:-1])

        cron_iter = croniter(cron_expr, datetime.now(tz))
        times = []

        for _ in range(sample_runs):
            next_run = cron_iter.get_next(datetime)
            # Convert to UTC for consistent display
            if next_run.tzinfo is None:
                next_run = tz.localize(next_run)
            next_run_utc = next_run.astimezone(pytz.UTC)
            weekday = next_run_utc.strftime("%A")
            minute_of_day = next_run_utc.hour * 60 + next_run_utc.minute
            times.append((weekday, minute_of_day))

        return times
    except Exception as e:
        # Best effort: an unparsable schedule is reported and yields no runs
        st.warning(f"Could not parse CRON expression: {cron_expr}. Error: {str(e)}")
        return []


In [None]:
def plot_cron_heatmap(cron_times, cron_expr):
    """
    Draw a static day-of-week x minute-of-day grid of job slots with matplotlib.

    Every (day, minute) cell that occurs in ``cron_times`` is painted with one
    colour, chosen in this order:
    - Yellow: 2x duration < 60 minutes
    - Red: the gap to the next slot is shorter than 2x duration
    - Green: Otherwise
    Cells without any entry keep a light grey background.

    "Duration" is not passed in; it is inferred from ``cron_times`` as the
    spread between the smallest and largest minute-of-day (see the review note
    in the body).

    Args:
        cron_times: iterable of (weekday name, minute of day) tuples. Weekday
            names must be the English names listed in ``day_order``.
        cron_expr: text shown in the figure title.

    Returns:
        None. Prints a message and returns early when ``cron_times`` is empty;
        otherwise shows the figure via ``plt.show()``.
    """
    if not cron_times:
        print("No data to plot.")
        return

    # Define the order of days for consistent Y-axis placement
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    day_to_num = {day: i for i, day in enumerate(day_order)}

    # Create DataFrame from cron job times
    df = pd.DataFrame(cron_times, columns=['Day', 'MinuteOfDay'])
    df['DayNum'] = df['Day'].map(day_to_num)

    # Count entries per (day, minute); rows are the days present in the input,
    # columns the minutes present in the input
    heatmap_data = df.groupby(['DayNum', 'MinuteOfDay']).size().unstack(fill_value=0)

    # Ensure all 1440 minutes are represented
    all_minutes = list(range(0, 1440))
    heatmap_data = heatmap_data.reindex(columns=all_minutes, fill_value=0)

    # Sort rows by descending DayNum, so the largest day number present becomes
    # row 0. Only days that occur in cron_times get a row.
    # NOTE(review): imshow normally draws row 0 at the top, which would put the
    # latest weekday at the top rather than the bottom - confirm the intended
    # orientation.
    heatmap_data = heatmap_data.reindex(index=sorted(heatmap_data.index, reverse=True))

    # Build (day_num, minute, minute-of-week) for every distinct entry, ordered
    # by day and then minute
    start_times = []
    for day, minute in set(cron_times):
        day_num = day_to_num[day]
        absolute_minute = day_num * 1440 + minute
        start_times.append((day_num, minute, absolute_minute))
    start_times.sort(key=lambda x: (x[0], x[1]))

    # Inferred "duration": spread of minute-of-day over ALL entries, plus one.
    # NOTE(review): for schedules with several runs per day this spans from the
    # earliest to the latest run rather than the length of a single run -
    # confirm this is the intended measure.
    duration_minutes = max(minute for _, minute in cron_times) - min(minute for _, minute in cron_times) + 1
    double_duration = duration_minutes * 2

    # Decide one colour per entry and paint it over the following
    # duration_minutes cells of the same day
    color_map = {}
    for i, (day_num, minute, abs_minute) in enumerate(start_times):
        # Successor in sorted order; the last entry wraps to index 0
        next_idx = (i + 1) % len(start_times)
        next_day_num, next_minute, next_abs_minute = start_times[next_idx]

        if next_idx == 0:  # Wrap to next week
            next_abs_minute += 7 * 1440

        # The wrapped case is forced to infinity here, so the week adjustment
        # above has no effect on the result and the last entry is never 'red'
        interval = next_abs_minute - abs_minute if next_idx != 0 else float('inf')

        if double_duration < 60:
            color = 'yellow'
        elif interval < double_duration:
            color = 'red'
        else:
            color = 'green'

        # Cells past midnight are dropped rather than carried to the next day
        for m in range(minute, minute + duration_minutes):
            if m < 1440:
                color_map[(day_num, m)] = color

    # Soft RGB tuples used for painting
    soft_red = (1, 0.7, 0.7)     # Light red
    soft_yellow = (1, 1, 0.7)    # Light yellow
    soft_green = (0.7, 1, 0.7)   # Light green
    light_grey = (0.95, 0.95, 0.95)  # Light grey background

    # RGB image with one pixel per (row, minute), initialised to the background.
    # Only cells that actually have an entry AND a colour decision are painted.
    color_matrix = np.full(heatmap_data.shape + (3,), light_grey)
    for i, day_num in enumerate(heatmap_data.index):
        for j, minute in enumerate(heatmap_data.columns):
            if heatmap_data.iloc[i, j] > 0:
                if (day_num, minute) in color_map:
                    if color_map[(day_num, minute)] == 'red':
                        color_matrix[i, j] = soft_red
                    elif color_map[(day_num, minute)] == 'yellow':
                        color_matrix[i, j] = soft_yellow
                    else:
                        color_matrix[i, j] = soft_green

    # Axis tick labels: HH:MM per minute column, weekday name per row
    xtick_labels = [f"{m // 60:02d}:{m % 60:02d}" for m in heatmap_data.columns]
    ytick_labels = [day_order[i] for i in heatmap_data.index]

    # Create the heatmap
    plt.figure(figsize=(18, 6))
    ax = plt.gca()
    ax.imshow(color_matrix, aspect='auto')

    # Set labels and title
    ax.set_xlabel('Time of Day')
    ax.set_ylabel('Day of Week')
    ax.set_title(f'Cron Format: {cron_expr}', fontsize=16, pad=20)

    # Set ticks: with 1440 columns the step is 60, i.e. one x tick per hour
    step = max(1, len(xtick_labels) // 24)
    ax.set_xticks(range(0, len(xtick_labels), step))
    ax.set_xticklabels(xtick_labels[::step], rotation=45, ha='right')
    ax.set_yticks(range(len(ytick_labels)))
    ax.set_yticklabels(ytick_labels)

    # Set background colors
    ax.set_facecolor(light_grey)
    plt.gcf().patch.set_facecolor(light_grey)

    plt.tight_layout()
    plt.show()

# Demo: heatmap for a weekday schedule
cron_expr = "0 9,11,13,15,17 * * 1-5"  # 09:00, 11:00, 13:00, 15:00 and 17:00, Monday to Friday
duration_minutes = 5  # each run is drawn as a 5-minute block

# Sample the start times: 25 runs = 5 per day * 5 weekdays
base_times = get_cron_times(cron_expr, sample_runs=25)

# Expand every start into one (day, minute) entry per minute of the run,
# dropping minutes that would spill past the end of the day (minute 1439)
extended_times = [
    (day, minute)
    for day, start_minute in base_times
    for minute in range(start_minute, start_minute + duration_minutes)
    if minute < 1440
]

# Plot the expanded slots
plot_cron_heatmap(extended_times, cron_expr)


In [None]:
def get_minimum_interval_minutes(cron_times):
    """
    Calculate the minimum gap (in minutes) between two neighbouring job slots
    in a weekly schedule, including the gap that wraps from the last slot of
    the week to the first slot of the following week.

    Repeated (weekday, minute) entries describe the same weekly slot and are
    collapsed before measuring, so sampling a schedule across more than one
    week does not produce a bogus 0-minute interval.

    Args:
        cron_times (list of tuples): [(weekday_str, minute_of_day), ...] where
            weekday_str is an English weekday name ('Monday' ... 'Sunday').

    Returns:
        int or None: Minimum interval in minutes, or None if fewer than two
        distinct slots exist.

    Raises:
        KeyError: if a weekday name is not one of the seven English names.
    """
    if len(cron_times) < 2:
        return None

    # Map days to numbers (0 = Monday ... 6 = Sunday)
    day_to_num = {
        'Monday': 0, 'Tuesday': 1, 'Wednesday': 2,
        'Thursday': 3, 'Friday': 4, 'Saturday': 5, 'Sunday': 6
    }
    minutes_per_day = 1440
    minutes_per_week = 7 * minutes_per_day

    # (day, minute_of_day) -> minute of the week; the set drops repeated slots
    slots = sorted({
        day_to_num[day] * minutes_per_day + minute for day, minute in cron_times
    })
    if len(slots) < 2:
        return None

    # Gaps between neighbours in sorted order ...
    gaps = [later - earlier for earlier, later in zip(slots, slots[1:])]

    # ... plus the wrap-around gap from the last slot into next week's first
    gaps.append(slots[0] + minutes_per_week - slots[-1])

    return min(gaps)


In [None]:
# Import required packages
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
from snowflake.snowpark.context import get_active_session
from datetime import datetime, timedelta
import pytz
import re
import pandas as pd
from croniter import croniter

# Scheduled tasks joined to their execution history.
# task_executions aggregates the successful runs per task name; the outer
# query keeps every task that has a schedule and falls back to 300 seconds
# when no average runtime is available. Both RESULT_SCAN inputs come from
# session variables that have to be set before this cell runs.
task_schedule_sql = """
WITH task_executions AS (
    SELECT 
        NAME AS TASK_NAME,
        AVG(DATEDIFF(SECOND, SCHEDULED_TIME, COMPLETED_TIME)) AS AVG_EXECUTION_SECONDS,
        COUNT(DISTINCT SCHEDULED_TIME) AS EXECUTION_COUNT,
        MIN(SCHEDULED_TIME) AS FIRST_EXECUTION,
        MAX(SCHEDULED_TIME) AS LAST_EXECUTION
    FROM TABLE(RESULT_SCAN($QUERY_ID_TASK_HISTORY))
    WHERE STATE = 'SUCCEEDED'
    GROUP BY NAME
)
SELECT 
    t.TASK_NAME,
    t.DATABASE_NAME,
    t.SCHEMA_NAME,
    t.SCHEDULE,
    t.WAREHOUSE,
    t.TASK_TYPE,
    COALESCE(te.AVG_EXECUTION_SECONDS, 300) AS AVG_EXECUTION_SECONDS,
    te.EXECUTION_COUNT,
    te.FIRST_EXECUTION,
    te.LAST_EXECUTION
FROM TABLE(RESULT_SCAN($QUERY_ID_CORE_INFORMATION)) t
LEFT JOIN task_executions te ON t.TASK_NAME = te.TASK_NAME
WHERE t.SCHEDULE IS NOT NULL
ORDER BY t.DATABASE_NAME, t.TASK_NAME
"""

# Run the query in the active Snowpark session and pull the result into pandas
session = get_active_session()
df = session.sql(task_schedule_sql).to_pandas()

# Filter form: widgets are laid out in two columns and applied on submit
with st.form("schedule_filters"):
    col1, col2 = st.columns(2)

    # Left column: database choice and the "active only" toggle
    with col1:
        databases = sorted(df['DATABASE_NAME'].unique())
        selected_database = st.selectbox('Select Database', options=databases)

        show_active = st.checkbox('Show Only Active Tasks', value=True)

    # Right column: schemas of the chosen database and the execution threshold
    with col2:
        schemas = sorted(
            df.loc[df['DATABASE_NAME'] == selected_database, 'SCHEMA_NAME'].unique()
        )
        selected_schema = st.selectbox('Select Schema', options=schemas)

        min_executions = st.number_input('Minimum Executions', min_value=0, value=1)

    submitted = st.form_submit_button("Apply Filters")

# Narrow the tasks to the selected database and schema
in_selected_scope = (
    (df['DATABASE_NAME'] == selected_database)
    & (df['SCHEMA_NAME'] == selected_schema)
)
df_filtered = df.loc[in_selected_scope]

# The execution threshold is only enforced while "active only" is ticked
if show_active:
    df_filtered = df_filtered.loc[df_filtered['EXECUTION_COUNT'] >= min_executions]

# Task picker over whatever survived the filters; skipped when nothing is left
tasks = sorted(df_filtered['TASK_NAME'].unique())
if tasks:
    selected_task = st.selectbox('Select Task to Display', options=tasks)

    # Keep only the rows of the picked task
    df_filtered = df_filtered.loc[df_filtered['TASK_NAME'] == selected_task]

# Turn every remaining task row into one record per sampled run; these
# records feed the Gantt-like chart below.
task_schedules = []
for _, row in df_filtered.iterrows():
    try:
        # Sample upcoming runs of this task's schedule
        # (35 = 5 runs per day * 7 days, a sizing assumption)
        cron_times = get_cron_times(row['SCHEDULE'], sample_runs=35)

        # Per-task values are the same for every sampled run, so derive them once
        runtime_hours = float(row['AVG_EXECUTION_SECONDS']) / 3600  # Convert seconds to hours

        # Missing values arrive from pandas as None, NaN or pd.NA. NaN is truthy
        # and pd.NA refuses boolean evaluation, so a plain `value or default`
        # never falls back for them - test for missing values explicitly.
        warehouse = row['WAREHOUSE']
        if pd.isna(warehouse) or not warehouse:
            warehouse = 'Serverless'

        executions = row['EXECUTION_COUNT']
        if pd.isna(executions):
            executions = 0

        for day, minute in cron_times:
            task_schedules.append({
                'Task': row['TASK_NAME'],
                'Day': day,
                # Convert minute of day to hours for better visualization
                'Start': minute / 60,
                'Duration': runtime_hours,
                'Warehouse': warehouse,
                'Type': row['TASK_TYPE'],
                'Executions': executions,
                'Avg Runtime': f"{runtime_hours:.2f} hours",
                'Schedule': row['SCHEDULE']
            })
    except Exception as e:
        # Best effort: one bad row is reported and must not stop the others
        st.warning(f"Could not process schedule for task {row['TASK_NAME']}: {e}")

if not task_schedules:
    # Nothing was sampled for the current selection
    st.warning("No schedule data available for the selected task.")
else:
    # One row per sampled run of the selected task
    schedule_df = pd.DataFrame(task_schedules)

    # Section heading
    st.markdown(f"### Task Schedule: {selected_task}")

    # CSS overrides that shrink the metric tiles and tighten column spacing
    st.markdown("""
        <style>
            div[data-testid="metric-container"] {
                padding: 10px !important;
            }
            
            div[data-testid="metric-container"] > div {
                font-size: 12px !important;
            }
            
            div[data-testid="metric-container"] label {
                font-size: 10px !important;
                color: rgb(100, 100, 100) !important;
            }
            
            div[data-testid="stHorizontalBlock"] {
                gap: 10px !important;
            }
        </style>
    """, unsafe_allow_html=True)

    # Headline metrics, all taken from the first record
    col1, col2, col3, col4 = st.columns(4)
    headline_metrics = (
        (col1, "Task Type", schedule_df['Type'].iat[0]),
        (col2, "Warehouse", schedule_df['Warehouse'].iat[0]),
        (col3, "Executions", f"{schedule_df['Executions'].iat[0]:,.0f}"),
        (col4, "Avg Runtime", schedule_df['Avg Runtime'].iat[0]),
    )
    for tile_column, tile_label, tile_value in headline_metrics:
        with tile_column:
            st.metric(tile_label, tile_value)

    # Raw schedule expression as stored on the task
    st.markdown(f"**Schedule Expression:** `{schedule_df['Schedule'].iat[0]}`")

    # Weekday order used for the y axis
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    # Bar colour per task type; unknown types fall back to grey further down
    color_map = {
        'SERVERLESS': '#1f77b4',
        'USER_MANAGED': '#ff7f0e',
        'FLEXIBLE': '#2ca02c'
    }
    task_type = schedule_df['Type'].iat[0]

    # Hover text, assembled from one fragment per displayed line
    hover_fragments = [
        "<b>%{customdata[0]}</b><br>",
        "Start Time: %{customdata[1]:.2f}<br>",
        "Duration: %{customdata[6]}<br>",
        "Warehouse: %{customdata[3]}<br>",
        "Type: %{customdata[4]}<br>",
        "Executions: %{customdata[5]}<br>",
        "Day: %{y}<br>",
        "<extra></extra>",
    ]
    hover_columns = ['Task', 'Start', 'Duration', 'Warehouse', 'Type', 'Executions', 'Avg Runtime']

    # Gantt-like chart: each run is a horizontal bar that starts at its start
    # hour (base) and extends by the average runtime (x)
    fig = go.Figure()
    run_bars = go.Bar(
        name=selected_task,
        x=schedule_df['Duration'],
        y=schedule_df['Day'],
        orientation='h',
        base=schedule_df['Start'],
        marker_color=color_map.get(task_type, '#7f7f7f'),
        customdata=schedule_df[hover_columns],
        hovertemplate="".join(hover_fragments),
    )
    fig.add_trace(run_bars)

    # Layout: centred title, fixed 0-24 hour axis with hourly ticks, weekdays
    # in calendar order
    chart_title = {
        'text': f'Weekly Schedule for {selected_task}',
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    }
    weekday_axis = {
        'categoryorder': 'array',
        'categoryarray': day_order,
        'gridcolor': 'lightgrey'
    }
    hour_axis = {
        'range': [0, 24],
        'gridcolor': 'lightgrey',
        'dtick': 1
    }
    fig.update_layout(
        title=chart_title,
        xaxis_title='Hour of Day',
        yaxis_title='Day of Week',
        height=400,
        barmode='overlay',
        yaxis=weekday_axis,
        xaxis=hour_axis,
        plot_bgcolor='white',
        showlegend=False,
        hovermode='closest'
    )

    # Render the chart at full container width
    st.plotly_chart(fig, use_container_width=True)

