In [None]:
import os
import pandas as pd
from tabulate import tabulate
from dotenv import load_dotenv
import snowflake.connector

# Load settings before reading them; load_dotenv() (python-dotenv) is expected
# to make the SNOWFLAKE_* variables available via the process environment.
load_dotenv()

# Open the Snowflake session. Credentials come from the environment; the
# warehouse falls back to COMPUTE_WH, and database/schema are fixed here.
conn = snowflake.connector.connect(
    user=os.environ.get("SNOWFLAKE_USER"),
    password=os.environ.get("SNOWFLAKE_PASSWORD"),
    account=os.environ.get("SNOWFLAKE_ACCOUNT"),
    warehouse=os.environ.get("SNOWFLAKE_WAREHOUSE", "COMPUTE_WH"),
    database="INCREMENTALITY",
    schema="INCREMENTALITY_RESEARCH",
)
# One cursor, shared by the query helper defined right after this.
cursor = conn.cursor()

def run_query(query):
    """Execute ``query`` on the notebook-level ``cursor`` and return a DataFrame.

    Args:
        query: SQL text passed verbatim to ``cursor.execute``.

    Returns:
        A ``pandas.DataFrame`` with one column per entry of
        ``cursor.description``. A query that matches zero rows yields an empty
        frame that still carries its column names. A statement that produces
        no result set (``cursor.description`` is falsy) yields a completely
        empty ``DataFrame``.

    Raises:
        Whatever ``cursor.execute`` / ``cursor.fetchall`` raise; nothing is
        caught here.
    """
    cursor.execute(query)
    # Decide on the result-set metadata rather than on the fetched rows, so a
    # zero-row result keeps its column names.
    if not cursor.description:
        return pd.DataFrame()
    columns = [desc[0] for desc in cursor.description]
    return pd.DataFrame(cursor.fetchall(), columns=columns)

def show_table(df, title=""):
    """Print ``df`` as a grid-formatted text table, optionally under a heading.

    Args:
        df: Tabular data handed to ``tabulate`` (column names become headers,
            the index is not shown).
        title: Optional heading. When truthy it is printed after a blank line
            and underlined with ``=`` characters of the same length.
    """
    if title:
        print(f"\n{title}")
        underline = "=" * len(title)
        print(underline)
    grid = tabulate(df, headers='keys', tablefmt='grid', showindex=False)
    print(grid)

print("✅ Connected to Snowflake")

✅ Connected to Snowflake


In [None]:
# Count rows per table in a 1 hour window.
# The window is half-open: [COUNT_WINDOW_START, COUNT_WINDOW_END).
# The previous upper bound ('2025-07-02 01:00:00') covered 25 hours, which
# contradicted the "1 hour" / "00:00-01:00" labels used below.
COUNT_WINDOW_START = '2025-07-01 00:00:00'
COUNT_WINDOW_END = '2025-07-01 01:00:00'

# (table name, timestamp column used for the window filter)
tables_to_count = [
    # AUCTIONS tables
    ('AUCTIONS_USERS', 'CREATED_AT'),
    ('AUCTIONS_RESULTS', 'CREATED_AT'),
    # Event tables
    ('IMPRESSIONS', 'OCCURRED_AT'),
    ('CLICKS', 'OCCURRED_AT'),
    ('PURCHASES', 'PURCHASED_AT'),
]

counts = []
for table_name, ts_column in tables_to_count:
    query = (
        f"SELECT COUNT(*) as cnt FROM {table_name} "
        f"WHERE {ts_column} >= '{COUNT_WINDOW_START}' AND {ts_column} < '{COUNT_WINDOW_END}'"
    )
    df = run_query(query)
    # The result column is read back as 'CNT' (upper-cased alias).
    counts.append([table_name, df['CNT'].iloc[0]])

count_df = pd.DataFrame(counts, columns=['Table', 'Row Count (1 hour)'])
# Thousands separators for readability; the column becomes text after this.
count_df['Row Count (1 hour)'] = count_df['Row Count (1 hour)'].apply(lambda x: f"{x:,}")
show_table(count_df, "Table Row Counts (2025-07-01 00:00-01:00)")


Table Row Counts (2025-07-01 00:00-01:00)
+------------------+----------------------+
| Table            | Row Count (1 hour)   |
| AUCTIONS_USERS   | 7,173,906            |
+------------------+----------------------+
| AUCTIONS_RESULTS | 260,352,025          |
+------------------+----------------------+
| IMPRESSIONS      | 27,272,669           |
+------------------+----------------------+
| CLICKS           | 890,435              |
+------------------+----------------------+
| PURCHASES        | 147,642              |
+------------------+----------------------+


## 3. Rank Distribution

In [None]:
# Rank distribution for 10 minute window
# For every RANKING value up to 40, the query returns the number of rows
# (Total_Bids), how many of them have IS_WINNER set (Winners), and the winner
# share as a percentage rounded to one decimal (Win_Rate_Pct).
# The CREATED_AT filter is half-open: 2025-07-01 00:00:00 inclusive up to
# 00:10:00 exclusive.
query = """
SELECT 
    RANKING as Rank,
    COUNT(*) as Total_Bids,
    SUM(CASE WHEN IS_WINNER THEN 1 ELSE 0 END) as Winners,
    ROUND(100.0 * SUM(CASE WHEN IS_WINNER THEN 1 ELSE 0 END) / COUNT(*), 1) as Win_Rate_Pct
FROM AUCTIONS_RESULTS 
WHERE CREATED_AT >= '2025-07-01 00:00:00' 
  AND CREATED_AT < '2025-07-01 00:10:00'
  AND RANKING <= 40
GROUP BY RANKING
ORDER BY RANKING
"""

# One row per rank, ordered by rank; printed as a grid under the given title.
df = run_query(query)
show_table(df, "Rank Distribution (10 min sample)")


Rank Distribution (10 min sample)
+--------+--------------+-----------+----------------+
|   RANK |   TOTAL_BIDS |   WINNERS |   WIN_RATE_PCT |
|      1 |        67262 |     66985 |           99.6 |
+--------+--------------+-----------+----------------+
|      2 |        66713 |     66098 |           99.1 |
+--------+--------------+-----------+----------------+
|      3 |        66302 |     65334 |           98.5 |
+--------+--------------+-----------+----------------+
|      4 |        65992 |     64738 |           98.1 |
+--------+--------------+-----------+----------------+
|      5 |        65772 |     64233 |           97.7 |
+--------+--------------+-----------+----------------+
|      6 |        65551 |     63763 |           97.3 |
+--------+--------------+-----------+----------------+
|      7 |        65329 |     63289 |           96.9 |
+--------+--------------+-----------+----------------+
|      8 |        65156 |     62882 |           96.5 |
+--------+--------------+-----

In [None]:
# Simple CTR calculation over a 1 hour window.
# Half-open window [CTR_WINDOW_START, CTR_WINDOW_END). The previous upper bound
# ('2025-07-02 01:00:00') spanned 25 hours although the table is titled "1 hour".
CTR_WINDOW_START = '2025-07-01 00:00:00'
CTR_WINDOW_END = '2025-07-01 01:00:00'

query = f"""
SELECT
    'Impressions' as Event_Type,
    COUNT(*) as Count
FROM IMPRESSIONS
WHERE OCCURRED_AT >= '{CTR_WINDOW_START}' AND OCCURRED_AT < '{CTR_WINDOW_END}'
UNION ALL
SELECT
    'Clicks' as Event_Type,
    COUNT(*) as Count
FROM CLICKS
WHERE OCCURRED_AT >= '{CTR_WINDOW_START}' AND OCCURRED_AT < '{CTR_WINDOW_END}'
"""

df = run_query(query)
# One row per event type; pick each count by its label.
imp_count = df[df['EVENT_TYPE'] == 'Impressions']['COUNT'].iloc[0]
click_count = df[df['EVENT_TYPE'] == 'Clicks']['COUNT'].iloc[0]
# CTR as a percentage; reported as 0 when there are no impressions.
ctr = (click_count / imp_count * 100) if imp_count > 0 else 0

ctr_df = pd.DataFrame([
    ['Impressions', f"{imp_count:,}"],
    ['Clicks', f"{click_count:,}"],
    ['CTR', f"{ctr:.2f}%"]
], columns=['Metric', 'Value'])

show_table(ctr_df, "Click-Through Rate (1 hour)")


Click-Through Rate (1 hour)
+-------------+------------+
| Metric      | Value      |
| Impressions | 27,272,669 |
+-------------+------------+
| Clicks      | 890,435    |
+-------------+------------+
| CTR         | 3.26%      |
+-------------+------------+


In [None]:
# Purchase stats over a 1 hour window.
# Half-open window [PURCHASE_WINDOW_START, PURCHASE_WINDOW_END). The previous
# upper bound ('2025-07-02 01:00:00') spanned 25 hours although the table is
# titled "1 hour".
PURCHASE_WINDOW_START = '2025-07-01 00:00:00'
PURCHASE_WINDOW_END = '2025-07-01 01:00:00'

# COALESCE keeps the two SUMs numeric when no row matches the window; without
# it they come back as NULL/None and the ':,' formatting below raises.
# NOTE(review): revenue is QUANTITY * UNIT_PRICE as stored — confirm the unit
# of UNIT_PRICE (e.g. dollars vs. cents) before quoting the figure.
query = f"""
SELECT
    COUNT(DISTINCT PURCHASE_ID) as Purchases,
    COUNT(DISTINCT USER_ID) as Unique_Users,
    COUNT(DISTINCT PRODUCT_ID) as Unique_Products,
    COALESCE(SUM(QUANTITY), 0) as Total_Quantity,
    ROUND(COALESCE(SUM(QUANTITY * UNIT_PRICE), 0), 2) as Total_Revenue
FROM PURCHASES
WHERE PURCHASED_AT >= '{PURCHASE_WINDOW_START}'
  AND PURCHASED_AT < '{PURCHASE_WINDOW_END}'
"""

df = run_query(query)
if not df.empty:
    # Transpose the single result row into Metric/Value pairs for display.
    stats = [
        ['Total Purchases', f"{df['PURCHASES'].iloc[0]:,}"],
        ['Unique Users', f"{df['UNIQUE_USERS'].iloc[0]:,}"],
        ['Unique Products', f"{df['UNIQUE_PRODUCTS'].iloc[0]:,}"],
        ['Total Quantity', f"{df['TOTAL_QUANTITY'].iloc[0]:,}"],
        ['Total Revenue', f"${df['TOTAL_REVENUE'].iloc[0]:,.2f}"]
    ]
    stats_df = pd.DataFrame(stats, columns=['Metric', 'Value'])
    show_table(stats_df, "Purchase Statistics (1 hour)")


Purchase Statistics (1 hour)
+-----------------+-----------------+
| Metric          | Value           |
| Total Purchases | 117,886         |
+-----------------+-----------------+
| Unique Users    | 88,932          |
+-----------------+-----------------+
| Unique Products | 132,988         |
+-----------------+-----------------+
| Total Quantity  | 147,642         |
+-----------------+-----------------+
| Total Revenue   | $529,325,960.00 |
+-----------------+-----------------+


In [None]:
# Build simple funnel (30 minute sample).
# A single statement with one scalar sub-query per stage, all filtered on the
# same half-open window 2025-07-01 00:00:00 <= ts < 00:30:00.
query = """
SELECT
    (SELECT COUNT(*) FROM AUCTIONS_USERS WHERE CREATED_AT >= '2025-07-01 00:00:00' AND CREATED_AT < '2025-07-01 00:30:00') as Auctions,
    (SELECT COUNT(*) FROM AUCTIONS_RESULTS WHERE CREATED_AT >= '2025-07-01 00:00:00' AND CREATED_AT < '2025-07-01 00:30:00' AND IS_WINNER = TRUE) as Winners,
    (SELECT COUNT(*) FROM IMPRESSIONS WHERE OCCURRED_AT >= '2025-07-01 00:00:00' AND OCCURRED_AT < '2025-07-01 00:30:00') as Impressions,
    (SELECT COUNT(*) FROM CLICKS WHERE OCCURRED_AT >= '2025-07-01 00:00:00' AND OCCURRED_AT < '2025-07-01 00:30:00') as Clicks,
    (SELECT COUNT(*) FROM PURCHASES WHERE PURCHASED_AT >= '2025-07-01 00:00:00' AND PURCHASED_AT < '2025-07-01 00:30:00') as Purchases
"""


def _stage_rate(count, previous_count):
    """Return ``count`` as a percentage of ``previous_count`` (one decimal).

    Returns 'n/a' when the previous stage is empty, instead of dividing by
    zero and printing 'inf%' / 'nan%'.
    """
    if not previous_count:
        return 'n/a'
    return f"{100 * count / previous_count:.1f}%"


df = run_query(query)
if not df.empty:
    # (display label, result column) in funnel order; each stage is compared
    # with the stage directly before it.
    stage_columns = [
        ('Auctions', 'AUCTIONS'),
        ('Winners', 'WINNERS'),
        ('Impressions', 'IMPRESSIONS'),
        ('Clicks', 'CLICKS'),
        ('Purchases', 'PURCHASES'),
    ]
    funnel = []
    previous_count = None
    for stage, column in stage_columns:
        count = df[column].iloc[0]
        # The first stage is the baseline and is always shown as 100.0%.
        rate = '100.0%' if previous_count is None else _stage_rate(count, previous_count)
        funnel.append([stage, count, rate])
        previous_count = count
    funnel_df = pd.DataFrame(funnel, columns=['Stage', 'Count', 'Conversion'])
    funnel_df['Count'] = funnel_df['Count'].apply(lambda x: f"{x:,}")
    show_table(funnel_df, "Conversion Funnel (30 min sample)")

## Clicks and Purchases only

In [None]:
import os
import pandas as pd
from tabulate import tabulate
from dotenv import load_dotenv
import snowflake.connector
import sys

load_dotenv()

# This cell rebinds `conn` / `cursor`. If a session from an earlier cell (or an
# earlier run of this cell) is still bound to `conn`, close it first so it is
# not left open once the name is overwritten.
_previous_conn = globals().get("conn")
if _previous_conn is not None:
    try:
        _previous_conn.close()
    except Exception as close_error:
        # Best effort: a stale or broken session must not block reconnecting.
        print(f"Warning: could not close previous Snowflake connection: {close_error}")

# Credentials come from the environment; the warehouse falls back to
# COMPUTE_WH, and database/schema are fixed here.
conn = snowflake.connector.connect(
    user=os.getenv('SNOWFLAKE_USER'),
    password=os.getenv('SNOWFLAKE_PASSWORD'),
    account=os.getenv('SNOWFLAKE_ACCOUNT'),
    warehouse=os.getenv('SNOWFLAKE_WAREHOUSE', 'COMPUTE_WH'),
    database='INCREMENTALITY',
    schema='INCREMENTALITY_RESEARCH'
)
cursor = conn.cursor()

def run_query(query):
    """Run ``query`` on the notebook-level ``cursor`` and return a DataFrame.

    Args:
        query: SQL text passed verbatim to ``cursor.execute``.

    Returns:
        A ``pandas.DataFrame`` whose columns are the names found in
        ``cursor.description``; a completely empty ``DataFrame`` when the
        statement produced no result set (falsy ``cursor.description``).

    Raises:
        snowflake.connector.ProgrammingError: re-raised after the failing
            query text and the error details have been printed.
    """
    try:
        cursor.execute(query)
        # Statements without a result set have nothing to fetch.
        if not cursor.description:
            return pd.DataFrame()
        rows = cursor.fetchall()
        header = [column_meta[0] for column_meta in cursor.description]
        return pd.DataFrame(rows, columns=header)
    except snowflake.connector.ProgrammingError as query_error:
        print(f"\nERROR executing query:\n{query}\nDetails: {query_error}")
        raise

def show_table(df, title=""):
    """Print ``df`` as a grid-formatted text table, optionally under a heading.

    Args:
        df: Tabular data handed to ``tabulate`` (column names become headers,
            the index is not shown).
        title: Optional heading. When truthy it is printed after a blank line
            and underlined with ``=`` characters of the same length.
    """
    if title:
        print(f"\n{title}")
        underline = "=" * len(title)
        print(underline)
    grid = tabulate(df, headers='keys', tablefmt='grid', showindex=False)
    print(grid)

print("✅ Connected to Snowflake")

# Database/schema this session is switched to for the rest of the notebook.
# NOTE: after the USE statements below, unqualified table names resolve
# against this context, not against INCREMENTALITY.INCREMENTALITY_RESEARCH.
WRITABLE_DATABASE = 'USER$PRANJAL'
WRITABLE_SCHEMA = 'PUBLIC'

try:
    run_query(f"USE DATABASE {WRITABLE_DATABASE};")
    run_query(f"USE SCHEMA {WRITABLE_DATABASE}.{WRITABLE_SCHEMA};")
except Exception as e:
    print("\nCRITICAL ERROR: Failed to switch to a writable database/schema.")
    print(f"Details: {e}")
    # Raise instead of sys.exit(1): inside a notebook SystemExit hides the
    # traceback, whereas a chained exception stops the run and shows the cause.
    raise RuntimeError(
        f"Failed to switch to writable context {WRITABLE_DATABASE}.{WRITABLE_SCHEMA}"
    ) from e
else:
    print(f"✅ Switched context to your writable database '{WRITABLE_DATABASE}' and schema '{WRITABLE_SCHEMA}'.")

# Pilot week used by the exploration queries: half-open interval
# [PILOT_WEEK_START, PILOT_WEEK_END).
PILOT_WEEK_START = '2025-07-01 00:00:00'
PILOT_WEEK_END = '2025-07-08 00:00:00'

print("\n--- Exploring Clicks and Purchases Tables ---")

✅ Connected to Snowflake
✅ Switched context to your writable database 'USER$PRANJAL' and schema 'PUBLIC'.

--- Exploring Clicks and Purchases Tables ---


In [None]:
# Fully qualified name of the clicks table. The session is connected with
# schema INCREMENTALITY_RESEARCH and the other cells read CLICKS from there;
# this cell previously pointed at INCREMENTALITY.PUBLIC.CLICKS.
# NOTE(review): confirm no separate PUBLIC.CLICKS table was intended.
CLICKS_TABLE = "INCREMENTALITY.INCREMENTALITY_RESEARCH.CLICKS"

# Schema for Clicks
print("\n--- Schema for Clicks ---")
query_clicks_schema = f"DESCRIBE TABLE {CLICKS_TABLE};"
df_clicks_schema = run_query(query_clicks_schema)
show_table(df_clicks_schema, "Clicks Table Schema")

# Sample Data from Clicks (within pilot week)
print("\n--- Sample Data from Clicks (Pilot Week) ---")
query_clicks_sample = f"""
SELECT *
FROM {CLICKS_TABLE}
WHERE OCCURRED_AT >= '{PILOT_WEEK_START}' AND OCCURRED_AT < '{PILOT_WEEK_END}'
LIMIT 5;
"""
df_clicks_sample = run_query(query_clicks_sample)
show_table(df_clicks_sample, "Clicks Sample (Pilot Week)")

# Row Count for Clicks (within pilot week)
print("\n--- Row Count for Clicks (Pilot Week) ---")
query_clicks_count = f"""
SELECT COUNT(*) as count
FROM {CLICKS_TABLE}
WHERE OCCURRED_AT >= '{PILOT_WEEK_START}' AND OCCURRED_AT < '{PILOT_WEEK_END}';
"""
df_clicks_count = run_query(query_clicks_count)
print(f"Total Clicks in Pilot Week: {df_clicks_count['COUNT'].iloc[0]:,}")



--- Schema for Clicks ---

--- Executing SQL Query ---
DESCRIBE TABLE INCREMENTALITY.PUBLIC.CLICKS;
---------------------------


--- Clicks Table Schema ---
 name          type   kind
COL_A       VARCHAR COLUMN
COL_B TIMESTAMP_NTZ COLUMN
COL_C        NUMBER COLUMN
---------------------------


--- Sample Data from Clicks (Pilot Week) ---

--- Executing SQL Query ---

SELECT *
FROM INCREMENTALITY.PUBLIC.CLICKS
WHERE OCCURRED_AT >= '2025-07-01 00:00:00' AND OCCURRED_AT < '2025-07-08 00:00:00'
LIMIT 5;

---------------------------


--- Clicks Sample (Pilot Week) ---
INTERACTION_ID AUCTION_ID PRODUCT_ID USER_ID CAMPAIGN_ID VENDOR_ID         OCCURRED_AT
            c1         a1         p1      u1        cam1        v1 2025-07-01 10:00:00
            c2         a2         p2      u2        cam2        v2 2025-07-01 10:05:00
---------------------------


--- Row Count for Clicks (Pilot Week) ---

--- Executing SQL Query ---

SELECT COUNT(*) as count
FROM INCREMENTALITY.PUBLIC.CLICKS
WHERE

In [None]:
# Shared pieces of the three statements below: the fully qualified table name
# and the half-open pilot-week filter on OCCURRED_AT.
clicks_table = "INCREMENTALITY.INCREMENTALITY_RESEARCH.CLICKS"
clicks_window = f"OCCURRED_AT >= '{PILOT_WEEK_START}' AND OCCURRED_AT < '{PILOT_WEEK_END}'"

# 1) Column definitions of the table.
print("\n--- Schema for Clicks ---")
query_clicks_schema = f"DESCRIBE TABLE {clicks_table};"
df_clicks_schema = run_query(query_clicks_schema)
show_table(df_clicks_schema, "Clicks Table Schema")

# 2) Up to five rows from inside the pilot week.
print("\n--- Sample Data from Clicks (Pilot Week) ---")
query_clicks_sample = f"""
SELECT *
FROM {clicks_table}
WHERE {clicks_window}
LIMIT 5;
"""
df_clicks_sample = run_query(query_clicks_sample)
show_table(df_clicks_sample, "Clicks Sample (Pilot Week)")

# 3) Total number of rows inside the pilot week.
print("\n--- Row Count for Clicks (Pilot Week) ---")
query_clicks_count = f"""
SELECT COUNT(*) as count
FROM {clicks_table}
WHERE {clicks_window};
"""
df_clicks_count = run_query(query_clicks_count)
total_clicks = df_clicks_count['COUNT'].iloc[0]
print(f"Total Clicks in Pilot Week: {total_clicks:,}")


--- Schema for Clicks ---

Clicks Table Schema
+----------------+-------------------+--------+---------+-----------+---------------+--------------+---------+--------------+-----------+---------------+------------------+
| name           | type              | kind   | null?   | default   | primary key   | unique key   | check   | expression   | comment   | policy name   | privacy domain   |
| INTERACTION_ID | VARCHAR(16777216) | COLUMN | Y       |           | N             | N            |         |              |           |               |                  |
+----------------+-------------------+--------+---------+-----------+---------------+--------------+---------+--------------+-----------+---------------+------------------+
| AUCTION_ID     | VARCHAR(16777216) | COLUMN | Y       |           | N             | N            |         |              |           |               |                  |
+----------------+-------------------+--------+---------+-----------+---------------+--

In [None]:
# Shared pieces of the three statements below: the fully qualified table name
# and the half-open pilot-week filter on PURCHASED_AT.
purchases_table = "INCREMENTALITY.INCREMENTALITY_RESEARCH.PURCHASES"
purchases_window = f"PURCHASED_AT >= '{PILOT_WEEK_START}' AND PURCHASED_AT < '{PILOT_WEEK_END}'"

# 1) Column definitions of the table.
print("\n--- Schema for Purchases ---")
query_purchases_schema = f"DESCRIBE TABLE {purchases_table};"
df_purchases_schema = run_query(query_purchases_schema)
show_table(df_purchases_schema, "Purchases Table Schema")

# 2) Up to five rows from inside the pilot week.
print("\n--- Sample Data from Purchases (Pilot Week) ---")
query_purchases_sample = f"""
SELECT *
FROM {purchases_table}
WHERE {purchases_window}
LIMIT 5;
"""
df_purchases_sample = run_query(query_purchases_sample)
show_table(df_purchases_sample, "Purchases Sample (Pilot Week)")

# 3) Total number of rows inside the pilot week.
print("\n--- Row Count for Purchases (Pilot Week) ---")
query_purchases_count = f"""
SELECT COUNT(*) as count
FROM {purchases_table}
WHERE {purchases_window};
"""
df_purchases_count = run_query(query_purchases_count)

total_purchases = df_purchases_count['COUNT'].iloc[0]
print(f"Total Purchases in Pilot Week: {total_purchases:,}")


--- Schema for Purchases ---

Purchases Table Schema
+---------------+-------------------+--------+---------+-----------+---------------+--------------+---------+--------------+-----------+---------------+------------------+
| name          | type              | kind   | null?   | default   | primary key   | unique key   | check   | expression   | comment   | policy name   | privacy domain   |
| PURCHASE_ID   | VARCHAR(16777216) | COLUMN | Y       |           | N             | N            |         |              |           |               |                  |
+---------------+-------------------+--------+---------+-----------+---------------+--------------+---------+--------------+-----------+---------------+------------------+
| PURCHASED_AT  | TIMESTAMP_NTZ(9)  | COLUMN | N       |           | N             | N            |         |              |           |               |                  |
+---------------+-------------------+--------+---------+-----------+---------------+--

In [None]:
import os
import pandas as pd
from tabulate import tabulate
from dotenv import load_dotenv
import snowflake.connector
import sys

# --- Dedicated Snowflake session for the discovery queries in this cell ---
# Credentials come from the environment; the warehouse falls back to COMPUTE_WH.
load_dotenv()
conn_explore = snowflake.connector.connect(
    user=os.environ.get("SNOWFLAKE_USER"),
    password=os.environ.get("SNOWFLAKE_PASSWORD"),
    account=os.environ.get("SNOWFLAKE_ACCOUNT"),
    warehouse=os.environ.get("SNOWFLAKE_WAREHOUSE", "COMPUTE_WH"),
    database="INCREMENTALITY",  # Connect to shared DB to query metadata
    schema="INCREMENTALITY_RESEARCH",
)
# Cursor used by run_explore_query; closed at the end of this cell.
cursor_explore = conn_explore.cursor()

def run_explore_query(query):
    """Best-effort query runner for the discovery cell.

    Executes ``query`` on ``cursor_explore`` and returns the rows as a
    ``pandas.DataFrame`` with columns named after ``cursor_explore.description``.

    Any exception is reported on stdout and converted into an empty
    ``DataFrame``, so a failing discovery step does not stop the cell. An empty
    ``DataFrame`` is also returned when the statement has no result set.
    """
    try:
        cursor_explore.execute(query)
        # No result-set metadata means there is nothing to fetch.
        if not cursor_explore.description:
            return pd.DataFrame()
        rows = cursor_explore.fetchall()
        header = [column_meta[0] for column_meta in cursor_explore.description]
        return pd.DataFrame(rows, columns=header)
    except Exception as discovery_error:
        print(f"Error running discovery query: {discovery_error}")
        return pd.DataFrame()

def show_table_explore(df, title=""):
    """Print ``df`` as a grid-formatted text table, optionally under a heading.

    Args:
        df: Tabular data handed to ``tabulate`` (column names become headers,
            the index is not shown).
        title: Optional heading. When truthy it is printed after a blank line
            and underlined with ``=`` characters of the same length.
    """
    if title:
        print(f"\n{title}")
        underline = "=" * len(title)
        print(underline)
    grid = tabulate(df, headers='keys', tablefmt='grid', showindex=False)
    print(grid)

print("--- Advanced Discovery of Writable Databases & Schemas ---")

# Databases that are skipped when looking for a writable target.
EXCLUDED_DATABASES = ('INCREMENTALITY', 'SNOWFLAKE', 'USER$PRANJAL')

# The discovery connection is released in the `finally` below even when one of
# the steps raises (e.g. an expected column is missing from a result).
try:
    # 1. Get current role and default database/warehouse (if any)
    print("\nYour Current Role and Connection Defaults:")
    df_context = run_explore_query("SELECT CURRENT_ROLE(), CURRENT_DATABASE(), CURRENT_SCHEMA(), CURRENT_WAREHOUSE();")
    show_table_explore(df_context, "Current Snowflake Context")
    current_role = df_context['CURRENT_ROLE()'].iloc[0] if not df_context.empty else 'UNKNOWN_ROLE'

    # 2. List all accessible databases
    print(f"\nAll Databases accessible by role '{current_role}':")
    df_all_dbs = run_explore_query("SHOW DATABASES;")
    show_table_explore(df_all_dbs, "All Accessible Databases")
    print("\nLook for databases that are NOT 'INCREMENTALITY', 'SNOWFLAKE', or 'USER$PRANJAL'.")
    print("These are often named 'DEV', 'SANDBOX', 'ANALYTICS', 'SCRATCHPAD', or a team-specific name.")
    print("The 'owner' column can also give a clue.")

    # 3. For each non-shared, non-personal database, list schemas
    print("\nExploring Schemas in potential writable databases:")
    potential_writable_dbs = []
    if not df_all_dbs.empty:
        # A non-empty 'origin' helps identify shared DBs, so those are skipped.
        potential_writable_dbs = [
            db_name
            for db_name, db_origin in zip(df_all_dbs['name'], df_all_dbs['origin'])
            if db_name not in EXCLUDED_DATABASES and not db_origin
        ]

    if potential_writable_dbs:
        for db in potential_writable_dbs:
            print(f"\nSchemas in database: {db}")
            # Double-quote the name (doubling embedded quotes) so databases
            # with lower-case or special characters are addressed exactly as
            # SHOW DATABASES reported them.
            quoted_db = '"' + str(db).replace('"', '""') + '"'
            df_schemas = run_explore_query(f"SHOW SCHEMAS IN DATABASE {quoted_db};")
            if not df_schemas.empty:
                show_table_explore(df_schemas, f"Schemas in {db}")
                print("Look for schemas like 'PUBLIC', 'DEV', 'TEST', or specific project schemas.")
            else:
                print(f"  (No schemas found or accessible in {db})")
    else:
        print("\nNo obvious non-shared, non-personal databases found. You might need to ask your Snowflake admin for a scratchpad database.")
finally:
    try:
        cursor_explore.close()
    finally:
        conn_explore.close()

print("\n--- Advanced Discovery complete. ---")
print("\n**IMPORTANT:** From the outputs above, carefully choose one database name and one schema name")
print("             where you believe your role has 'CREATE TABLE' permissions. Then, update")
print("             'WRITABLE_DATABASE' and 'WRITABLE_SCHEMA' in Cell 1 of the main script.")

--- Advanced Discovery of Writable Databases & Schemas ---

Your Current Role and Connection Defaults:

Current Snowflake Context
+------------------+----------------------+-------------------------+-----------------------+
| CURRENT_ROLE()   | CURRENT_DATABASE()   | CURRENT_SCHEMA()        | CURRENT_WAREHOUSE()   |
| PUBLIC           | INCREMENTALITY       | INCREMENTALITY_RESEARCH | COMPUTE_WH            |
+------------------+----------------------+-------------------------+-----------------------+

All Databases accessible by role 'PUBLIC':

All Accessible Databases
+----------------------------------+----------------+--------------+--------------+--------------------------------+--------------+-----------+-----------+------------------+-------------------+-------------------+---------------------+
| created_on                       | name           | is_default   | is_current   | origin                         | owner        | comment   | options   |   retention_time | kind        