In [1]:
import os
import pandas as pd
from tabulate import tabulate
from dotenv import load_dotenv
import snowflake.connector
import sys
# New import for date calculations
from datetime import datetime, timedelta

load_dotenv()

conn = snowflake.connector.connect(
    user=os.getenv('SNOWFLAKE_USER'),
    password=os.getenv('SNOWFLAKE_PASSWORD'),
    account=os.getenv('SNOWFLAKE_ACCOUNT'),
    warehouse=os.getenv('SNOWFLAKE_WAREHOUSE', 'COMPUTE_WH'),
    database='INCREMENTALITY',
    schema='INCREMENTALITY_RESEARCH'
)
cursor = conn.cursor()

def run_query(query):
    try:
        cursor.execute(query)
        if cursor.description:
            results = cursor.fetchall()
            columns = [desc[0] for desc in cursor.description]
            return pd.DataFrame(results, columns=columns)
        return pd.DataFrame()
    except snowflake.connector.ProgrammingError as e:
        print(f"\nERROR executing query:\n{query}\nDetails: {e}")
        raise

def show_table(df, title="", num_rows=None):
    if title:
        print(f"\n{title}")
        print("="*len(title))
    if df.empty:
        print("No data to display.")
    else:
        display_df = df.head(num_rows) if num_rows is not None else df
        print(tabulate(display_df, headers='keys', tablefmt='grid', showindex=False))

print("✅ Connected to Snowflake")

✅ Connected to Snowflake


# data

In [None]:
# Assuming 'cursor' and 'run_query' are already defined and working
# and 'show_table' is available for display.

print("\n--- Scrutinizing Raw Data Samples ---")
print("="*40)

# Sample from AUCTIONS_USERS
print("\nAUCTIONS_USERS Sample (showing AUCTION_ID and OPAQUE_USER_ID):")
au_sample_query = "SELECT * FROM AUCTIONS_USERS LIMIT 5;"
au_df = run_query(au_sample_query)
# For binary columns, convert to hex string in Python for display clarity
if 'AUCTION_ID' in au_df.columns and au_df['AUCTION_ID'].apply(lambda x: isinstance(x, bytearray)).any():
    au_df['AUCTION_ID'] = au_df['AUCTION_ID'].apply(lambda x: x.hex() if isinstance(x, bytearray) else x)
show_table(au_df, num_rows=5)


# Sample from AUCTIONS_RESULTS
print("\nAUCTIONS_RESULTS Sample (showing AUCTION_ID, VENDOR_ID, CAMPAIGN_ID, PRODUCT_ID):")
ar_sample_query = "SELECT * FROM AUCTIONS_RESULTS LIMIT 5;"
ar_df = run_query(ar_sample_query)
# Convert binary columns to hex string in Python for display clarity
binary_cols_ar = ['AUCTION_ID', 'VENDOR_ID', 'CAMPAIGN_ID']
for col in binary_cols_ar:
    if col in ar_df.columns and ar_df[col].apply(lambda x: isinstance(x, bytearray)).any():
        ar_df[col] = ar_df[col].apply(lambda x: x.hex() if isinstance(x, bytearray) else x)
show_table(ar_df, num_rows=5)


# Sample from IMPRESSIONS
# Crucially, inspect AUCTION_ID and VENDOR_ID formats here
print("\nIMPRESSIONS Sample (showing AUCTION_ID, USER_ID, VENDOR_ID, PRODUCT_ID):")
imp_sample_query = "SELECT * FROM IMPRESSIONS LIMIT 5;"
imp_df = run_query(imp_sample_query)
show_table(imp_df, num_rows=5) # These should be VARCHAR strings, check their format


# Sample from CLICKS
# Crucially, inspect AUCTION_ID and VENDOR_ID formats here
print("\nCLICKS Sample (showing AUCTION_ID, USER_ID, VENDOR_ID):")
click_sample_query = "SELECT * FROM CLICKS LIMIT 5;"
click_df = run_query(click_sample_query)
show_table(click_df, num_rows=5) # These should be VARCHAR strings, check their format


# Sample from PURCHASES (if applicable, for future funnel stages)
print("\nPURCHASES Sample (showing USER_ID, PRODUCT_ID):")
purchase_sample_query = "SELECT * FROM PURCHASES LIMIT 5;"
purchase_df = run_query(purchase_sample_query)
show_table(purchase_df, num_rows=5)


--- Scrutinizing Raw Data Samples ---

AUCTIONS_USERS Sample (showing AUCTION_ID and OPAQUE_USER_ID):
+----------------------------------+-------------------------------------------+----------------------------+
| AUCTION_ID                       | OPAQUE_USER_ID                            | CREATED_AT                 |
| 068647b9d19377ee9e04bb253af1acf1 | ext1:6195a609-e976-40c3-8fe0-9d71745685ea | 2025-07-02 00:21:49.090000 |
+----------------------------------+-------------------------------------------+----------------------------+
| 068647c5db6a7eecbb044beb8dde99ed | ext1:794369ee-f9cc-4650-9911-cd183d278c1d | 2025-07-02 00:25:01.755000 |
+----------------------------------+-------------------------------------------+----------------------------+
| 068647bf511271a0b704c80e0183e479 | ext1:d3f8ee19-e01a-4f8c-bc34-5279fd44621b | 2025-07-02 00:23:17.060000 |
+----------------------------------+-------------------------------------------+----------------------------+
| 068647bddc5e758

# funnel

In [20]:
import os
import pandas as pd
from tabulate import tabulate
from dotenv import load_dotenv
import snowflake.connector
import sys
from datetime import datetime, timedelta

# --- (Assume previous setup code for connection and helper functions is here) ---

# --- Define Time Windows ---
PAW_START = '2025-03-19 20:00:00'
PAW_END = '2025-03-19 20:03:00'
SAMPLE_AUCTION_COUNT = 100 # Reduced for a quick test run

# Define a single, direct buffer from Auction to Click
AUCTION_CLICK_BUFFER_START_MINUTES = 5
AUCTION_CLICK_BUFFER_END_MINUTES = 60

# Calculate the global search window for clicks based on the auction window
GLOBAL_CLICKS_SEARCH_START = f"DATEADD(minute, -{AUCTION_CLICK_BUFFER_START_MINUTES}, '{PAW_START}')"
GLOBAL_CLICKS_SEARCH_END = f"DATEADD(minute, {AUCTION_CLICK_BUFFER_END_MINUTES}, '{PAW_END}')"

print(f"\n--- Constructing Simplified Funnel (Auctions -> Clicks Only) ---")
print(f"   Primary Auction Window: {PAW_START} to {PAW_END}")
print(f"   Auction-Click Buffer: -{AUCTION_CLICK_BUFFER_START_MINUTES} to +{AUCTION_CLICK_BUFFER_END_MINUTES} minutes")

bid_funnel_query = f"""
WITH
SampledAuctionIDs AS (
    SELECT
        AR.AUCTION_ID
    FROM
        AUCTIONS_RESULTS AR SAMPLE ROW ({SAMPLE_AUCTION_COUNT} ROWS)
    WHERE
        AR.CREATED_AT >= '{PAW_START}'::TIMESTAMP_NTZ
        AND AR.CREATED_AT < '{PAW_END}'::TIMESTAMP_NTZ
),
PreFilteredAuctions AS (
    SELECT
        AU.OPAQUE_USER_ID AS USER_ID,
        AR.AUCTION_ID,
        AR.VENDOR_ID,
        AR.CAMPAIGN_ID,
        AR.PRODUCT_ID,
        AR.RANKING,
        AR.IS_WINNER,
        AR.CREATED_AT AS AUCTION_TS
    FROM
        AUCTIONS_USERS AU
    JOIN
        AUCTIONS_RESULTS AR ON AU.AUCTION_ID = AR.AUCTION_ID
    WHERE
        AR.AUCTION_ID IN (SELECT AUCTION_ID FROM SampledAuctionIDs)
),
PreFilteredClicks AS (
    SELECT
        REPLACE(C.AUCTION_ID, '-', '') AS AUCTION_ID, -- Remove hyphens to match auction hex format
        C.USER_ID,
        REPLACE(C.VENDOR_ID, '-', '') AS VENDOR_ID, -- Remove hyphens
        C.INTERACTION_ID,
        C.OCCURRED_AT
    FROM
        CLICKS C
    WHERE
        REPLACE(C.AUCTION_ID, '-', '') IN (SELECT HEX_ENCODE(AUCTION_ID) FROM SampledAuctionIDs)
        AND C.OCCURRED_AT >= {GLOBAL_CLICKS_SEARCH_START}::TIMESTAMP_NTZ
        AND C.OCCURRED_AT < {GLOBAL_CLICKS_SEARCH_END}::TIMESTAMP_NTZ
),
-- Simplified Aggregation: Go directly from winning bids to clicks
AggregatedClickOutcomesPerBid AS (
    SELECT
        PFA.AUCTION_ID,
        PFA.USER_ID,
        PFA.PRODUCT_ID,
        PFA.RANKING,
        PFA.CAMPAIGN_ID,
        PFA.VENDOR_ID,
        MIN(PFC.OCCURRED_AT) AS FIRST_CLICK_TS,
        COUNT(DISTINCT PFC.INTERACTION_ID) AS CLICK_CNT
    FROM
        PreFilteredAuctions PFA
    LEFT JOIN
        PreFilteredClicks PFC
        ON HEX_ENCODE(PFA.AUCTION_ID) = PFC.AUCTION_ID
        AND PFA.USER_ID = PFC.USER_ID
        AND HEX_ENCODE(PFA.VENDOR_ID) = PFC.VENDOR_ID
        AND PFC.OCCURRED_AT BETWEEN DATEADD(minute, -{AUCTION_CLICK_BUFFER_START_MINUTES}, PFA.AUCTION_TS)
                                AND DATEADD(minute, {AUCTION_CLICK_BUFFER_END_MINUTES}, PFA.AUCTION_TS)
    WHERE
        PFA.IS_WINNER = TRUE -- Only attribute clicks to winning bids
    GROUP BY 1, 2, 3, 4, 5, 6
)
-- Final Select: Join all bids back to the aggregated click outcomes
SELECT
    HEX_ENCODE(PFA.AUCTION_ID) AS AUCTIONID,
    PFA.USER_ID AS USERID,
    PFA.PRODUCT_ID AS PRODUCTID,
    HEX_ENCODE(PFA.CAMPAIGN_ID) AS CAMPAIGNID,
    HEX_ENCODE(PFA.VENDOR_ID) AS VENDORID,
    PFA.RANKING AS BIDRANK,
    PFA.IS_WINNER,
    PFA.AUCTION_TS AS AUCTIONTS,
    NULL AS FIRST_IMP_TS, -- Removed impression logic
    0 AS IMP_CNT,          -- Removed impression logic
    AIO.FIRST_CLICK_TS,
    COALESCE(AIO.CLICK_CNT, 0) AS CLICK_CNT,
    NULL AS FIRST_PURCHASE_TS,
    0 AS PURCHASE_CNT,
    0.0 AS REVENUE_AMT
FROM
    PreFilteredAuctions PFA
LEFT JOIN
    AggregatedClickOutcomesPerBid AIO
    ON PFA.AUCTION_ID = AIO.AUCTION_ID
    AND PFA.USER_ID = AIO.USER_ID
    AND PFA.PRODUCT_ID = AIO.PRODUCT_ID
    AND PFA.RANKING = AIO.RANKING
    AND PFA.CAMPAIGN_ID = AIO.CAMPAIGN_ID
    AND PFA.VENDOR_ID = AIO.VENDOR_ID
ORDER BY
    AUCTIONID, PFA.RANKING;
"""

try:
    print(f"\nExecuting simplified (Auctions -> Clicks) funnel query...")
    funnel_df = run_query(bid_funnel_query)
    print(f"\n✅ Successfully generated simplified funnel with {len(funnel_df):,} rows.")

    funnel_df.columns = [col.lower() for col in funnel_df.columns]
    
    show_table(funnel_df.head(10), f"Simplified Funnel Data Sample (Random {SAMPLE_AUCTION_COUNT} Auctions)")

except Exception as e:
    print(f"\nAn error occurred during simplified funnel generation: {e}")


--- Constructing Simplified Funnel (Auctions -> Clicks Only) ---
   Primary Auction Window: 2025-03-19 20:00:00 to 2025-03-19 20:03:00
   Auction-Click Buffer: -5 to +60 minutes

Executing simplified (Auctions -> Clicks) funnel query...

✅ Successfully generated simplified funnel with 4,981 rows.

Simplified Funnel Data Sample (Random 100 Auctions)
+----------------------------------+-------------------------------------------+--------------------------+----------------------------------+----------------------------------+-----------+-------------+----------------------------+----------------+-----------+------------------+-------------+---------------------+----------------+---------------+
| auctionid                        | userid                                    | productid                | campaignid                       | vendorid                         |   bidrank | is_winner   | auctionts                  | first_imp_ts   |   imp_cnt | first_click_ts   |   click_cnt | fir

In [12]:
# --- Exploratory Data Analysis (EDA) ---
print("\n--- Starting Exploratory Data Analysis ---")
print("="*40)

datetime_cols = ['auctionts', 'first_imp_ts', 'first_click_ts', 'first_purchase_ts']
for col in datetime_cols:
    if col in funnel_df.columns:
        funnel_df[col] = pd.to_datetime(funnel_df[col], errors='coerce')


# 1. DataFrame Overview
print("\nDataFrame Info:")
funnel_df.info()

print("\nBasic Statistics for Numerical Columns:")
show_table(funnel_df.describe().T, "DataFrame Description")

# 2. Key Counts and Unique Identifiers
print("\nKey Counts:")
total_bids = len(funnel_df)
unique_auction_ids = funnel_df['auctionid'].nunique()
unique_user_ids = funnel_df['userid'].nunique()
unique_vendor_ids = funnel_df['vendorid'].nunique()
unique_campaign_ids = funnel_df['campaignid'].nunique()
unique_product_ids = funnel_df['productid'].nunique()

print(f"Total Bids Processed: {total_bids:,}")
print(f"Unique Auction IDs: {unique_auction_ids:,}")
print(f"Unique User IDs: {unique_user_ids:,}")
print(f"Unique Vendor IDs: {unique_vendor_ids:,}")
print(f"Unique Campaign IDs: {unique_campaign_ids:,}")
print(f"Unique Product IDs: {unique_product_ids:,}")

# 3. Distribution of Bid Rank
print("\nDistribution of Bid Ranks:")
bid_rank_distribution = funnel_df['bidrank'].value_counts().sort_index().to_frame(name='bid_count')
bid_rank_distribution['percentage'] = (bid_rank_distribution['bid_count'] / total_bids * 100).round(2)
show_table(bid_rank_distribution, "Bid Rank Distribution")

# 4. Funnel Metrics (Overall)
print("\nOverall Funnel Metrics:")
bids_won = funnel_df['is_winner'].sum()
bids_to_impressions = funnel_df[funnel_df['imp_cnt'] > 0]
impressions_generated = bids_to_impressions['imp_cnt'].sum()
bids_to_clicks = funnel_df[funnel_df['click_cnt'] > 0]
clicks_generated = bids_to_clicks['click_cnt'].sum()

print(f"Total Bids: {total_bids:,}")
print(f"Bids that Won Auction: {bids_won:,} ({((bids_won/total_bids)*100):.2f}%)")
print(f"Bids that Led to >=1 Impression: {len(bids_to_impressions):,} ({((len(bids_to_impressions)/total_bids)*100):.2f}%)")
print(f"Total Impressions Generated (from winning bids): {impressions_generated:,}")

# Handle division by zero for CTR if no impressions
if impressions_generated > 0:
    print(f"Impressions that Led to >=1 Click: {len(bids_to_clicks):,} ({((len(bids_to_clicks)/impressions_generated)*100):.2f}% of Impressions)")
else:
    print(f"Impressions that Led to >=1 Click: {len(bids_to_clicks):,} (0.00% of Impressions - No impressions generated)")

print(f"Total Clicks Generated: {clicks_generated:,}")

win_rate = (bids_won / total_bids) if total_bids > 0 else 0
imp_rate_from_bid = (len(bids_to_impressions) / total_bids) if total_bids > 0 else 0
ctr_from_impression = (clicks_generated / impressions_generated) if impressions_generated > 0 else 0

print(f"\nCalculated Rates:")
print(f"  Bid Win Rate: {win_rate:.2%}")
print(f"  Impression Rate (from total bids): {imp_rate_from_bid:.2%}")
print(f"  Click-Through Rate (CTR, from total impressions): {ctr_from_impression:.2%}")

# 5. Funnel Metrics by Bid Rank
print("\nFunnel Metrics by Bid Rank:")
rank_summary = funnel_df.groupby('bidrank').agg(
    total_bids=('bidrank', 'count'),
    bids_won=('is_winner', 'sum'),
    bids_led_to_imp=('imp_cnt', lambda x: (x > 0).sum()),
    total_imps_generated=('imp_cnt', 'sum'),
    bids_led_to_click=('click_cnt', lambda x: (x > 0).sum()),
    total_clicks_generated=('click_cnt', 'sum')
)

rank_summary['win_rate'] = (rank_summary['bids_won'] / rank_summary['total_bids']).fillna(0)
rank_summary['imp_rate_from_bid'] = (rank_summary['bids_led_to_imp'] / rank_summary['total_bids']).fillna(0)
rank_summary['ctr_from_impression'] = (rank_summary['total_clicks_generated'] / rank_summary['total_imps_generated']).fillna(0)
rank_summary['avg_imps_per_bid_won'] = (rank_summary['total_imps_generated'] / rank_summary['bids_won']).fillna(0)

eda_df = rank_summary[['total_bids', 'bids_won', 'win_rate', 'bids_led_to_imp', 
                       'total_imps_generated', 'imp_rate_from_bid', 'avg_imps_per_bid_won', 
                       'bids_led_to_click', 'total_clicks_generated', 'ctr_from_impression']]

eda_df['win_rate'] = eda_df['win_rate'].apply(lambda x: f"{x:.2%}")
eda_df['imp_rate_from_bid'] = eda_df['imp_rate_from_bid'].apply(lambda x: f"{x:.2%}")
eda_df['ctr_from_impression'] = eda_df['ctr_from_impression'].apply(lambda x: f"{x:.2%}")
eda_df['avg_imps_per_bid_won'] = eda_df['avg_imps_per_bid_won'].round(2)

show_table(eda_df, "Funnel Metrics by Bid Rank")

# 6. Time-to-Event Analysis (if data available)
print("\nTime-to-Event Analysis (in seconds):")
time_to_imp = (funnel_df['first_imp_ts'] - funnel_df['auctionts']).dt.total_seconds().dropna()
time_imp_to_click = (funnel_df['first_click_ts'] - funnel_df['first_imp_ts']).dt.total_seconds().dropna()

if not time_to_imp.empty:
    print(f"  Average time from Auction to First Impression: {time_to_imp.mean():.2f} seconds")
    print(f"  Median time from Auction to First Impression: {time_to_imp.median():.2f} seconds")
else:
    print("  No data to calculate time from Auction to First Impression.")

if not time_imp_to_click.empty:
    print(f"  Average time from First Impression to First Click: {time_imp_to_click.mean():.2f} seconds")
    print(f"  Median time from First Impression to First Click: {time_imp_to_click.median():.2f} seconds")
else:
    print("  No data to calculate time from First Impression to First Click.")

print("\n--- EDA Complete ---")


--- Starting Exploratory Data Analysis ---

DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42317 entries, 0 to 42316
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   auctionid          42317 non-null  object        
 1   userid             42317 non-null  object        
 2   productid          42317 non-null  object        
 3   campaignid         42317 non-null  object        
 4   vendorid           42317 non-null  object        
 5   bidrank            42317 non-null  int64         
 6   is_winner          42317 non-null  bool          
 7   auctionts          42317 non-null  datetime64[ns]
 8   first_imp_ts       0 non-null      datetime64[ns]
 9   imp_cnt            42317 non-null  int64         
 10  first_click_ts     0 non-null      datetime64[ns]
 11  click_cnt          42317 non-null  int64         
 12  first_purchase_ts  0 non-null      datetime64[ns]
 13  

# reverse funnel

In [None]:
# --- Configuration Parameters ---
# Primary Purchase Window (PPW) - Defines the core time range for purchases we're analyzing.
PPW_START = '2025-07-03 00:00:00'
PPW_END = '2025-07-04 00:00:00' # Exclusive end (i.e., data up to 2025-07-03 23:59:59)

# Look-back window for ad interactions BEFORE a purchase
LOOKBACK_WINDOW_HOURS = 24 * 7 # e.g., 7 days

# --- Calculate Global Search Bounds for Pre-filtering (for large tables) ---
# We need to pull impressions and clicks over a wider range
# to ensure we capture all potential interactions within the look-back window for any purchase.

# Impressions/Clicks search range:
# Start: PPW_START minus LOOKBACK_WINDOW_HOURS
GLOBAL_AD_INTERACTIONS_SEARCH_START = f"DATEADD(hour, -{LOOKBACK_WINDOW_HOURS}, '{PPW_START}')"
# End: PPW_END (interactions must happen BEFORE purchase)
GLOBAL_AD_INTERACTIONS_SEARCH_END = f"'{PPW_END}'"

print(f"\n--- Constructing Purchase-First Look-Back Funnel ---")
print(f"   Primary Purchase Window: {PPW_START} to {PPW_END} (exclusive)")
print(f"   Look-Back Window for Ads: {LOOKBACK_WINDOW_HOURS} hours prior to purchase")
print(f"   Global Ad Interaction Search Range: {GLOBAL_AD_INTERACTIONS_SEARCH_START} to {GLOBAL_AD_INTERACTIONS_SEARCH_END}")

# --- SQL Query for Purchase-First Look-Back ---
purchase_lookback_query = f"""
WITH
-- Stage 0: Pre-filter all raw data tables for performance
-- Filter Purchases to the main analysis window
TargetPurchases AS (
    SELECT
        PURCHASE_ID,
        PURCHASED_AT,
        PRODUCT_ID,
        QUANTITY,
        UNIT_PRICE,
        USER_ID,
        (QUANTITY * UNIT_PRICE) AS REVENUE_AMT
    FROM
        PURCHASES
    WHERE
        PURCHASED_AT >= '{PPW_START}'::TIMESTAMP_NTZ
        AND PURCHASED_AT < '{PPW_END}'::TIMESTAMP_NTZ
),
-- Filter and enrich Impressions that could be in a look-back window
PreFilteredImpressionsWithContext AS (
    SELECT
        I.INTERACTION_ID,
        I.AUCTION_ID,
        I.USER_ID,
        I.PRODUCT_ID,
        I.OCCURRED_AT AS IMPRESSION_TS,
        AR.RANKING AS BID_RANK,
        AR.CAMPAIGN_ID,
        AR.VENDOR_ID
    FROM
        IMPRESSIONS I
    JOIN
        AUCTIONS_RESULTS AR
        ON I.AUCTION_ID = AR.AUCTION_ID::VARCHAR -- Correct cast for AUCTIONS_RESULTS.AUCTION_ID (BINARY) vs IMPRESSIONS.AUCTION_ID (VARCHAR)
    WHERE
        I.OCCURRED_AT >= {GLOBAL_AD_INTERACTIONS_SEARCH_START}::TIMESTAMP_NTZ
        AND I.OCCURRED_AT < {GLOBAL_AD_INTERACTIONS_SEARCH_END}::TIMESTAMP_NTZ
),
-- Filter Clicks that could be in a look-back window
PreFilteredClicksWithContext AS (
    SELECT
        C.INTERACTION_ID,
        C.AUCTION_ID,
        C.USER_ID,
        C.OCCURRED_AT AS CLICK_TS,
        PFIC.BID_RANK, -- Link back to impression's context for rank
        PFIC.CAMPAIGN_ID,
        PFIC.VENDOR_ID,
        PFIC.PRODUCT_ID -- Link back to impression's product
    FROM
        CLICKS C
    JOIN PreFilteredImpressionsWithContext PFIC
        ON C.INTERACTION_ID = PFIC.INTERACTION_ID -- Clicks link to impressions by INTERACTION_ID
        AND C.USER_ID = PFIC.USER_ID -- Ensure same user for click
    WHERE
        C.OCCURRED_AT >= {GLOBAL_AD_INTERACTIONS_SEARCH_START}::TIMESTAMP_NTZ
        AND C.OCCURRED_AT < {GLOBAL_AD_INTERACTIONS_SEARCH_END}::TIMESTAMP_NTZ
),

-- Stage 1: Find all Impressions preceding each purchase within the look-back window
PrecedingImpressionsPerPurchase AS (
    SELECT
        TP.PURCHASE_ID,
        COUNT(DISTINCT PFIC.INTERACTION_ID) AS PRE_IMPRESSION_COUNT,
        MIN(PFIC.IMPRESSION_TS) AS PRE_IMPRESSION_FIRST_TS,
        MAX(PFIC.IMPRESSION_TS) AS PRE_IMPRESSION_LAST_TS,
        LISTAGG(DISTINCT PFIC.BID_RANK, ',') WITHIN GROUP (ORDER BY PFIC.BID_RANK) AS PRE_IMPRESSION_RANKS_LIST,
        LISTAGG(DISTINCT PFIC.INTERACTION_ID, ',') WITHIN GROUP (ORDER BY PFIC.IMPRESSION_TS) AS PRE_IMPRESSION_IDS_LIST
    FROM
        TargetPurchases TP
    JOIN
        PreFilteredImpressionsWithContext PFIC
        ON TP.USER_ID = PFIC.USER_ID
        AND TP.PRODUCT_ID = PFIC.PRODUCT_ID
        AND PFIC.IMPRESSION_TS < TP.PURCHASED_AT -- Impression must be strictly before purchase
        AND PFIC.IMPRESSION_TS >= DATEADD(hour, -{LOOKBACK_WINDOW_HOURS}, TP.PURCHASED_AT)
    GROUP BY
        TP.PURCHASE_ID
),

-- Stage 2: Find all Clicks preceding each purchase within the look-back window
PrecedingClicksPerPurchase AS (
    SELECT
        TP.PURCHASE_ID,
        COUNT(DISTINCT PFCC.INTERACTION_ID) AS PRE_CLICK_COUNT,
        MIN(PFCC.CLICK_TS) AS PRE_CLICK_FIRST_TS,
        MAX(PFCC.CLICK_TS) AS PRE_CLICK_LAST_TS,
        LISTAGG(DISTINCT PFCC.BID_RANK, ',') WITHIN GROUP (ORDER BY PFCC.BID_RANK) AS PRE_CLICK_RANKS_LIST,
        LISTAGG(DISTINCT PFCC.INTERACTION_ID, ',') WITHIN GROUP (ORDER BY PFCC.CLICK_TS) AS PRE_CLICK_IDS_LIST
    FROM
        TargetPurchases TP
    JOIN
        PreFilteredClicksWithContext PFCC
        ON TP.USER_ID = PFCC.USER_ID
        AND TP.PRODUCT_ID = PFCC.PRODUCT_ID
        AND PFCC.CLICK_TS < TP.PURCHASED_AT -- Click must be strictly before purchase
        AND PFCC.CLICK_TS >= DATEADD(hour, -{LOOKBACK_WINDOW_HOURS}, TP.PURCHASED_AT)
    GROUP BY
        TP.PURCHASE_ID
)

-- Final Stage: Join all purchase details with their preceding ad interactions
SELECT
    TP.PURCHASE_ID,
    TP.PURCHASED_AT,
    TP.USER_ID,
    TP.PRODUCT_ID,
    TP.QUANTITY,
    TP.UNIT_PRICE,
    TP.REVENUE_AMT,
    COALESCE(PIPP.PRE_IMPRESSION_COUNT, 0) AS PRE_IMPRESSION_COUNT,
    PIPP.PRE_IMPRESSION_FIRST_TS,
    PIPP.PRE_IMPRESSION_LAST_TS,
    PIPP.PRE_IMPRESSION_RANKS_LIST,
    COALESCE(PCPP.PRE_CLICK_COUNT, 0) AS PRE_CLICK_COUNT,
    PCPP.PRE_CLICK_FIRST_TS,
    PCPP.PRE_CLICK_LAST_TS,
    PCPP.PRE_CLICK_RANKS_LIST
FROM
    TargetPurchases TP
LEFT JOIN
    PrecedingImpressionsPerPurchase PIPP
    ON TP.PURCHASE_ID = PIPP.PURCHASE_ID
LEFT JOIN
    PrecedingClicksPerPurchase PCPP
    ON TP.PURCHASE_ID = PCPP.PURCHASE_ID
ORDER BY
    TP.PURCHASED_AT;
"""

try:
    print(f"\nExecuting purchase-first look-back query...")
    purchase_lookback_df = run_query(purchase_lookback_query)
    print(f"\n✅ Successfully generated purchase-first look-back data with {len(purchase_lookback_df):,} rows.")

    purchase_lookback_df.columns = [col.lower() for col in purchase_lookback_df.columns]
    
    show_table(purchase_lookback_df.head(10), "Purchase-First Look-Back Data Sample")

except Exception as e:
    print(f"\nAn error occurred during purchase-first look-back analysis: {e}")

# IMPORTANT: Do NOT close connection here as requested.
# If you are done with all analysis, you would typically add cursor.close() and conn.close()
# in a separate cell at the very end of your notebook/script.
# print("\nDisconnected from Snowflake.") # This line is commented out as per instruction.