# Ad Stock Panel Construction

This notebook processes the raw data from notebook 01 and engineers the ad stock features for causal analysis.

**Key Features:**
- Macro-session construction with 3-day inactivity threshold
- Ad stock calculation with exponential decay (λ=0.15, half-life = ln 2 / λ ≈ 4.6 days)
- User-vendor-product-session panel construction
- Comprehensive feature engineering for fixed effects LPM

In [None]:
import polars as pl
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime, timedelta
import json
import gc
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Initialize timing: capture the wall-clock start so the final cell can report
# the total runtime as (end time - start_time).
start_time = datetime.now()
print(f"Script started at: {start_time}")

## 1. Configuration

In [None]:
# ===== CONFIGURATION =====

# --- Sessions ---
# Inactivity gap, in days, used as the threshold for opening a new macro-session.
SESSION_GAP_DAYS = 3

# --- Ad stock ---
# Per-day exponential decay rate; half-life = ln(2) / λ ≈ 4.6 days at λ = 0.15.
AD_STOCK_DECAY_LAMBDA = 0.15
# Contribution of a single event before decay; clicks count twice as much as impressions.
AD_STOCK_WEIGHT_IMPRESSION = 0.5
AD_STOCK_WEIGHT_CLICK = 1.0

# --- User history ---
# Look-back window (days) for the per-session "prior activity" control.
PRIOR_USER_HISTORY_DAYS = 30

# --- Data quality ---
# Upper percentile at which extreme values are capped.
WINSORIZE_PERCENTILE = 99

# --- Files ---
DATA_DIR = Path('./data')
OUTPUT_FILE = DATA_DIR / 'adstock_analysis_panel.parquet'
METADATA_FILE = DATA_DIR / 'adstock_panel_metadata.json'

# Echo the effective configuration in one pass so it is captured in the cell output.
_config_rule = "=" * 60
_config_lines = [
    _config_rule,
    "AD STOCK CONFIGURATION",
    _config_rule,
    "Session Definition:",
    f"  Inactivity gap threshold: {SESSION_GAP_DAYS} days",
    "\nAd Stock Parameters:",
    f"  Decay λ: {AD_STOCK_DECAY_LAMBDA} (half-life: {np.log(2)/AD_STOCK_DECAY_LAMBDA:.1f} days)",
    f"  Impression weight: {AD_STOCK_WEIGHT_IMPRESSION}",
    f"  Click weight: {AD_STOCK_WEIGHT_CLICK}",
    "\nOther Parameters:",
    f"  Prior history window: {PRIOR_USER_HISTORY_DAYS} days",
    f"  Winsorizing at: {WINSORIZE_PERCENTILE}th percentile",
]
print("\n".join(_config_lines))

## 2. Load Data

In [None]:
_rule = "=" * 80
print("\n" + _rule)
print("PHASE 1: DATA LOADING")
print(_rule)

# Extraction metadata is optional here: it is only echoed for the reader.
metadata_path = DATA_DIR / 'metadata_adstock.json'
if not metadata_path.exists():
    print("Warning: No metadata file found. Make sure to run notebook 01 first.")
else:
    with open(metadata_path, 'r') as f:
        metadata = json.load(f)
    print(f"\nLoading data from extraction: {metadata['timestamp']}")
    print(f"Period: {metadata['analysis_start_date']} to {metadata['analysis_end_date']}")
    print(f"Power vendors: {metadata['n_power_vendors']:,}")
    print(f"Power users: {metadata['n_power_users']:,}")

# Logical table name -> parquet file expected inside DATA_DIR
data_files = {
    'auctions_users': 'auctions_users_adstock.parquet',
    'impressions': 'impressions_adstock.parquet',
    'clicks': 'clicks_adstock.parquet',
    'purchases': 'purchases_adstock.parquet',
    'catalog': 'catalog_adstock.parquet',
    'power_vendors': 'power_vendors.parquet',
    'power_users': 'power_users.parquet'
}

# Read every file that is present; a missing file is only reported, so later
# cells that index `data[...]` for it will fail with a KeyError.
data = {}
for name, filename in tqdm(data_files.items(), desc="Loading data files"):
    filepath = DATA_DIR / filename
    if not filepath.exists():
        print(f"  Warning: {filename} not found")
        continue
    data[name] = pl.read_parquet(filepath)
    print(f"  {name}: {data[name].height:,} rows loaded")

## 3. Create Unified Event Stream

In [None]:
_rule = "=" * 80
print("\n" + _rule)
print("PHASE 2: CREATE UNIFIED EVENT STREAM")
print(_rule)


def _ad_event_frame(frame, time_column, label):
    """Project an impressions/clicks table onto the shared event-stream columns."""
    return frame.select([
        pl.col('user_id'),
        pl.col('vendor_id'),
        pl.col('product_id'),
        pl.col(time_column).alias('timestamp'),
        pl.lit(label).alias('event_type'),
        pl.col('auction_id'),
    ])


# Impressions and clicks share one schema and differ only in the time column and label.
print("\nProcessing impressions...")
impressions_events = _ad_event_frame(data['impressions'], 'impression_time', 'impression')

print("Processing clicks...")
clicks_events = _ad_event_frame(data['clicks'], 'click_time', 'click')

# Purchases carry no vendor or auction, so those columns are typed nulls.
# NOTE(review): the Int64 cast truncates revenue if unit_price is fractional -
# confirm that prices are stored as integers (e.g. cents).
print("Processing purchases...")
_null_text = pl.lit(None).cast(pl.Utf8)
purchases_events = data['purchases'].select([
    pl.col('user_id'),
    _null_text.alias('vendor_id'),
    pl.col('product_id'),
    pl.col('purchase_time').alias('timestamp'),
    pl.lit('purchase').alias('event_type'),
    _null_text.alias('auction_id'),
    (pl.col('quantity') * pl.col('unit_price')).cast(pl.Int64).alias('revenue'),
])

# A diagonal concat aligns columns by name; `revenue` is null for ad events.
print("\nCombining all events...")
events_list = [impressions_events, clicks_events, purchases_events]
all_events = pl.concat(events_list, how="diagonal")

# The per-user chronological order is relied on by the sessionization step.
print("Sorting events chronologically...")
all_events = all_events.sort(['user_id', 'timestamp'])

timestamps = all_events['timestamp']
print(f"\nTotal events in stream: {all_events.height:,}")
print(f"Unique users: {all_events['user_id'].n_unique():,}")
print(f"Unique vendors: {all_events['vendor_id'].drop_nulls().n_unique():,}")
print(f"Date range: {timestamps.min()} to {timestamps.max()}")

# Row counts per event type, largest first
event_counts = all_events.group_by('event_type').count().sort('count', descending=True)
print("\nEvent type breakdown:")
for event_name, n_rows in event_counts.iter_rows():
    print(f"  {event_name}: {n_rows:,}")

# Drop the list of per-source frames now that they are combined
del events_list
gc.collect()

## 4. Sessionization

In [None]:
_rule = "=" * 80
print("\n" + _rule)
print(f"PHASE 3: SESSIONIZATION (using {SESSION_GAP_DAYS}-day gap)")
print(_rule)

print("Calculating time gaps between user events...")

# Gap to the same user's previous row; null on each user's first row.
# This relies on all_events already being sorted by (user_id, timestamp).
all_events = all_events.with_columns(
    pl.col('timestamp').diff().over('user_id').alias('time_since_last_event')
)

# A row opens a new session when it is the user's first row or when the gap
# strictly exceeds the inactivity threshold.
print("Identifying session breakpoints...")
_gap = pl.col('time_since_last_event')
_gap_limit_seconds = SESSION_GAP_DAYS * 86400
all_events = all_events.with_columns(
    (_gap.is_null() | (_gap.dt.total_seconds() > _gap_limit_seconds)).alias('is_new_session')
)

# The running count of session starts within each user gives a per-user session
# number; prefixing it with the user id makes the identifier globally unique.
print("Assigning unique session IDs...")
_session_number = pl.col('is_new_session').cast(pl.Int32).cum_sum().over('user_id')
all_events = all_events.with_columns(
    (pl.col('user_id') + '_s_' + _session_number.cast(pl.Utf8)).alias('macro_session_id')
)

# Helper columns are no longer needed once the session id exists
all_events = all_events.drop(['time_since_last_event', 'is_new_session'])

# One row per session: first/last event time and the owning user
session_times = all_events.group_by('macro_session_id').agg([
    pl.col('timestamp').min().alias('session_start'),
    pl.col('timestamp').max().alias('session_end'),
    pl.col('user_id').first()
])

print(f"\n✓ Successfully created {session_times.height:,} unique sessions")

# Session length in hours (first event to last event)
_session_span = pl.col('session_end') - pl.col('session_start')
session_times = session_times.with_columns(
    (_session_span.dt.total_seconds() / 3600).alias('duration_hours')
)
_durations = session_times['duration_hours']
avg_duration = _durations.mean()
median_duration = _durations.median()
print(f"  Average session duration: {avg_duration:.1f} hours")
print(f"  Median session duration: {median_duration:.1f} hours")

## 5. Calculate Ad Stock

This is the critical step where we calculate the decayed carryover effects of past advertising.

In [None]:
_rule = "=" * 80
print("\n" + _rule)
print("PHASE 4: AD STOCK CALCULATION")
print(_rule)

print(f"\nCalculating ad stock with parameters:")
print(f"  Decay λ = {AD_STOCK_DECAY_LAMBDA}")
print(f"  Impression weight = {AD_STOCK_WEIGHT_IMPRESSION}")
print(f"  Click weight = {AD_STOCK_WEIGHT_CLICK}")

# Only vendor-attributed impressions and clicks feed the ad stock
_is_ad_event = pl.col('event_type').is_in(['impression', 'click'])
_has_vendor = pl.col('vendor_id').is_not_null()
ad_events = all_events.filter(_is_ad_event & _has_vendor)

print(f"\nAd events for stock calculation: {ad_events.height:,}")

print("\nCalculating ad stock for each user-vendor-session combination...")

# One row per (user, vendor, session). Note that `session_start` here is the
# time of the first ad event from that vendor inside the session, which is the
# reference point the decay is measured from.
_uvs_keys = ['user_id', 'vendor_id', 'macro_session_id']
user_vendor_sessions = (
    ad_events
    .group_by(_uvs_keys)
    .agg(pl.col('timestamp').min().alias('session_start'))
    .sort(['user_id', 'vendor_id', 'session_start'])
)

print(f"  Unique (user, vendor, session) combinations: {user_vendor_sessions.height:,}")

# Function to calculate ad stock for a batch of sessions
def calculate_ad_stock_vectorized(ad_events_df, sessions_df):
    """Compute decayed ad stock per (user, vendor, session) with one join.

    Each row of ``sessions_df`` is paired with every ad event of the same
    user and vendor that happened strictly before the row's ``session_start``.
    Every such event contributes ``weight * exp(-λ * days_ago)``, where the
    weight is ``AD_STOCK_WEIGHT_IMPRESSION`` for impressions and
    ``AD_STOCK_WEIGHT_CLICK`` for anything else, and λ is
    ``AD_STOCK_DECAY_LAMBDA``.

    Args:
        ad_events_df: Frame with ``user_id``, ``vendor_id``, ``timestamp`` and
            ``event_type`` columns.
        sessions_df: Frame with ``user_id``, ``vendor_id``,
            ``macro_session_id`` and ``session_start`` columns.

    Returns:
        Frame keyed by ``user_id``, ``vendor_id``, ``macro_session_id`` with
        ``ad_stock``, ``prior_impressions`` and ``prior_clicks``. Sessions with
        no earlier events are absent from the result.
    """
    pair_keys = ['user_id', 'vendor_id']
    session_keys = pair_keys + ['macro_session_id']
    event_kind = pl.col('event_type')

    # Pair every session with the pair's full event history, then keep only
    # the events that precede the session's reference time.
    history = ad_events_df.select(['user_id', 'vendor_id', 'timestamp', 'event_type'])
    earlier = (
        sessions_df
        .join(history, on=pair_keys, how='inner')
        .filter(pl.col('timestamp') < pl.col('session_start'))
    )

    # Age of each event in (fractional) days, its decay, and its base weight
    age_days = (pl.col('session_start') - pl.col('timestamp')).dt.total_seconds() / 86400
    decay = (-AD_STOCK_DECAY_LAMBDA * age_days).exp()
    weight = (
        pl.when(event_kind == 'impression')
        .then(AD_STOCK_WEIGHT_IMPRESSION)
        .otherwise(AD_STOCK_WEIGHT_CLICK)
    )
    scored = earlier.with_columns((weight * decay).alias('ad_stock_contribution'))

    # Sum the contributions and keep raw counts of the events behind them
    return scored.group_by(session_keys).agg(
        pl.col('ad_stock_contribution').sum().alias('ad_stock'),
        event_kind.filter(event_kind == 'impression').count().alias('prior_impressions'),
        event_kind.filter(event_kind == 'click').count().alias('prior_clicks')
    )

# Compute ad stock for all sessions in a single call (no batching is done here,
# so the join inside the function holds the full history in memory).
print("\nCalculating ad stock (this may take a few minutes)...")
ad_stock_results = calculate_ad_stock_vectorized(ad_events, user_vendor_sessions)

# Attach the results. Sessions without any earlier events are missing from
# ad_stock_results, so the left join leaves nulls that are turned into zeros.
_stock_columns = ('ad_stock', 'prior_impressions', 'prior_clicks')
user_vendor_sessions = (
    user_vendor_sessions
    .join(ad_stock_results, on=['user_id', 'vendor_id', 'macro_session_id'], how='left')
    .with_columns([pl.col(name).fill_null(0) for name in _stock_columns])
)

_stock = user_vendor_sessions['ad_stock']
print(f"\n✓ Ad stock calculation complete")
print(f"  Sessions with ad stock > 0: {(_stock > 0).sum():,}")
print(f"  Mean ad stock: {_stock.mean():.4f}")
print(f"  Max ad stock: {_stock.max():.4f}")

## 6. Construct Analysis Panel

In [None]:
_rule = "=" * 80
print("\n" + _rule)
print("PHASE 5: PANEL CONSTRUCTION")
print(_rule)

# Unit of analysis: one row per (user, vendor, product, session) that received
# at least one impression.
print("\nCreating base panel from impressions...")

_is_impression = pl.col('event_type') == 'impression'
_has_product = pl.col('product_id').is_not_null()
_has_vendor = pl.col('vendor_id').is_not_null()
impression_events = all_events.filter(_is_impression & _has_product & _has_vendor)

# Collapse repeated impressions, remembering when the first one happened
_panel_keys = ['user_id', 'vendor_id', 'product_id', 'macro_session_id']
base_panel = impression_events.group_by(_panel_keys).agg(
    pl.col('timestamp').min().alias('first_impression_time')
)

print(f"  Base panel created: {base_panel.height:,} observations")
for _label, _column in (
    ('users', 'user_id'),
    ('vendors', 'vendor_id'),
    ('products', 'product_id'),
    ('sessions', 'macro_session_id'),
):
    print(f"  Unique {_label}: {base_panel[_column].n_unique():,}")

## 7. Add Within-Session Features

In [None]:
print("\n--- Adding within-session treatment variables ---")

_panel_keys = ['user_id', 'vendor_id', 'product_id', 'macro_session_id']


def _events_per_product(event_type, count_column):
    """Count events of one type per (user, vendor, product, session)."""
    return (
        all_events
        .filter(pl.col('event_type') == event_type)
        .group_by(_panel_keys)
        .agg(pl.count().alias(count_column))
    )


within_session_impressions = _events_per_product('impression', 'impressions_on_product')
within_session_clicks = _events_per_product('click', 'clicks_on_product')

# Attach both counts to the base panel; a missing count means zero events
panel = (
    base_panel
    .join(within_session_impressions, on=_panel_keys, how='left')
    .join(within_session_clicks, on=_panel_keys, how='left')
    .with_columns([
        pl.col('impressions_on_product').fill_null(0),
        pl.col('clicks_on_product').fill_null(0)
    ])
)

# The "click-through rate" below is the share of panel rows with at least one
# click, not clicks divided by impressions.
_rows_with_impressions = (panel['impressions_on_product'] > 0).sum()
_rows_with_clicks = (panel['clicks_on_product'] > 0).sum()
print(f"  Products with impressions: {_rows_with_impressions:,}")
print(f"  Products with clicks: {_rows_with_clicks:,}")
print(f"  Click-through rate: {_rows_with_clicks / panel.height * 100:.2f}%")

## 8. Add Purchase Outcomes

In [None]:
print("\n--- Adding purchase outcomes ---")

# Purchases have no vendor, so they are matched on user, product and session.
# NOTE(review): if several vendors showed the same product in one session, each
# of their rows receives the same purchase and revenue, so the totals printed
# below can count a purchase more than once - confirm this is intended.
_purchase_keys = ['user_id', 'product_id', 'macro_session_id']

purchase_events = (
    all_events
    .filter(pl.col('event_type') == 'purchase')
    .select(_purchase_keys + ['revenue'])
)

# Purchase count and revenue per product within a session
purchases_by_product = purchase_events.group_by(_purchase_keys).agg([
    pl.count().alias('purchases_of_product'),
    pl.col('revenue').sum().alias('revenue_from_product')
])

panel = panel.join(purchases_by_product, on=_purchase_keys, how='left')

# Rows without a matching purchase get zeros; `purchased` is the 0/1 outcome
_n_purchases = pl.col('purchases_of_product')
panel = panel.with_columns([
    _n_purchases.fill_null(0),
    pl.col('revenue_from_product').fill_null(0),
    (_n_purchases > 0).cast(pl.Int8).fill_null(0).alias('purchased')
])

# The first figure counts panel rows with a purchase (the label says sessions)
_purchased = panel['purchased']
print(f"  Sessions with purchases: {_purchased.sum():,}")
print(f"  Purchase rate: {_purchased.mean() * 100:.2f}%")
print(f"  Total revenue: ${panel['revenue_from_product'].sum():,.2f}")

## 9. Merge Ad Stock Features

In [None]:
print("\n--- Merging ad stock features ---")

# Join ad stock data to panel (ad stock is defined per user-vendor-session, so
# every product row of the same vendor in a session shares one value)
panel = panel.join(
    user_vendor_sessions.select(['user_id', 'vendor_id', 'macro_session_id', 'ad_stock',
                                 'prior_impressions', 'prior_clicks']),
    on=['user_id', 'vendor_id', 'macro_session_id'],
    how='left'
)

# Fill nulls for rows that found no match in the left join
panel = panel.with_columns([
    pl.col('ad_stock').fill_null(0),
    pl.col('prior_impressions').fill_null(0),
    pl.col('prior_clicks').fill_null(0)
])

# Pearson correlation is computed with the pl.corr expression inside select();
# `Series.corr` is a pandas idiom and is not part of the polars Series API.
# The outcome is cast to float so both inputs have a floating dtype.
ad_stock_purchase_corr = panel.select(
    pl.corr(pl.col('ad_stock'), pl.col('purchased').cast(pl.Float64))
).item()
if ad_stock_purchase_corr is None:
    # Keep the formatted print below working when no value could be computed
    ad_stock_purchase_corr = float('nan')

print(f"  Observations with ad stock > 0: {(panel['ad_stock'] > 0).sum():,}")
print(f"  Mean ad stock: {panel['ad_stock'].mean():.4f}")
print(f"  Correlation between ad stock and purchase: {ad_stock_purchase_corr:.4f}")

## 10. Add Control Variables

In [None]:
print("\n--- Adding control variables ---")

# Add session metadata (session-wide start/end, not the vendor-specific
# reference time used for ad stock)
panel = panel.join(
    session_times.select(['macro_session_id', 'session_start', 'session_end', 'duration_hours']),
    on='macro_session_id',
    how='left'
)

# Add catalog information (price)
# NOTE(review): assumes one catalog row per product_id; duplicates would
# multiply panel rows in this join - confirm against notebook 01.
catalog_features = data['catalog'].select(['product_id', 'catalog_price', 'brand', 'department_id'])
panel = panel.join(catalog_features, on='product_id', how='left')

# Create time-based fixed effects identifiers.
# dt.week() is the ISO week number, so it must be paired with the ISO year:
# with the calendar year, days around New Year land in the wrong (year, week)
# cell (e.g. 2023-01-01 is ISO week 52 of 2022, not week 52 of 2023).
panel = panel.with_columns([
    pl.col('session_start').dt.week().alias('week_of_year'),
    pl.col('session_start').dt.iso_year().alias('year'),
    # Missing catalog prices are imputed with the median price over panel rows
    (pl.col('catalog_price').fill_null(pl.col('catalog_price').median())).alias('price')
])

# Log transformations (the +1 keeps zero values finite)
panel = panel.with_columns([
    (pl.col('price') + 1).log().alias('log_price'),
    (pl.col('duration_hours') + 1).log().alias('log_duration')
])

# Winsorize extreme durations at the configured upper percentile
duration_cap = panel['duration_hours'].quantile(WINSORIZE_PERCENTILE / 100)
panel = panel.with_columns(
    pl.col('duration_hours').clip(upper_bound=duration_cap).alias('duration_winsorized')
)

# Calculate session-level activity metrics over every event type in the
# session (impressions, clicks and purchases)
session_activity = all_events.group_by(['user_id', 'macro_session_id']).agg([
    pl.col('product_id').n_unique().alias('distinct_products_viewed'),
    pl.col('event_type').count().alias('total_events_in_session')
])

panel = panel.join(
    session_activity,
    on=['user_id', 'macro_session_id'],
    how='left'
)

print(f"  Control variables added successfully")
print(f"  Final panel shape: {panel.height:,} rows × {panel.width} columns")

## 11. Add Prior 30-Day User Activity

In [None]:
print("\n--- Calculating prior 30-day user activity ---")

# One row per session present in the panel, with its start time
user_sessions = panel.select(['user_id', 'macro_session_id', 'session_start']).unique()

# Every purchase event of every user (user and time only)
historical_purchases = (
    all_events
    .filter(pl.col('event_type') == 'purchase')
    .select(['user_id', 'timestamp'])
)

# Pair each session with the user's purchases, keeping those strictly before it
print("  Finding historical purchases...")
_purchase_time = pl.col('timestamp')
_session_begin = pl.col('session_start')
historical = (
    user_sessions
    .join(historical_purchases, on='user_id', how='inner')
    .filter(_purchase_time < _session_begin)
)

# Restrict to the look-back window that ends at the session start
_window_open = _session_begin - pl.duration(days=PRIOR_USER_HISTORY_DAYS)
historical_30d = historical.filter(_purchase_time >= _window_open)

# Number of purchase events inside the window, per session
_session_keys = ['user_id', 'macro_session_id']
prior_30d_purchases = historical_30d.group_by(_session_keys).agg(
    pl.count().alias('prior_30d_purchases')
)

# Sessions with no purchases in the window are absent above, hence the zero fill
panel = (
    panel
    .join(prior_30d_purchases, on=_session_keys, how='left')
    .with_columns(pl.col('prior_30d_purchases').fill_null(0))
)

print(f"  Prior 30-day purchases calculated")
print(f"  Mean prior purchases: {panel['prior_30d_purchases'].mean():.2f}")

## 12. Final Feature Selection and Export

In [None]:
print("\n--- Finalizing dataset ---")

# Select final columns for the model
FINAL_COLUMNS = [
    # Identifiers
    'user_id',
    'vendor_id',
    'product_id',
    'macro_session_id',

    # Outcome
    'purchased',
    'revenue_from_product',

    # Treatment variables (within-session)
    'impressions_on_product',
    'clicks_on_product',

    # Ad stock (key variable)
    'ad_stock',
    'prior_impressions',
    'prior_clicks',

    # Control variables
    'log_price',
    'duration_winsorized',
    'distinct_products_viewed',
    'prior_30d_purchases',

    # Fixed effects identifiers
    'week_of_year',
    'year',

    # Additional metadata
    'session_start',
    'price',
    'brand',
    'department_id'
]

# Keep only the columns that exist, but say so when something is missing: a
# silently dropped column (e.g. because an earlier cell was skipped) would
# otherwise only surface when the model is estimated.
existing_columns = set(panel.columns)
available_columns = [col for col in FINAL_COLUMNS if col in existing_columns]
missing_columns = [col for col in FINAL_COLUMNS if col not in existing_columns]
if missing_columns:
    print(f"  Warning: {len(missing_columns)} expected column(s) missing from panel: {missing_columns}")

final_panel = panel.select(available_columns)

print(f"\n✓ Final panel prepared with {len(available_columns)} columns")
print(f"  Shape: {final_panel.height:,} rows × {final_panel.width} columns")

# Save the panel
final_panel.write_parquet(OUTPUT_FILE)
print(f"\n✓ Panel saved to {OUTPUT_FILE}")
print(f"  File size: {OUTPUT_FILE.stat().st_size / (1024**2):.2f} MB")

## 13. Create Summary Statistics and Metadata

In [None]:
print("\n" + "="*80)
print("SUMMARY STATISTICS")
print("="*80)

# Rows with a purchase, filtered once and reused below
purchased_rows = final_panel.filter(pl.col('purchased') == 1)
ad_stock_values = final_panel['ad_stock']

# Pearson correlation is computed with the pl.corr expression inside select();
# `Series.corr` is a pandas idiom and is not part of the polars Series API.
# The outcome is cast to float so both inputs have a floating dtype.
ad_stock_purchase_corr = final_panel.select(
    pl.corr(pl.col('ad_stock'), pl.col('purchased').cast(pl.Float64))
).item()
if ad_stock_purchase_corr is None:
    # Keep float() and the formatted print below working when nothing was computed
    ad_stock_purchase_corr = float('nan')

# Calculate summary statistics (plain Python numbers so the dict is JSON-serializable)
summary_stats = {
    'panel_shape': {'rows': final_panel.height, 'columns': final_panel.width},
    'unique_counts': {
        'users': final_panel['user_id'].n_unique(),
        'vendors': final_panel['vendor_id'].n_unique(),
        'products': final_panel['product_id'].n_unique(),
        'sessions': final_panel['macro_session_id'].n_unique()
    },
    'outcome_stats': {
        'purchase_rate': float(final_panel['purchased'].mean()),
        'total_revenue': float(final_panel['revenue_from_product'].sum()),
        # Mean over an empty selection is undefined, so report 0 in that case
        'mean_revenue_if_purchased': float(
            purchased_rows['revenue_from_product'].mean()
        ) if purchased_rows.height > 0 else 0
    },
    'treatment_stats': {
        'mean_impressions': float(final_panel['impressions_on_product'].mean()),
        'mean_clicks': float(final_panel['clicks_on_product'].mean()),
        # Share of rows with at least one click
        'click_through_rate': float((final_panel['clicks_on_product'] > 0).mean())
    },
    'ad_stock_stats': {
        'mean_ad_stock': float(ad_stock_values.mean()),
        'pct_with_ad_stock': float((ad_stock_values > 0).mean()),
        'correlation_with_purchase': float(ad_stock_purchase_corr),
        '25th_percentile': float(ad_stock_values.quantile(0.25)),
        '50th_percentile': float(ad_stock_values.quantile(0.50)),
        '75th_percentile': float(ad_stock_values.quantile(0.75)),
        '95th_percentile': float(ad_stock_values.quantile(0.95))
    },
    'parameters': {
        'session_gap_days': SESSION_GAP_DAYS,
        'ad_stock_decay_lambda': AD_STOCK_DECAY_LAMBDA,
        'ad_stock_weight_impression': AD_STOCK_WEIGHT_IMPRESSION,
        'ad_stock_weight_click': AD_STOCK_WEIGHT_CLICK,
        'prior_history_days': PRIOR_USER_HISTORY_DAYS,
        'winsorize_percentile': WINSORIZE_PERCENTILE
    },
    'generated_at': datetime.now().isoformat()
}

# Print summary
print("\nPanel Statistics:")
print(f"  Observations: {summary_stats['panel_shape']['rows']:,}")
print(f"  Unique users: {summary_stats['unique_counts']['users']:,}")
print(f"  Unique vendors: {summary_stats['unique_counts']['vendors']:,}")
print(f"  Purchase rate: {summary_stats['outcome_stats']['purchase_rate']*100:.2f}%")

print("\nAd Stock Distribution:")
print(f"  Mean: {summary_stats['ad_stock_stats']['mean_ad_stock']:.4f}")
print(f"  Median: {summary_stats['ad_stock_stats']['50th_percentile']:.4f}")
print(f"  75th percentile: {summary_stats['ad_stock_stats']['75th_percentile']:.4f}")
print(f"  % with ad stock > 0: {summary_stats['ad_stock_stats']['pct_with_ad_stock']*100:.1f}%")
print(f"  Correlation with purchase: {summary_stats['ad_stock_stats']['correlation_with_purchase']:.4f}")

# Save metadata
with open(METADATA_FILE, 'w') as f:
    json.dump(summary_stats, f, indent=2)
print(f"\n✓ Metadata saved to {METADATA_FILE}")

## 14. Model Specification Preview

In [None]:
_rule = "=" * 80

print("\n" + _rule)
print("LINEAR PROBABILITY MODEL SPECIFICATION")
print(_rule)

# Human-readable description of the model the exported panel is built for
print("""
Model Equation:
Purchased_uvps = β₁·AdStock_uvs + β₂·Clicks_uvps + β₃·Impressions_uvps + X'_uvps·Γ + α_u + δ_v + γ_t + ε_uvps

Where:
  - Purchased_uvps: Binary outcome (0/1)
  - AdStock_uvs: Decayed carryover effect from past sessions
  - Clicks_uvps: Within-session click count
  - Impressions_uvps: Within-session impression count
  - X'_uvps: Control variables (price, duration, etc.)
  - α_u: User fixed effects
  - δ_v: Vendor fixed effects
  - γ_t: Week fixed effects

Key Coefficient Interpretations:
  - β₁: Change in purchase probability per unit increase in ad stock (carryover effect)
  - β₂: Change in purchase probability per additional click (immediate effect)
  - β₃: Change in purchase probability per additional impression (immediate effect)

Next Steps:
  1. Estimate model using pyfixest or similar high-dimensional FE package
  2. Cluster standard errors at user level
  3. Test robustness with different decay parameters
  4. Calculate marginal effects and elasticities
""")

# Wall-clock time elapsed since the timing was initialised in the first cell
end_time = datetime.now()
runtime = end_time - start_time
print(f"\nTotal runtime: {runtime}")
print("\n" + _rule)
print("PANEL CONSTRUCTION COMPLETE")
print(_rule)