# EDA Analysis - Ad Platform Incrementality

This notebook performs comprehensive exploratory data analysis on the sessionized data generated by `eda_browsing_session.ipynb`.

**Output**: All results are displayed in the notebook AND saved to `eda_analysis_report.txt`

In [14]:
# %%
# --- SETUP ---
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime, timedelta
import warnings
from tqdm import tqdm

warnings.filterwarnings('ignore')

# Configuration
DATA_DIR = Path('./data')
REPORT_FILE = 'reports/eda_analysis_report.txt'  # Save to reports directory

# Report logger
class ReportLogger:
    def __init__(self, filename):
        self.filename = filename
        self.content = []
        self.content.append(f"="*80)
        self.content.append(f"EDA ANALYSIS REPORT")
        self.content.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        self.content.append(f"="*80)
        self.content.append("\n")
    
    def log(self, text):
        """Log text to both console and report buffer"""
        print(text)
        self.content.append(text)
    
    def save(self):
        """Save accumulated content to file"""
        with open(self.filename, 'w') as f:
            f.write('\n'.join(str(line) for line in self.content))
        print(f"\n[SUCCESS] Report saved to {self.filename}")

# Initialize report logger
report = ReportLogger(REPORT_FILE)
report.log("Starting EDA Analysis...\n")

Starting EDA Analysis...



In [15]:
# %%
# --- DATA LOADING ---
report.log("="*80)
report.log("SECTION 1: DATA LOADING AND VALIDATION")
report.log("="*80)

# Define expected files
data_files = {
    'shopping_sessions': 'shopping_sessions.parquet',
    'browsing_sessions': 'browsing_sessions.parquet',
    'auctions_users': 'raw_sample_auctions_users.parquet',
    'auctions_results': 'raw_sample_auctions_results.parquet',
    'impressions': 'raw_sample_impressions.parquet',
    'clicks': 'raw_sample_clicks.parquet',
    'purchases': 'raw_sample_purchases.parquet',
    'catalog': 'processed_sample_catalog.parquet'
}

# Load all datasets
datasets = {}
report.log("\nLoading datasets:")
for name, filename in tqdm(data_files.items(), desc="Loading data"):
    filepath = DATA_DIR / filename
    if filepath.exists():
        datasets[name] = pd.read_csv(filepath) if filepath.suffix == '.csv' else pd.read_parquet(filepath)
        shape = datasets[name].shape
        report.log(f"  - {name}: {shape[0]:,} rows, {shape[1]} columns")
    else:
        report.log(f"  - {name}: FILE NOT FOUND")

# Data validation
report.log("\nData Validation:")
report.log(f"  Total datasets loaded: {len(datasets)}")
raw_event_count = sum(len(df) for name, df in datasets.items() if 'raw' in data_files.get(name, ''))
report.log(f"  Total raw events: {raw_event_count:,}")

# Extract dataframes for easier access
df_shopping = datasets.get('shopping_sessions', pd.DataFrame())
df_browsing = datasets.get('browsing_sessions', pd.DataFrame())
df_auctions = datasets.get('auctions_users', pd.DataFrame())
df_bids = datasets.get('auctions_results', pd.DataFrame())
df_impressions = datasets.get('impressions', pd.DataFrame())
df_clicks = datasets.get('clicks', pd.DataFrame())
df_purchases = datasets.get('purchases', pd.DataFrame())
df_catalog = datasets.get('catalog', pd.DataFrame())

# Note: Prices are already in dollars (not cents) in the enhanced catalog
# The catalog preprocessing already handled the conversion
if 'PRICE' in df_catalog.columns:
    report.log("\n  Note: Catalog prices are in dollars")

if 'UNIT_PRICE' in df_purchases.columns:
    # Purchase prices still need conversion from cents
    df_purchases['UNIT_PRICE'] = df_purchases['UNIT_PRICE'] / 100
    report.log("  Note: Purchase unit prices converted from cents to dollars")

SECTION 1: DATA LOADING AND VALIDATION

Loading datasets:


Loading data:   0%|          | 0/8 [00:00<?, ?it/s]

  - shopping_sessions: 790 rows, 18 columns
  - browsing_sessions: 3,614 rows, 15 columns
  - auctions_users: 19,173 rows, 3 columns


Loading data:  50%|█████     | 4/8 [00:00<00:00, 12.69it/s]

  - auctions_results: 719,751 rows, 7 columns
  - impressions: 81,119 rows, 7 columns
  - clicks: 2,105 rows, 7 columns
  - purchases: 342 rows, 7 columns


Loading data: 100%|██████████| 8/8 [00:00<00:00, 10.13it/s]

  - catalog: 366,458 rows, 13 columns

Data Validation:
  Total datasets loaded: 8
  Total raw events: 822,490

  Note: Catalog prices are in dollars
  Note: Purchase unit prices converted from cents to dollars





In [16]:
# %%
# --- BASIC STATISTICS ---
report.log("\n" + "="*80)
report.log("SECTION 2: BASIC STATISTICS")
report.log("="*80)

# User statistics
report.log("\nUser Statistics:")
n_users_total = df_auctions['OPAQUE_USER_ID'].nunique() if 'OPAQUE_USER_ID' in df_auctions.columns else 0
n_users_shopping = df_shopping['user_id'].nunique() if 'user_id' in df_shopping.columns else 0
n_users_purchased = df_purchases['USER_ID'].nunique() if 'USER_ID' in df_purchases.columns else 0

report.log(f"  Total unique users: {n_users_total:,}")
report.log(f"  Users with shopping sessions: {n_users_shopping:,}")
report.log(f"  Users who purchased: {n_users_purchased:,}")
report.log(f"  Overall conversion rate: {n_users_purchased/n_users_total:.2%}" if n_users_total > 0 else "  Overall conversion rate: N/A")

# Session statistics
report.log("\nSession Statistics:")
report.log(f"  Total shopping sessions: {len(df_shopping):,}")
report.log(f"  Total browsing sessions: {len(df_browsing):,}")
if 'num_browsing_sessions' in df_shopping.columns:
    report.log(f"  Avg browsing sessions per shopping session: {df_shopping['num_browsing_sessions'].mean():.2f}")

# Auction statistics
report.log("\nAuction Statistics:")
report.log(f"  Total auctions: {len(df_auctions):,}")
report.log(f"  Total bids: {len(df_bids):,}")
if len(df_auctions) > 0:
    report.log(f"  Avg bids per auction: {len(df_bids)/len(df_auctions):.2f}")

# Product statistics
report.log("\nProduct Statistics:")
n_products = df_catalog['PRODUCT_ID'].nunique() if 'PRODUCT_ID' in df_catalog.columns else 0
n_vendors = df_bids['VENDOR_ID'].nunique() if 'VENDOR_ID' in df_bids.columns else 0
report.log(f"  Unique products in catalog: {n_products:,}")
report.log(f"  Unique vendors bidding: {n_vendors:,}")


SECTION 2: BASIC STATISTICS

User Statistics:
  Total unique users: 773
  Users with shopping sessions: 773
  Users who purchased: 137
  Overall conversion rate: 17.72%

Session Statistics:
  Total shopping sessions: 790
  Total browsing sessions: 3,614
  Avg browsing sessions per shopping session: 4.57

Auction Statistics:
  Total auctions: 19,173
  Total bids: 719,751
  Avg bids per auction: 37.54

Product Statistics:
  Unique products in catalog: 366,458
  Unique vendors bidding: 40,252


In [17]:
# %%
# --- SESSION-LEVEL ANALYSIS ---
report.log("\n" + "="*80)
report.log("SECTION 3: SESSION-LEVEL ANALYSIS")
report.log("="*80)

# Shopping session conversion analysis
if 'did_purchase' in df_shopping.columns:
    report.log("\nShopping Session Conversion Analysis:")
    conversion_rate = df_shopping['did_purchase'].mean()
    report.log(f"  Shopping session conversion rate: {conversion_rate:.2%}")
    
    # Conversion by number of browsing sessions
    if 'num_browsing_sessions' in df_shopping.columns:
        report.log("\n  Conversion by browsing session count:")
        conv_by_sessions = df_shopping.groupby('num_browsing_sessions')['did_purchase'].agg(['mean', 'count'])
        for idx, row in conv_by_sessions.head(10).iterrows():
            report.log(f"    {idx} browsing session(s): {row['mean']:.2%} (n={row['count']:,})")

# Browsing session duration analysis
if 'duration_minutes' in df_browsing.columns:
    report.log("\nBrowsing Session Duration Analysis:")
    duration_stats = df_browsing['duration_minutes'].describe()
    report.log(f"  Mean duration: {duration_stats['mean']:.2f} minutes")
    report.log(f"  Median duration: {duration_stats['50%']:.2f} minutes")
    report.log(f"  95th percentile: {df_browsing['duration_minutes'].quantile(0.95):.2f} minutes")

# Event distribution within sessions
if all(col in df_shopping.columns for col in ['total_auctions', 'total_impressions', 'total_clicks']):
    report.log("\nEvent Distribution in Shopping Sessions:")
    report.log(f"  Avg auctions per session: {df_shopping['total_auctions'].mean():.2f}")
    report.log(f"  Avg impressions per session: {df_shopping['total_impressions'].mean():.2f}")
    report.log(f"  Avg clicks per session: {df_shopping['total_clicks'].mean():.2f}")
    
    # CTR calculation
    sessions_with_impressions = df_shopping[df_shopping['total_impressions'] > 0]
    if len(sessions_with_impressions) > 0:
        avg_ctr = (sessions_with_impressions['total_clicks'] / sessions_with_impressions['total_impressions']).mean()
        report.log(f"  Avg CTR per session: {avg_ctr:.2%}")


SECTION 3: SESSION-LEVEL ANALYSIS

Shopping Session Conversion Analysis:
  Shopping session conversion rate: 17.34%

  Conversion by browsing session count:
    1 browsing session(s): 1.62% (n=371.0)
    2 browsing session(s): 14.91% (n=114.0)
    3 browsing session(s): 17.86% (n=56.0)
    4 browsing session(s): 22.45% (n=49.0)
    5 browsing session(s): 28.57% (n=35.0)
    6 browsing session(s): 25.00% (n=24.0)
    7 browsing session(s): 44.44% (n=18.0)
    8 browsing session(s): 29.41% (n=17.0)
    9 browsing session(s): 62.50% (n=16.0)
    10 browsing session(s): 42.86% (n=14.0)

Browsing Session Duration Analysis:
  Mean duration: 8.35 minutes
  Median duration: 1.49 minutes
  95th percentile: 39.71 minutes

Event Distribution in Shopping Sessions:
  Avg auctions per session: 24.27
  Avg impressions per session: 102.68
  Avg clicks per session: 2.66
  Avg CTR per session: 2.63%


In [18]:
# %%
# --- MARKETPLACE DYNAMICS ---
report.log("\n" + "="*80)
report.log("SECTION 4: MARKETPLACE DYNAMICS (AUCTION ANALYSIS)")
report.log("="*80)

# Auction competition analysis
if not df_bids.empty and 'AUCTION_ID' in df_bids.columns:
    report.log("\nAuction Competition Metrics:")
    
    # Bids per auction distribution
    bids_per_auction = df_bids.groupby('AUCTION_ID').size()
    report.log(f"  Avg bids per auction: {bids_per_auction.mean():.2f}")
    report.log(f"  Median bids per auction: {bids_per_auction.median():.0f}")
    report.log(f"  Max bids per auction: {bids_per_auction.max():.0f}")
    
    # Win rate by rank position
    if 'RANKING' in df_bids.columns and 'IS_WINNER' in df_bids.columns:
        report.log("\nWin Rate by Rank Position:")
        win_by_rank = df_bids.groupby('RANKING')['IS_WINNER'].agg(['mean', 'count'])
        for rank in range(1, min(11, len(win_by_rank) + 1)):
            if rank in win_by_rank.index:
                row = win_by_rank.loc[rank]
                report.log(f"  Rank {rank}: {row['mean']:.2%} win rate (n={row['count']:,})")
    
    # Vendor participation
    if 'VENDOR_ID' in df_bids.columns:
        report.log("\nVendor Participation:")
        vendor_bids = df_bids.groupby('VENDOR_ID').size().sort_values(ascending=False)
        report.log(f"  Total vendors: {len(vendor_bids):,}")
        report.log(f"  Top 5 vendors by bid volume:")
        for vendor_id, count in vendor_bids.head(5).items():
            pct = count / len(df_bids) * 100
            report.log(f"    {vendor_id[:8]}...: {count:,} bids ({pct:.1f}% of total)")

# Impression to click analysis
if not df_impressions.empty and not df_clicks.empty:
    report.log("\nImpression to Click Funnel:")
    n_impressions = len(df_impressions)
    n_clicks = len(df_clicks)
    overall_ctr = n_clicks / n_impressions if n_impressions > 0 else 0
    report.log(f"  Total impressions: {n_impressions:,}")
    report.log(f"  Total clicks: {n_clicks:,}")
    report.log(f"  Overall CTR: {overall_ctr:.2%}")


SECTION 4: MARKETPLACE DYNAMICS (AUCTION ANALYSIS)

Auction Competition Metrics:
  Avg bids per auction: 37.81
  Median bids per auction: 34
  Max bids per auction: 74

Win Rate by Rank Position:
  Rank 1: 98.84% win rate (n=19,038.0)
  Rank 2: 98.11% win rate (n=18,857.0)
  Rank 3: 97.40% win rate (n=18,731.0)
  Rank 4: 96.67% win rate (n=18,640.0)
  Rank 5: 96.09% win rate (n=18,553.0)
  Rank 6: 95.75% win rate (n=18,479.0)
  Rank 7: 95.31% win rate (n=18,415.0)
  Rank 8: 94.86% win rate (n=18,364.0)
  Rank 9: 94.45% win rate (n=18,313.0)
  Rank 10: 94.05% win rate (n=18,250.0)

Vendor Participation:
  Total vendors: 40,252
  Top 5 vendors by bid volume:
    064d8351...: 1,165 bids (0.2% of total)
    018e9bee...: 1,117 bids (0.2% of total)
    064df69b...: 884 bids (0.1% of total)
    0198b16c...: 711 bids (0.1% of total)
    06567cbe...: 689 bids (0.1% of total)

Impression to Click Funnel:
  Total impressions: 81,119
  Total clicks: 2,105
  Overall CTR: 2.59%


In [19]:
# %%
# --- USER BEHAVIOR PATTERNS ---
report.log("\n" + "="*80)
report.log("SECTION 5: USER BEHAVIOR PATTERNS")
report.log("="*80)

# User activity segmentation
if 'user_id' in df_shopping.columns:
    report.log("\nUser Activity Segmentation:")
    user_sessions = df_shopping.groupby('user_id').agg({
        'shopping_session_id': 'count',
        'did_purchase': 'sum',
        'total_revenue_usd': 'sum' if 'total_revenue_usd' in df_shopping.columns else 'first'
    }).rename(columns={'shopping_session_id': 'n_sessions', 'did_purchase': 'n_purchases'})
    
    # Classify users
    user_sessions['user_type'] = 'browser'
    user_sessions.loc[user_sessions['n_purchases'] == 1, 'user_type'] = 'single_purchaser'
    user_sessions.loc[user_sessions['n_purchases'] > 1, 'user_type'] = 'repeat_purchaser'
    
    user_type_dist = user_sessions['user_type'].value_counts()
    report.log(f"  Browsers: {user_type_dist.get('browser', 0):,} users")
    report.log(f"  Single purchasers: {user_type_dist.get('single_purchaser', 0):,} users")
    report.log(f"  Repeat purchasers: {user_type_dist.get('repeat_purchaser', 0):,} users")
    
    # Revenue concentration
    if 'total_revenue_usd' in df_shopping.columns:
        total_revenue = user_sessions['total_revenue_usd'].sum()
        if total_revenue > 0:
            top_10pct_users = int(len(user_sessions) * 0.1)
            top_users_revenue = user_sessions.nlargest(top_10pct_users, 'total_revenue_usd')['total_revenue_usd'].sum()
            report.log(f"\n  Revenue Concentration:")
            report.log(f"    Top 10% of users generate {top_users_revenue/total_revenue:.1%} of revenue")

# Purchase patterns
if not df_purchases.empty and 'PURCHASED_AT' in df_purchases.columns:
    report.log("\nPurchase Timing Patterns:")
    df_purchases['PURCHASED_AT'] = pd.to_datetime(df_purchases['PURCHASED_AT'])
    df_purchases['hour'] = df_purchases['PURCHASED_AT'].dt.hour
    df_purchases['day_of_week'] = df_purchases['PURCHASED_AT'].dt.dayofweek
    
    # Hourly distribution
    hourly_purchases = df_purchases['hour'].value_counts().sort_index()
    peak_hour = hourly_purchases.idxmax()
    report.log(f"  Peak purchase hour: {peak_hour}:00 ({hourly_purchases[peak_hour]:,} purchases)")
    
    # Day of week distribution (0=Monday, 6=Sunday)
    day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    daily_purchases = df_purchases['day_of_week'].value_counts().sort_index()
    peak_day = daily_purchases.idxmax()
    report.log(f"  Peak purchase day: {day_names[peak_day]} ({daily_purchases[peak_day]:,} purchases)")


SECTION 5: USER BEHAVIOR PATTERNS

User Activity Segmentation:
  Browsers: 636 users
  Single purchasers: 137 users
  Repeat purchasers: 0 users

  Revenue Concentration:
    Top 10% of users generate 86.8% of revenue

Purchase Timing Patterns:
  Peak purchase hour: 21:00 (69 purchases)
  Peak purchase day: Thursday (67 purchases)


In [20]:
# %%
# --- PRODUCT & CATEGORY ANALYSIS ---
report.log("\n" + "="*80)
report.log("SECTION 6: PRODUCT & CATEGORY ANALYSIS")
report.log("="*80)

# Top products by different metrics
if not df_purchases.empty and 'PRODUCT_ID' in df_purchases.columns:
    report.log("\nTop Products by Purchase Frequency:")
    product_purchases = df_purchases['PRODUCT_ID'].value_counts().head(10)
    for i, (product_id, count) in enumerate(product_purchases.items(), 1):
        report.log(f"  {i}. {product_id[:20]}...: {count:,} purchases")

# Product catalog analysis
if not df_catalog.empty:
    report.log("\nCatalog Price Analysis:")
    if 'PRICE' in df_catalog.columns:
        # Filter out unrealistic prices (likely data errors)
        reasonable_prices = df_catalog[df_catalog['PRICE'] < 1000]['PRICE']
        price_stats = reasonable_prices.describe()
        report.log(f"  Mean price: ${price_stats['mean']:.2f}")
        report.log(f"  Median price: ${price_stats['50%']:.2f}")
        report.log(f"  Price range: ${price_stats['min']:.2f} - ${price_stats['max']:.2f}")
        report.log(f"  Note: Excluded {len(df_catalog) - len(reasonable_prices):,} products with unrealistic prices (>$1000)")

        # Price distribution
        price_bins = [0, 10, 25, 50, 100, 1000]
        price_labels = ['$0-10', '$10-25', '$25-50', '$50-100', '$100+']
        df_catalog_filtered = df_catalog[df_catalog['PRICE'] < 1000].copy()
        df_catalog_filtered['price_range'] = pd.cut(df_catalog_filtered['PRICE'], bins=price_bins, labels=price_labels)
        price_dist = df_catalog_filtered['price_range'].value_counts()
        report.log("\n  Products by price range:")
        for range_label, count in price_dist.items():
            report.log(f"    {range_label}: {count:,} products ({count/len(df_catalog_filtered):.1%})")

    # Enhanced catalog field analysis
    report.log("\nEnhanced Catalog Analysis:")

    # Brand analysis
    if 'BRAND' in df_catalog.columns:
        n_brands = df_catalog['BRAND'].nunique()
        non_null_brands = df_catalog['BRAND'].notna().sum()
        report.log(f"\n  Brand Distribution:")
        report.log(f"    Total unique brands: {n_brands:,}")
        report.log(f"    Products with brand info: {non_null_brands:,} ({non_null_brands/len(df_catalog):.1%})")
        top_brands = df_catalog['BRAND'].value_counts().head(5)
        report.log("    Top 5 brands by product count:")
        for brand, count in top_brands.items():
            if pd.notna(brand):
                report.log(f"      {brand}: {count:,} products")

    # Department analysis
    if 'DEPARTMENT_ID' in df_catalog.columns:
        n_departments = df_catalog['DEPARTMENT_ID'].nunique()
        non_null_depts = df_catalog['DEPARTMENT_ID'].notna().sum()
        report.log(f"\n  Department Distribution:")
        report.log(f"    Total unique departments: {n_departments:,}")
        report.log(f"    Products with department: {non_null_depts:,} ({non_null_depts/len(df_catalog):.1%})")

    # Category analysis
    if 'CATEGORY_ID' in df_catalog.columns:
        n_categories = df_catalog['CATEGORY_ID'].nunique()
        non_null_cats = df_catalog['CATEGORY_ID'].notna().sum()
        report.log(f"\n  Category Distribution:")
        report.log(f"    Total unique categories: {n_categories:,}")
        report.log(f"    Products with category: {non_null_cats:,} ({non_null_cats/len(df_catalog):.1%})")

    # Color analysis
    if 'PRIMARY_COLOR' in df_catalog.columns:
        n_colors = df_catalog['PRIMARY_COLOR'].nunique()
        non_null_colors = df_catalog['PRIMARY_COLOR'].notna().sum()
        report.log(f"\n  Color Distribution:")
        report.log(f"    Total unique colors: {n_colors:,}")
        report.log(f"    Products with color: {non_null_colors:,} ({non_null_colors/len(df_catalog):.1%})")
        top_colors = df_catalog['PRIMARY_COLOR'].value_counts().head(5)
        report.log("    Top 5 colors:")
        for color, count in top_colors.items():
            if pd.notna(color):
                report.log(f"      {color}: {count:,} products")

    # Style tags analysis
    if 'STYLE_TAGS' in df_catalog.columns:
        non_null_styles = df_catalog['STYLE_TAGS'].notna().sum()
        non_empty_styles = (df_catalog['STYLE_TAGS'].notna() & (df_catalog['STYLE_TAGS'] != '')).sum()
        report.log(f"\n  Style Tags:")
        report.log(f"    Products with style tags: {non_empty_styles:,} ({non_empty_styles/len(df_catalog):.1%})")

# Click-through rate by product attributes
if not df_clicks.empty and not df_impressions.empty:
    report.log("\nProduct Performance Metrics:")
    if 'PRODUCT_ID' in df_clicks.columns and 'PRODUCT_ID' in df_impressions.columns:
        product_impressions = df_impressions['PRODUCT_ID'].value_counts()
        product_clicks = df_clicks['PRODUCT_ID'].value_counts()
        
        # CTR by product (for products with >10 impressions)
        product_ctr = (product_clicks / product_impressions).dropna()
        qualified_products = product_impressions[product_impressions > 10].index
        product_ctr = product_ctr[product_ctr.index.isin(qualified_products)]
        
        if len(product_ctr) > 0:
            report.log(f"  Products with CTR data: {len(product_ctr):,}")
            report.log(f"  Mean product CTR: {product_ctr.mean():.2%}")
            report.log(f"  Median product CTR: {product_ctr.median():.2%}")


SECTION 6: PRODUCT & CATEGORY ANALYSIS

Top Products by Purchase Frequency:
  1. 643d659974cb47c4f0cc...: 28 purchases
  2. 6703e2b89b37c7b6b662...: 17 purchases
  3. 64401cbfb2780c1796da...: 11 purchases
  4. 63efbcadddab402df594...: 4 purchases
  5. 68afb9f495538318707e...: 3 purchases
  6. 670fda75013d2a47a068...: 3 purchases
  7. 652560cc34d156f6ca9d...: 2 purchases
  8. 68b212a1a0dc2b8bf606...: 1 purchases
  9. 67f86f199f034caddc3f...: 1 purchases
  10. 68b987fb2c9e441b412c...: 1 purchases

Catalog Price Analysis:
  Mean price: $61.71
  Median price: $33.00
  Price range: $3.00 - $999.00
  Note: Excluded 1,695 products with unrealistic prices (>$1000)

  Products by price range:
    $10-25: 118,607 products (32.5%)
    $25-50: 114,592 products (31.4%)
    $50-100: 61,850 products (17.0%)
    $100+: 45,562 products (12.5%)
    $0-10: 24,152 products (6.6%)

Enhanced Catalog Analysis:

  Brand Distribution:
    Total unique brands: 29,846
    Products with brand info: 366,458 (100.

In [21]:
# %%
# --- CONVERSION FUNNEL ANALYSIS ---
report.log("\n" + "="*80)
report.log("SECTION 7: CONVERSION FUNNEL ANALYSIS")
report.log("="*80)

# Overall funnel metrics
report.log("\nOverall Funnel Metrics:")
funnel_metrics = {
    'Auctions': len(df_auctions),
    'Bids': len(df_bids),
    'Impressions': len(df_impressions),
    'Clicks': len(df_clicks),
    'Purchases': len(df_purchases)
}

prev_count = None
for stage, count in funnel_metrics.items():
    if prev_count is not None and prev_count > 0:
        conversion = count / prev_count
        report.log(f"  {stage}: {count:,} ({conversion:.2%} of previous stage)")
    else:
        report.log(f"  {stage}: {count:,}")
    prev_count = count if stage != 'Bids' else funnel_metrics['Impressions']  # Skip bids for funnel

# Session-level funnel
if all(col in df_shopping.columns for col in ['total_auctions', 'total_impressions', 'total_clicks', 'did_purchase']):
    report.log("\nSession-Level Funnel (Shopping Sessions):")
    
    sessions_with_auctions = (df_shopping['total_auctions'] > 0).sum()
    sessions_with_impressions = (df_shopping['total_impressions'] > 0).sum()
    sessions_with_clicks = (df_shopping['total_clicks'] > 0).sum()
    sessions_with_purchases = df_shopping['did_purchase'].sum()
    
    total_sessions = len(df_shopping)
    report.log(f"  Sessions with auctions: {sessions_with_auctions:,} ({sessions_with_auctions/total_sessions:.2%})")
    report.log(f"  Sessions with impressions: {sessions_with_impressions:,} ({sessions_with_impressions/total_sessions:.2%})")
    report.log(f"  Sessions with clicks: {sessions_with_clicks:,} ({sessions_with_clicks/total_sessions:.2%})")
    report.log(f"  Sessions with purchases: {sessions_with_purchases:,} ({sessions_with_purchases/total_sessions:.2%})")

# Revenue analysis
if 'total_revenue_usd' in df_shopping.columns:
    report.log("\nRevenue Metrics:")
    total_revenue = df_shopping['total_revenue_usd'].sum()
    converting_sessions = df_shopping[df_shopping['did_purchase'] == 1]
    
    report.log(f"  Total revenue: ${total_revenue:,.2f}")
    report.log(f"  Average order value: ${converting_sessions['total_revenue_usd'].mean():.2f}")
    report.log(f"  Revenue per shopping session: ${total_revenue/len(df_shopping):.2f}")
    
    # Revenue by session characteristics
    if 'num_browsing_sessions' in df_shopping.columns:
        report.log("\n  Revenue by browsing session count:")
        rev_by_browsing = df_shopping.groupby('num_browsing_sessions')['total_revenue_usd'].agg(['mean', 'sum', 'count'])
        for idx, row in rev_by_browsing.head(5).iterrows():
            report.log(f"    {idx} session(s): ${row['mean']:.2f} avg, ${row['sum']:,.2f} total (n={row['count']:,})")


SECTION 7: CONVERSION FUNNEL ANALYSIS

Overall Funnel Metrics:
  Auctions: 19,173
  Bids: 719,751 (3753.98% of previous stage)
  Impressions: 81,119 (100.00% of previous stage)
  Clicks: 2,105 (2.59% of previous stage)
  Purchases: 342 (16.25% of previous stage)

Session-Level Funnel (Shopping Sessions):
  Sessions with auctions: 790 (100.00%)
  Sessions with impressions: 679 (85.95%)
  Sessions with clicks: 307 (38.86%)
  Sessions with purchases: 137 (17.34%)

Revenue Metrics:
  Total revenue: $9,150.00
  Average order value: $66.79
  Revenue per shopping session: $11.58

  Revenue by browsing session count:
    1 session(s): $0.40 avg, $148.00 total (n=371.0)
    2 session(s): $9.07 avg, $1,034.00 total (n=114.0)
    3 session(s): $5.79 avg, $324.00 total (n=56.0)
    4 session(s): $10.33 avg, $506.00 total (n=49.0)
    5 session(s): $12.14 avg, $425.00 total (n=35.0)


In [22]:
# %%
# --- ADVANCED METRICS ---
report.log("\n" + "="*80)
report.log("SECTION 8: ADVANCED METRICS AND PATTERNS")
report.log("="*80)

# Auction efficiency metrics
if not df_bids.empty and 'IS_WINNER' in df_bids.columns:
    report.log("\nAuction Efficiency Metrics:")
    total_bids = len(df_bids)
    winning_bids = df_bids['IS_WINNER'].sum()
    win_rate = winning_bids / total_bids if total_bids > 0 else 0
    
    report.log(f"  Total bids: {total_bids:,}")
    report.log(f"  Winning bids: {winning_bids:,}")
    report.log(f"  Overall win rate: {win_rate:.2%}")
    
    # Average competition per auction
    if 'AUCTION_ID' in df_bids.columns:
        avg_competition = df_bids.groupby('AUCTION_ID').size().mean()
        report.log(f"  Average bidders per auction: {avg_competition:.2f}")

# Click concentration analysis
if not df_clicks.empty and 'PRODUCT_ID' in df_clicks.columns:
    report.log("\nClick Concentration:")
    product_clicks = df_clicks['PRODUCT_ID'].value_counts()
    total_clicks = len(df_clicks)
    top_10_products = product_clicks.head(10).sum()
    top_100_products = product_clicks.head(100).sum()
    
    report.log(f"  Top 10 products: {top_10_products:,} clicks ({top_10_products/total_clicks:.1%} of total)")
    report.log(f"  Top 100 products: {top_100_products:,} clicks ({top_100_products/total_clicks:.1%} of total)")

# Session complexity and conversion
if 'total_clicks' in df_shopping.columns and 'did_purchase' in df_shopping.columns:
    report.log("\nSession Complexity and Conversion:")
    
    # Define complexity bins
    df_shopping['click_bins'] = pd.cut(df_shopping['total_clicks'], 
                                        bins=[-0.1, 0, 1, 5, 10, float('inf')],
                                        labels=['0 clicks', '1 click', '2-5 clicks', '6-10 clicks', '10+ clicks'])
    
    complexity_conv = df_shopping.groupby('click_bins', observed=True)['did_purchase'].agg(['mean', 'count'])
    report.log("  Conversion by click complexity:")
    for idx, row in complexity_conv.iterrows():
        report.log(f"    {idx}: {row['mean']:.2%} conversion (n={row['count']:,})")

# Vendor performance metrics
if not df_bids.empty and 'VENDOR_ID' in df_bids.columns and 'IS_WINNER' in df_bids.columns:
    report.log("\nVendor Performance:")
    vendor_metrics = df_bids.groupby('VENDOR_ID').agg({
        'IS_WINNER': ['sum', 'mean'],
        'AUCTION_ID': 'count'
    })
    vendor_metrics.columns = ['wins', 'win_rate', 'total_bids']
    
    # Top performers by win rate (min 10 bids)
    qualified_vendors = vendor_metrics[vendor_metrics['total_bids'] >= 10]
    if len(qualified_vendors) > 0:
        top_win_rate = qualified_vendors.nlargest(5, 'win_rate')
        report.log("  Top 5 vendors by win rate (min 10 bids):")
        for vendor_id, row in top_win_rate.iterrows():
            report.log(f"    {str(vendor_id)[:8]}...: {row['win_rate']:.2%} ({row['wins']:.0f}/{row['total_bids']:.0f})")


SECTION 8: ADVANCED METRICS AND PATTERNS

Auction Efficiency Metrics:
  Total bids: 719,751
  Winning bids: 540,469
  Overall win rate: 75.09%
  Average bidders per auction: 37.81

Click Concentration:
  Top 10 products: 38 clicks (1.8% of total)
  Top 100 products: 240 clicks (11.4% of total)

Session Complexity and Conversion:
  Conversion by click complexity:
    0 clicks: 7.04% conversion (n=483.0)
    1 click: 20.75% conversion (n=106.0)
    2-5 clicks: 33.64% conversion (n=107.0)
    6-10 clicks: 29.27% conversion (n=41.0)
    10+ clicks: 62.26% conversion (n=53.0)

Vendor Performance:
  Top 5 vendors by win rate (min 10 bids):
    018e71f7...: 100.00% (14/14)
    018e7699...: 100.00% (12/12)
    018e9f74...: 100.00% (21/21)
    018ea0f2...: 100.00% (12/12)
    018ea16f...: 100.00% (11/11)


In [23]:
# %%
# --- DATA QUALITY ASSESSMENT ---
report.log("\n" + "="*80)
report.log("SECTION 9: DATA QUALITY ASSESSMENT")
report.log("="*80)

report.log("\nData Completeness:")

# Check for missing values in key columns
key_checks = [
    (df_shopping, 'user_id', 'Shopping Sessions'),
    (df_browsing, 'user_id', 'Browsing Sessions'),
    (df_bids, 'AUCTION_ID', 'Bids'),
    (df_impressions, 'USER_ID', 'Impressions'),
    (df_clicks, 'USER_ID', 'Clicks'),
    (df_purchases, 'USER_ID', 'Purchases')
]

for df, col, name in key_checks:
    if not df.empty and col in df.columns:
        missing = df[col].isna().sum()
        pct_missing = missing / len(df) * 100
        report.log(f"  {name} - {col}: {missing:,} missing ({pct_missing:.2f}%)")

# Date range validation
report.log("\nDate Range Coverage:")
date_columns = [
    (df_auctions, 'CREATED_AT', 'Auctions'),
    (df_impressions, 'OCCURRED_AT', 'Impressions'),
    (df_clicks, 'OCCURRED_AT', 'Clicks'),
    (df_purchases, 'PURCHASED_AT', 'Purchases')
]

for df, col, name in date_columns:
    if not df.empty and col in df.columns:
        try:
            dates = pd.to_datetime(df[col])
            date_range = (dates.max() - dates.min()).days
            report.log(f"  {name}: {dates.min().date()} to {dates.max().date()} ({date_range} days)")
        except:
            report.log(f"  {name}: Unable to parse dates")

# Join key availability
report.log("\nJoin Key Availability:")
if not df_bids.empty and not df_impressions.empty:
    # Check auction ID overlap
    if 'AUCTION_ID' in df_bids.columns and 'AUCTION_ID' in df_impressions.columns:
        bid_auctions = set(df_bids['AUCTION_ID'].dropna())
        impression_auctions = set(df_impressions['AUCTION_ID'].dropna())
        overlap = len(bid_auctions & impression_auctions)
        report.log(f"  Auction IDs in both bids and impressions: {overlap:,}")
        report.log(f"  Bids with matching impressions: {overlap/len(bid_auctions):.2%}" if bid_auctions else "N/A")


SECTION 9: DATA QUALITY ASSESSMENT

Data Completeness:
  Shopping Sessions - user_id: 0 missing (0.00%)
  Browsing Sessions - user_id: 0 missing (0.00%)
  Bids - AUCTION_ID: 0 missing (0.00%)
  Impressions - USER_ID: 0 missing (0.00%)
  Clicks - USER_ID: 0 missing (0.00%)
  Purchases - USER_ID: 0 missing (0.00%)

Date Range Coverage:
  Auctions: 2025-08-28 to 2025-09-06 (9 days)
  Impressions: 2025-08-28 to 2025-09-06 (9 days)
  Clicks: 2025-08-28 to 2025-09-06 (9 days)
  Purchases: 2025-08-28 to 2025-09-06 (9 days)

Join Key Availability:
  Auction IDs in both bids and impressions: 9,904
  Bids with matching impressions: 52.02%


In [24]:
# %%
# --- SUMMARY STATISTICS ---
report.log("\n" + "="*80)
report.log("SECTION 10: EXECUTIVE SUMMARY STATISTICS")
report.log("="*80)

# Key performance indicators
report.log("\nKey Performance Indicators:")

# Calculate KPIs
kpis = {}

# User metrics
if n_users_total > 0:
    kpis['Total Users'] = f"{n_users_total:,}"
    kpis['User Conversion Rate'] = f"{n_users_purchased/n_users_total:.2%}"

# Session metrics
if 'did_purchase' in df_shopping.columns:
    kpis['Shopping Sessions'] = f"{len(df_shopping):,}"
    kpis['Session Conversion Rate'] = f"{df_shopping['did_purchase'].mean():.2%}"

# Revenue metrics
if 'total_revenue_usd' in df_shopping.columns:
    total_rev = df_shopping['total_revenue_usd'].sum()
    kpis['Total Revenue'] = f"${total_rev:,.2f}"
    if df_shopping['did_purchase'].sum() > 0:
        aov = df_shopping[df_shopping['did_purchase']==1]['total_revenue_usd'].mean()
        kpis['Average Order Value'] = f"${aov:.2f}"

# Engagement metrics
if not df_clicks.empty and not df_impressions.empty:
    kpis['Overall CTR'] = f"{len(df_clicks)/len(df_impressions):.2%}"

# Marketplace metrics
if not df_bids.empty:
    kpis['Total Bids'] = f"{len(df_bids):,}"
    if 'IS_WINNER' in df_bids.columns:
        kpis['Bid Win Rate'] = f"{df_bids['IS_WINNER'].mean():.2%}"

# Print KPIs
for metric, value in kpis.items():
    report.log(f"  {metric}: {value}")

# Data volume summary
report.log("\nData Volume Summary:")
volumes = [
    ('Users', n_users_total),
    ('Shopping Sessions', len(df_shopping)),
    ('Browsing Sessions', len(df_browsing)),
    ('Auctions', len(df_auctions)),
    ('Bids', len(df_bids)),
    ('Impressions', len(df_impressions)),
    ('Clicks', len(df_clicks)),
    ('Purchases', len(df_purchases)),
    ('Products', len(df_catalog))
]

for name, count in volumes:
    report.log(f"  {name}: {count:,}")

report.log("\n" + "="*80)
report.log("END OF REPORT")
report.log("="*80)


SECTION 10: EXECUTIVE SUMMARY STATISTICS

Key Performance Indicators:
  Total Users: 773
  User Conversion Rate: 17.72%
  Shopping Sessions: 790
  Session Conversion Rate: 17.34%
  Total Revenue: $9,150.00
  Average Order Value: $66.79
  Overall CTR: 2.59%
  Total Bids: 719,751
  Bid Win Rate: 75.09%

Data Volume Summary:
  Users: 773
  Shopping Sessions: 790
  Browsing Sessions: 3,614
  Auctions: 19,173
  Bids: 719,751
  Impressions: 81,119
  Clicks: 2,105
  Purchases: 342
  Products: 366,458

END OF REPORT


In [25]:
# %%
# --- SAVE REPORT ---
report.save()
print("\n" + "="*80)
print("EDA Analysis Complete!")
print(f"Report saved to: {REPORT_FILE}")
print("="*80)


[SUCCESS] Report saved to reports/eda_analysis_report.txt

EDA Analysis Complete!
Report saved to: reports/eda_analysis_report.txt


In [26]:
# %%
# --- BROWSING & SHOPPING SESSION DEEP DIVE ---
report.log("\n" + "="*80)
report.log("SECTION 11: BROWSING & SHOPPING SESSION DEEP DIVE")
report.log("="*80)

# Browsing Session Analysis
if not df_browsing.empty:
    report.log("\nBrowsing Session Characteristics:")
    
    # Basic statistics
    report.log(f"  Total browsing sessions: {len(df_browsing):,}")
    report.log(f"  Unique users: {df_browsing['user_id'].nunique():,}")
    report.log(f"  Avg sessions per user: {len(df_browsing)/df_browsing['user_id'].nunique():.2f}")
    
    # Duration analysis
    if 'duration_minutes' in df_browsing.columns:
        # Categorize session lengths
        df_browsing['duration_category'] = pd.cut(
            df_browsing['duration_minutes'], 
            bins=[0, 1, 5, 15, 30, float('inf')],
            labels=['<1 min', '1-5 min', '5-15 min', '15-30 min', '30+ min']
        )
        duration_dist = df_browsing['duration_category'].value_counts()
        report.log("\n  Session Duration Distribution:")
        for cat, count in duration_dist.items():
            report.log(f"    {cat}: {count:,} sessions ({count/len(df_browsing):.1%})")
    
    # Event density analysis
    if all(col in df_browsing.columns for col in ['num_auctions', 'num_impressions', 'num_clicks']):
        report.log("\n  Event Density per Browsing Session:")
        report.log(f"    Avg auctions: {df_browsing['num_auctions'].mean():.2f}")
        report.log(f"    Avg impressions: {df_browsing['num_impressions'].mean():.2f}")
        report.log(f"    Avg clicks: {df_browsing['num_clicks'].mean():.2f}")
        
        # Sessions with no ad interactions
        no_ads = df_browsing[(df_browsing['num_impressions'] == 0)].shape[0]
        report.log(f"    Sessions with no ads: {no_ads:,} ({no_ads/len(df_browsing):.1%})")
    
    # Conversion within browsing sessions
    if 'did_purchase' in df_browsing.columns:
        browsing_conversions = df_browsing['did_purchase'].sum()
        report.log(f"\n  Browsing sessions with purchases: {browsing_conversions:,} ({browsing_conversions/len(df_browsing):.2%})")

# Shopping Session Analysis
if not df_shopping.empty:
    report.log("\nShopping Session Patterns:")
    
    # Session length distribution
    if 'session_duration_hours' in df_shopping.columns:
        report.log(f"\n  Session Duration (Shopping):")
        duration_stats = df_shopping['session_duration_hours'].describe()
        report.log(f"    Mean: {duration_stats['mean']:.2f} hours")
        report.log(f"    Median: {duration_stats['50%']:.2f} hours")
        report.log(f"    95th percentile: {df_shopping['session_duration_hours'].quantile(0.95):.2f} hours")
    
    # Multi-session user behavior
    user_session_counts = df_shopping['user_id'].value_counts()
    report.log(f"\n  Multi-Session User Behavior:")
    report.log(f"    Single-session users: {(user_session_counts == 1).sum():,}")
    report.log(f"    Multi-session users: {(user_session_counts > 1).sum():,}")
    report.log(f"    Max sessions per user: {user_session_counts.max()}")
    
    # Engagement progression
    if 'num_browsing_sessions' in df_shopping.columns and 'did_purchase' in df_shopping.columns:
        report.log(f"\n  Engagement to Conversion:")
        # Group by engagement level
        engagement_bins = pd.cut(
            df_shopping['num_browsing_sessions'],
            bins=[0, 1, 3, 5, 10, float('inf')],
            labels=['1 browse', '2-3 browse', '4-5 browse', '6-10 browse', '10+ browse']
        )
        engagement_conv = df_shopping.groupby(engagement_bins, observed=True)['did_purchase'].agg(['mean', 'count'])
        for idx, row in engagement_conv.iterrows():
            report.log(f"    {idx}: {row['mean']:.2%} conversion (n={row['count']:,})")
    
    # Ad exposure and conversion
    if all(col in df_shopping.columns for col in ['total_impressions', 'total_clicks', 'did_purchase']):
        report.log(f"\n  Ad Exposure Impact:")
        
        # No ads vs with ads
        no_ads_sessions = df_shopping[df_shopping['total_impressions'] == 0]
        with_ads_sessions = df_shopping[df_shopping['total_impressions'] > 0]
        
        if len(no_ads_sessions) > 0:
            no_ads_conv = no_ads_sessions['did_purchase'].mean()
            report.log(f"    Sessions without ads: {len(no_ads_sessions):,} ({no_ads_conv:.2%} conversion)")
        
        if len(with_ads_sessions) > 0:
            with_ads_conv = with_ads_sessions['did_purchase'].mean()
            report.log(f"    Sessions with ads: {len(with_ads_sessions):,} ({with_ads_conv:.2%} conversion)")
            
            # Incrementality estimate
            if len(no_ads_sessions) > 0 and len(with_ads_sessions) > 0:
                lift = (with_ads_conv - no_ads_conv) / no_ads_conv if no_ads_conv > 0 else 0
                report.log(f"    Estimated lift from ads: {lift:.1%}")

# Cross-session analysis
if not df_shopping.empty and not df_browsing.empty:
    report.log("\nCross-Session Insights:")
    
    # Browsing to shopping funnel
    total_browsing_users = df_browsing['user_id'].nunique()
    total_shopping_users = df_shopping['user_id'].nunique()
    report.log(f"  Browsing to shopping funnel:")
    report.log(f"    Users who browsed: {total_browsing_users:,}")
    report.log(f"    Users who shopped: {total_shopping_users:,}")
    report.log(f"    Browse-to-shop rate: {total_shopping_users/total_browsing_users:.2%}" if total_browsing_users > 0 else "    Browse-to-shop rate: N/A")
    
    # Time patterns
    if 'session_start' in df_shopping.columns:
        df_shopping['session_start'] = pd.to_datetime(df_shopping['session_start'])
        df_shopping['hour'] = df_shopping['session_start'].dt.hour
        df_shopping['weekday'] = df_shopping['session_start'].dt.dayofweek
        
        report.log(f"\n  Temporal Patterns (Shopping Sessions):")
        
        # Peak hours
        peak_hours = df_shopping['hour'].value_counts().head(3)
        report.log(f"    Top 3 active hours:")
        for hour, count in peak_hours.items():
            report.log(f"      {hour}:00-{hour+1}:00: {count:,} sessions")
        
        # Weekday vs weekend
        df_shopping['is_weekend'] = df_shopping['weekday'].isin([5, 6])
        weekend_sessions = df_shopping['is_weekend'].sum()
        weekday_sessions = len(df_shopping) - weekend_sessions
        report.log(f"    Weekday sessions: {weekday_sessions:,} ({weekday_sessions/len(df_shopping):.1%})")
        report.log(f"    Weekend sessions: {weekend_sessions:,} ({weekend_sessions/len(df_shopping):.1%})")


SECTION 11: BROWSING & SHOPPING SESSION DEEP DIVE

Browsing Session Characteristics:
  Total browsing sessions: 3,614
  Unique users: 773
  Avg sessions per user: 4.68

  Session Duration Distribution:
    <1 min: 867 sessions (24.0%)
    1-5 min: 794 sessions (22.0%)
    5-15 min: 558 sessions (15.4%)
    15-30 min: 357 sessions (9.9%)
    30+ min: 276 sessions (7.6%)

  Event Density per Browsing Session:
    Avg auctions: 5.31
    Avg impressions: 22.45
    Avg clicks: 0.58
    Sessions with no ads: 1,021 (28.3%)

Shopping Session Patterns:

  Multi-Session User Behavior:
    Single-session users: 756
    Multi-session users: 17
    Max sessions per user: 2

  Engagement to Conversion:
    1 browse: 1.62% conversion (n=371.0)
    2-3 browse: 15.88% conversion (n=170.0)
    4-5 browse: 25.00% conversion (n=84.0)
    6-10 browse: 39.33% conversion (n=89.0)
    10+ browse: 63.16% conversion (n=76.0)

  Ad Exposure Impact:
    Sessions without ads: 111 (4.50% conversion)
    Sessions w

In [27]:
# %%
# --- PURCHASE FUNNEL CONSISTENCY & ATTRIBUTION ---
report.log("\n" + "="*80)
report.log("SECTION 12: PURCHASE FUNNEL CONSISTENCY & ATTRIBUTION")
report.log("="*80)

# Question 1: Are purchased products in the consideration set?
report.log("\n12.1 PURCHASE CONSIDERATION SET ANALYSIS")
report.log("-" * 40)

# Get all purchased products
purchased_products = set(df_purchases['PRODUCT_ID'].unique())
report.log(f"\nTotal unique purchased products: {len(purchased_products):,}")

# Get all products in the ad funnel
bid_products = set(df_bids['PRODUCT_ID'].unique()) if not df_bids.empty else set()
impression_products = set(df_impressions['PRODUCT_ID'].unique()) if not df_impressions.empty else set()
click_products = set(df_clicks['PRODUCT_ID'].unique()) if not df_clicks.empty else set()

report.log(f"Products in ad funnel:")
report.log(f"  - Products with bids: {len(bid_products):,}")
report.log(f"  - Products with impressions: {len(impression_products):,}")
report.log(f"  - Products with clicks: {len(click_products):,}")

# Check overlaps
in_bids = purchased_products & bid_products
in_impressions = purchased_products & impression_products
in_clicks = purchased_products & click_products
in_any_funnel = purchased_products & (bid_products | impression_products | click_products)
organic_purchases = purchased_products - in_any_funnel

report.log(f"\nPurchased products overlap with ad funnel:")
report.log(f"  - Bid on: {len(in_bids):,} ({len(in_bids)/len(purchased_products)*100:.1f}%)")
report.log(f"  - Impressed: {len(in_impressions):,} ({len(in_impressions)/len(purchased_products)*100:.1f}%)")
report.log(f"  - Clicked: {len(in_clicks):,} ({len(in_clicks)/len(purchased_products)*100:.1f}%)")
report.log(f"  - In ANY funnel stage: {len(in_any_funnel):,} ({len(in_any_funnel)/len(purchased_products)*100:.1f}%)")
report.log(f"  - **ORGANIC (not in funnel): {len(organic_purchases):,} ({len(organic_purchases)/len(purchased_products)*100:.1f}%)**")

# Question 2: Are clicked products always impressed?
report.log("\n12.2 CLICK-IMPRESSION CONSISTENCY CHECK")
report.log("-" * 40)

if not df_clicks.empty and not df_impressions.empty:
    # Product-level consistency
    clicked_products = set(df_clicks['PRODUCT_ID'].unique())
    impressed_products = set(df_impressions['PRODUCT_ID'].unique())
    
    clicked_and_impressed = clicked_products & impressed_products
    clicked_not_impressed = clicked_products - impressed_products
    
    report.log(f"\nClicked products consistency:")
    report.log(f"  Total clicked products: {len(clicked_products):,}")
    report.log(f"  Also impressed: {len(clicked_and_impressed):,} ({len(clicked_and_impressed)/len(clicked_products)*100:.1f}%)")
    report.log(f"  NOT impressed: {len(clicked_not_impressed):,} ({len(clicked_not_impressed)/len(clicked_products)*100:.1f}%)")
    
    if len(clicked_not_impressed) > 0:
        report.log(f"\n  ⚠️  WARNING: {len(clicked_not_impressed)} products were clicked but never impressed")
        report.log("  This indicates potential data quality issues or missing impression records")
    
    # Event-level consistency check
    report.log(f"\nEvent-level consistency:")
    clicks_matched = 0
    clicks_unmatched = 0
    
    # Sample check for efficiency (checking all would be slow)
    sample_size = min(1000, len(df_clicks))
    click_sample = df_clicks.sample(n=sample_size, random_state=42)
    
    for _, click in click_sample.iterrows():
        matching_impression = df_impressions[
            (df_impressions['AUCTION_ID'] == click['AUCTION_ID']) &
            (df_impressions['USER_ID'] == click['USER_ID']) &
            (df_impressions['PRODUCT_ID'] == click['PRODUCT_ID'])
        ]
        if len(matching_impression) > 0:
            clicks_matched += 1
        else:
            clicks_unmatched += 1
    
    match_rate = clicks_matched / sample_size * 100
    report.log(f"  Sample of {sample_size:,} clicks:")
    report.log(f"    - With matching impression: {clicks_matched:,} ({match_rate:.1f}%)")
    report.log(f"    - Without matching impression: {clicks_unmatched:,} ({100-match_rate:.1f}%)")
    
    if match_rate < 100:
        estimated_total_unmatched = int(len(df_clicks) * (100-match_rate) / 100)
        report.log(f"\n  ⚠️  Estimated {estimated_total_unmatched:,} clicks without impressions in full dataset")

# Purchase Attribution Analysis
report.log("\n12.3 PURCHASE ATTRIBUTION ANALYSIS")
report.log("-" * 40)

if not df_purchases.empty:
    # Convert timestamps
    df_purchases_temp = df_purchases.copy()
    df_purchases_temp['PURCHASED_AT'] = pd.to_datetime(df_purchases_temp['PURCHASED_AT'])
    
    if not df_impressions.empty:
        df_impressions_temp = df_impressions.copy()
        df_impressions_temp['OCCURRED_AT'] = pd.to_datetime(df_impressions_temp['OCCURRED_AT'])
    
    if not df_clicks.empty:
        df_clicks_temp = df_clicks.copy()
        df_clicks_temp['OCCURRED_AT'] = pd.to_datetime(df_clicks_temp['OCCURRED_AT'])
    
    # Analyze each purchase
    purchase_attribution = []
    for _, purchase in df_purchases_temp.iterrows():
        user_id = purchase['USER_ID']
        product_id = purchase['PRODUCT_ID']
        purchase_time = purchase['PURCHASED_AT']
        
        # Look for prior exposure (7-day window)
        time_window_start = purchase_time - pd.Timedelta(days=7)
        
        had_impression = False
        had_click = False
        
        if not df_impressions.empty:
            user_impressions = df_impressions_temp[
                (df_impressions_temp['USER_ID'] == user_id) & 
                (df_impressions_temp['PRODUCT_ID'] == product_id) &
                (df_impressions_temp['OCCURRED_AT'] >= time_window_start) &
                (df_impressions_temp['OCCURRED_AT'] <= purchase_time)
            ]
            had_impression = len(user_impressions) > 0
        
        if not df_clicks.empty:
            user_clicks = df_clicks_temp[
                (df_clicks_temp['USER_ID'] == user_id) & 
                (df_clicks_temp['PRODUCT_ID'] == product_id) &
                (df_clicks_temp['OCCURRED_AT'] >= time_window_start) &
                (df_clicks_temp['OCCURRED_AT'] <= purchase_time)
            ]
            had_click = len(user_clicks) > 0
        
        purchase_attribution.append({
            'had_impression': had_impression,
            'had_click': had_click
        })
    
    df_attribution = pd.DataFrame(purchase_attribution)
    
    # Calculate attribution categories
    organic = (~df_attribution['had_impression']) & (~df_attribution['had_click'])
    view_through = df_attribution['had_impression'] & (~df_attribution['had_click'])
    click_through = df_attribution['had_click']
    
    report.log(f"\nPurchase Attribution (7-day window):")
    report.log(f"  Total purchases analyzed: {len(df_attribution):,}")
    report.log(f"  - Organic (no ad exposure): {organic.sum():,} ({organic.mean()*100:.1f}%)")
    report.log(f"  - View-through conversions: {view_through.sum():,} ({view_through.mean()*100:.1f}%)")
    report.log(f"  - Click-through conversions: {click_through.sum():,} ({click_through.mean()*100:.1f}%)")
    
    # User-level analysis
    report.log(f"\nUser-Level Purchase Patterns:")
    purchasing_users = df_purchases['USER_ID'].unique()
    users_with_ad_purchases = df_attribution[df_attribution['had_impression'] | df_attribution['had_click']].index.nunique() if len(df_attribution) > 0 else 0
    
    report.log(f"  Total purchasing users: {len(purchasing_users):,}")
    report.log(f"  Users with at least one ad-driven purchase: ~{min(users_with_ad_purchases, len(purchasing_users)):,}")
    report.log(f"  Users with only organic purchases: ~{len(purchasing_users) - min(users_with_ad_purchases, len(purchasing_users)):,}")

# Extended Consideration Set Recommendations
report.log("\n12.4 IMPLICATIONS FOR CONSIDERATION SET MODELING")
report.log("-" * 40)

report.log("\nKey Findings:")
report.log(f"  1. {len(organic_purchases)/len(purchased_products)*100:.1f}% of purchased products are completely organic")
report.log(f"  2. Only {len(in_any_funnel)/len(purchased_products)*100:.1f}% of purchases come from the ad funnel")
report.log(f"  3. Click-impression consistency is {match_rate:.1f}% (data quality concern)" if 'match_rate' in locals() else "  3. Click-impression consistency needs verification")

report.log("\nRecommendations for Extended Consideration Model:")
report.log("  • Incorporate browsing history beyond ad exposures")
report.log("  • Capture organic discovery patterns and search behavior")
report.log("  • Include category preferences and product similarity")
report.log("  • Account for the dominant role of organic purchases")
report.log("  • Address data quality issues in impression tracking")

# Data Quality Summary
report.log("\n12.5 DATA QUALITY ISSUES IDENTIFIED")
report.log("-" * 40)

issues = []
if len(clicked_not_impressed) > 0:
    issues.append(f"  • {len(clicked_not_impressed)} products clicked but never impressed")
if 'clicks_unmatched' in locals() and clicks_unmatched > 0:
    issues.append(f"  • ~{100-match_rate:.1f}% of clicks lack matching impressions")
if organic.mean() > 0.9:
    issues.append(f"  • {organic.mean()*100:.1f}% organic purchases suggest potential tracking gaps")

if issues:
    report.log("\nIdentified Issues:")
    for issue in issues:
        report.log(issue)
else:
    report.log("\n  No major data quality issues identified")

report.log("\n" + "="*80)


SECTION 12: PURCHASE FUNNEL CONSISTENCY & ATTRIBUTION

12.1 PURCHASE CONSIDERATION SET ANALYSIS
----------------------------------------

Total unique purchased products: 281
Products in ad funnel:
  - Products with bids: 366,426
  - Products with impressions: 55,937
  - Products with clicks: 1,860

Purchased products overlap with ad funnel:
  - Bid on: 23 (8.2%)
  - Impressed: 15 (5.3%)
  - Clicked: 15 (5.3%)
  - In ANY funnel stage: 23 (8.2%)
  - **ORGANIC (not in funnel): 258 (91.8%)**

12.2 CLICK-IMPRESSION CONSISTENCY CHECK
----------------------------------------

Clicked products consistency:
  Total clicked products: 1,860
  Also impressed: 1,850 (99.5%)
  NOT impressed: 10 (0.5%)

  This indicates potential data quality issues or missing impression records

Event-level consistency:
  Sample of 1,000 clicks:
    - With matching impression: 972 (97.2%)
    - Without matching impression: 28 (2.8%)

  ⚠️  Estimated 58 clicks without impressions in full dataset

12.3 PURCHASE ATTR

In [28]:
# %%
# --- FIXED EFFECTS & PANEL STRUCTURE ANALYSIS ---
report.log("\n" + "="*80)
report.log("SECTION 13: FIXED EFFECTS & PANEL STRUCTURE ANALYSIS")
report.log("="*80)

report.log("\n13.1 PANEL DIMENSIONS OVERVIEW")
report.log("-" * 40)

# Add time columns if not present
if not df_auctions.empty and 'CREATED_AT' in df_auctions.columns:
    df_auctions['CREATED_AT'] = pd.to_datetime(df_auctions['CREATED_AT'])
    df_auctions['date'] = df_auctions['CREATED_AT'].dt.date
    df_auctions['week'] = df_auctions['CREATED_AT'].dt.to_period('W')
    df_auctions['day'] = df_auctions['CREATED_AT'].dt.to_period('D')
    df_auctions['hour'] = df_auctions['CREATED_AT'].dt.hour

if not df_impressions.empty and 'OCCURRED_AT' in df_impressions.columns:
    df_impressions['OCCURRED_AT'] = pd.to_datetime(df_impressions['OCCURRED_AT'])
    df_impressions['date'] = df_impressions['OCCURRED_AT'].dt.date
    df_impressions['week'] = df_impressions['OCCURRED_AT'].dt.to_period('W')

if not df_clicks.empty and 'OCCURRED_AT' in df_clicks.columns:
    df_clicks['OCCURRED_AT'] = pd.to_datetime(df_clicks['OCCURRED_AT'])
    df_clicks['date'] = df_clicks['OCCURRED_AT'].dt.date
    df_clicks['week'] = df_clicks['OCCURRED_AT'].dt.to_period('W')

if not df_purchases.empty and 'PURCHASED_AT' in df_purchases.columns:
    df_purchases['PURCHASED_AT'] = pd.to_datetime(df_purchases['PURCHASED_AT'])
    df_purchases['date'] = df_purchases['PURCHASED_AT'].dt.date
    df_purchases['week'] = df_purchases['PURCHASED_AT'].dt.to_period('W')

# Basic panel dimensions
report.log("\nUnique Entities in Dataset:")
n_users = df_auctions['OPAQUE_USER_ID'].nunique() if 'OPAQUE_USER_ID' in df_auctions.columns else 0
n_products = len(set(df_bids['PRODUCT_ID'].unique()) | set(df_purchases['PRODUCT_ID'].unique())) if not df_bids.empty else 0
n_vendors = df_bids['VENDOR_ID'].nunique() if 'VENDOR_ID' in df_bids.columns else 0
n_campaigns = df_bids['CAMPAIGN_ID'].nunique() if 'CAMPAIGN_ID' in df_bids.columns else 0

report.log(f"  Users: {n_users:,}")
report.log(f"  Products: {n_products:,}")
report.log(f"  Vendors: {n_vendors:,}")
report.log(f"  Campaigns: {n_campaigns:,}")

# Time dimensions
if 'week' in df_auctions.columns:
    n_weeks = df_auctions['week'].nunique()
    n_days = df_auctions['day'].nunique()
    report.log(f"\nTime Periods:")
    report.log(f"  Weeks: {n_weeks}")
    report.log(f"  Days: {n_days}")
    report.log(f"  Date range: {df_auctions['CREATED_AT'].min().date()} to {df_auctions['CREATED_AT'].max().date()}")

# USER FIXED EFFECTS ANALYSIS
report.log("\n13.2 USER FIXED EFFECTS POTENTIAL")
report.log("-" * 40)

# User activity across time
if not df_auctions.empty:
    user_activity = df_auctions.groupby('OPAQUE_USER_ID').agg({
        'AUCTION_ID': 'count',
        'week': 'nunique' if 'week' in df_auctions.columns else lambda x: 0,
        'day': 'nunique' if 'day' in df_auctions.columns else lambda x: 0
    }).rename(columns={'AUCTION_ID': 'auctions', 'week': 'weeks_active', 'day': 'days_active'})
    
    # Categorize users by activity level
    single_day_users = (user_activity['days_active'] == 1).sum()
    multi_day_users = (user_activity['days_active'] > 1).sum()
    multi_week_users = (user_activity['weeks_active'] > 1).sum() if 'week' in df_auctions.columns else 0
    
    report.log("\nUser Repeat Activity:")
    report.log(f"  Single-day users: {single_day_users:,} ({single_day_users/n_users*100:.1f}%)")
    report.log(f"  Multi-day users: {multi_day_users:,} ({multi_day_users/n_users*100:.1f}%)")
    report.log(f"  Multi-week users: {multi_week_users:,} ({multi_week_users/n_users*100:.1f}%)")
    
    # User activity distribution
    report.log("\nUser Activity Distribution:")
    activity_percentiles = user_activity['auctions'].quantile([0.25, 0.50, 0.75, 0.90, 0.95, 0.99])
    report.log(f"  25th percentile: {activity_percentiles[0.25]:.0f} auctions")
    report.log(f"  Median: {activity_percentiles[0.50]:.0f} auctions")
    report.log(f"  75th percentile: {activity_percentiles[0.75]:.0f} auctions")
    report.log(f"  95th percentile: {activity_percentiles[0.95]:.0f} auctions")
    report.log(f"  99th percentile: {activity_percentiles[0.99]:.0f} auctions")
    
    # Users with purchases
    if not df_purchases.empty:
        purchasing_users = df_purchases['USER_ID'].value_counts()
        repeat_purchasers = (purchasing_users > 1).sum()
        report.log(f"\nPurchase Patterns:")
        report.log(f"  Users with purchases: {len(purchasing_users):,}")
        report.log(f"  Repeat purchasers: {repeat_purchasers:,} ({repeat_purchasers/len(purchasing_users)*100:.1f}%)")
        report.log(f"  Max purchases per user: {purchasing_users.max()}")

# PRODUCT FIXED EFFECTS ANALYSIS
report.log("\n13.3 PRODUCT FIXED EFFECTS POTENTIAL")
report.log("-" * 40)

if not df_impressions.empty:
    product_impressions = df_impressions.groupby('PRODUCT_ID').agg({
        'INTERACTION_ID': 'count',
        'week': 'nunique' if 'week' in df_impressions.columns else lambda x: 0,
        'USER_ID': 'nunique'
    }).rename(columns={'INTERACTION_ID': 'impressions', 'week': 'weeks_active', 'USER_ID': 'unique_users'})
    
    # Products with repeated exposure
    products_multi_week = (product_impressions['weeks_active'] > 1).sum() if 'week' in df_impressions.columns else 0
    products_multi_user = (product_impressions['unique_users'] > 1).sum()
    
    report.log("\nProduct Repeat Exposure:")
    report.log(f"  Products impressed: {len(product_impressions):,}")
    report.log(f"  Products shown multiple weeks: {products_multi_week:,} ({products_multi_week/len(product_impressions)*100:.1f}%)")
    report.log(f"  Products shown to multiple users: {products_multi_user:,} ({products_multi_user/len(product_impressions)*100:.1f}%)")
    
    # Top products
    report.log("\nTop 5 Most Impressed Products:")
    for idx, (product, row) in enumerate(product_impressions.nlargest(5, 'impressions').iterrows(), 1):
        report.log(f"  {idx}. {str(product)[:20]}...: {row['impressions']:,} impressions, {row['unique_users']:,} users")

# VENDOR FIXED EFFECTS ANALYSIS
report.log("\n13.4 VENDOR FIXED EFFECTS POTENTIAL")
report.log("-" * 40)

if not df_bids.empty:
    # Vendor activity
    vendor_activity = df_bids.groupby('VENDOR_ID').agg({
        'AUCTION_ID': 'nunique',
        'PRODUCT_ID': 'nunique',
        'CAMPAIGN_ID': 'nunique',
        'IS_WINNER': ['sum', 'mean']
    })
    vendor_activity.columns = ['auctions', 'products', 'campaigns', 'wins', 'win_rate']
    
    # Vendor persistence
    if 'week' in df_auctions.columns:
        vendor_weeks = df_bids.merge(df_auctions[['AUCTION_ID', 'week']], on='AUCTION_ID')
        vendor_time_activity = vendor_weeks.groupby('VENDOR_ID')['week'].nunique()
        vendors_multi_week = (vendor_time_activity > 1).sum()
        
        report.log(f"\nVendor Activity Over Time:")
        report.log(f"  Active vendors: {n_vendors:,}")
        report.log(f"  Vendors active multiple weeks: {vendors_multi_week:,} ({vendors_multi_week/n_vendors*100:.1f}%)")
    
    # Vendor concentration
    report.log("\nVendor Concentration:")
    vendor_bid_counts = df_bids['VENDOR_ID'].value_counts()
    top_10_vendors = vendor_bid_counts.head(10).sum()
    top_100_vendors = vendor_bid_counts.head(100).sum()
    report.log(f"  Top 10 vendors: {top_10_vendors:,} bids ({top_10_vendors/len(df_bids)*100:.1f}% of total)")
    report.log(f"  Top 100 vendors: {top_100_vendors:,} bids ({top_100_vendors/len(df_bids)*100:.1f}% of total)")
    
    # Vendor performance distribution
    report.log("\nVendor Performance Distribution:")
    win_rate_percentiles = vendor_activity['win_rate'].quantile([0.25, 0.50, 0.75, 0.90])
    report.log(f"  25th percentile win rate: {win_rate_percentiles[0.25]:.2%}")
    report.log(f"  Median win rate: {win_rate_percentiles[0.50]:.2%}")
    report.log(f"  75th percentile win rate: {win_rate_percentiles[0.75]:.2%}")
    report.log(f"  90th percentile win rate: {win_rate_percentiles[0.90]:.2%}")

# TIME FIXED EFFECTS ANALYSIS
report.log("\n13.5 TIME FIXED EFFECTS POTENTIAL")
report.log("-" * 40)

if 'week' in df_auctions.columns:
    # Weekly patterns
    weekly_activity = df_auctions.groupby('week').agg({
        'AUCTION_ID': 'count',
        'OPAQUE_USER_ID': 'nunique'
    }).rename(columns={'AUCTION_ID': 'auctions', 'OPAQUE_USER_ID': 'unique_users'})
    
    report.log("\nWeekly Activity Variation:")
    report.log(f"  Mean auctions per week: {weekly_activity['auctions'].mean():.0f}")
    report.log(f"  Std dev: {weekly_activity['auctions'].std():.0f}")
    report.log(f"  Coefficient of variation: {weekly_activity['auctions'].std()/weekly_activity['auctions'].mean():.2f}")
    
    # Day of week patterns
    if not df_auctions.empty:
        df_auctions['dayofweek'] = pd.to_datetime(df_auctions['CREATED_AT']).dt.dayofweek
        day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
        
        report.log("\nDay of Week Patterns:")
        for day in range(7):
            day_auctions = (df_auctions['dayofweek'] == day).sum()
            report.log(f"  {day_names[day]}: {day_auctions:,} auctions ({day_auctions/len(df_auctions)*100:.1f}%)")
    
    # Hour of day patterns
    if 'hour' in df_auctions.columns:
        peak_hours = df_auctions['hour'].value_counts().head(3)
        report.log("\nPeak Activity Hours:")
        for hour, count in peak_hours.items():
            report.log(f"  {hour}:00-{hour+1}:00: {count:,} auctions ({count/len(df_auctions)*100:.1f}%)")

# PANEL BALANCE ASSESSMENT
report.log("\n13.6 PANEL BALANCE ASSESSMENT")
report.log("-" * 40)

# Create user-week panel
if 'week' in df_auctions.columns:
    user_weeks = df_auctions.groupby(['OPAQUE_USER_ID', 'week']).size().reset_index(name='activity')
    all_weeks = df_auctions['week'].unique()
    all_users = df_auctions['OPAQUE_USER_ID'].unique()
    
    # Calculate panel completeness
    potential_observations = len(all_users) * len(all_weeks)
    actual_observations = len(user_weeks)
    panel_completeness = actual_observations / potential_observations * 100
    
    report.log(f"\nUser-Week Panel:")
    report.log(f"  Potential observations: {potential_observations:,}")
    report.log(f"  Actual observations: {actual_observations:,}")
    report.log(f"  Panel completeness: {panel_completeness:.1f}%")
    
    # Balance categories
    user_week_counts = user_weeks.groupby('OPAQUE_USER_ID').size()
    balanced_users = (user_week_counts == n_weeks).sum()
    report.log(f"  Fully balanced users (all weeks): {balanced_users:,} ({balanced_users/n_users*100:.1f}%)")

# FIXED EFFECTS RECOMMENDATIONS
report.log("\n13.7 FIXED EFFECTS MODEL RECOMMENDATIONS")
report.log("-" * 40)

report.log("\nViability Assessment:")

# User FE
user_fe_viable = multi_day_users > n_users * 0.3
report.log(f"  User Fixed Effects: {'✓ RECOMMENDED' if user_fe_viable else '✗ LIMITED'}")
report.log(f"    - {multi_day_users/n_users*100:.1f}% of users appear multiple days")
report.log(f"    - {multi_week_users/n_users*100:.1f}% of users appear multiple weeks")

# Product FE
if 'products_multi_week' in locals():
    product_fe_viable = products_multi_week > len(product_impressions) * 0.3
    report.log(f"  Product Fixed Effects: {'✓ RECOMMENDED' if product_fe_viable else '✗ LIMITED'}")
    report.log(f"    - {products_multi_week/len(product_impressions)*100:.1f}% of products appear multiple weeks")

# Vendor FE
if 'vendors_multi_week' in locals():
    vendor_fe_viable = vendors_multi_week > n_vendors * 0.3
    report.log(f"  Vendor Fixed Effects: {'✓ RECOMMENDED' if vendor_fe_viable else '✗ LIMITED'}")
    report.log(f"    - {vendors_multi_week/n_vendors*100:.1f}% of vendors active multiple weeks")

# Time FE
report.log(f"  Time Fixed Effects: ✓ RECOMMENDED")
report.log(f"    - {n_weeks} weeks available")
report.log(f"    - Clear day-of-week patterns observed")

report.log("\nSuggested Model Specifications:")
report.log("  1. User FE + Time FE (for user-level outcomes)")
report.log("  2. Product FE + Time FE (for product performance)")
report.log("  3. Vendor FE + Time FE (for vendor competition analysis)")
report.log("  4. Two-way FE: User × Product (if sufficient overlap)")

# Calculate some two-way FE potential
if not df_impressions.empty:
    user_product_pairs = df_impressions.groupby(['USER_ID', 'PRODUCT_ID']).size()
    repeat_pairs = (user_product_pairs > 1).sum()
    report.log(f"\n  User-Product pairs with repeats: {repeat_pairs:,} ({repeat_pairs/len(user_product_pairs)*100:.1f}%)")

report.log("\n" + "="*80)


SECTION 13: FIXED EFFECTS & PANEL STRUCTURE ANALYSIS

13.1 PANEL DIMENSIONS OVERVIEW
----------------------------------------

Unique Entities in Dataset:
  Users: 773
  Products: 366,684
  Vendors: 40,252
  Campaigns: 66,668

Time Periods:
  Weeks: 2
  Days: 10
  Date range: 2025-08-28 to 2025-09-06

13.2 USER FIXED EFFECTS POTENTIAL
----------------------------------------

User Repeat Activity:
  Single-day users: 389 (50.3%)
  Multi-day users: 384 (49.7%)
  Multi-week users: 288 (37.3%)

User Activity Distribution:
  25th percentile: 2 auctions
  Median: 6 auctions
  75th percentile: 18 auctions
  95th percentile: 118 auctions
  99th percentile: 268 auctions

Purchase Patterns:
  Users with purchases: 137
  Repeat purchasers: 57 (41.6%)
  Max purchases per user: 65

13.3 PRODUCT FIXED EFFECTS POTENTIAL
----------------------------------------

Product Repeat Exposure:
  Products impressed: 55,937
  Products shown multiple weeks: 2,007 (3.6%)
  Products shown to multiple users: 2,1