# Auction-Level Analysis

## Objective
Analyze auctions themselves: how many win, how many pay, competition dynamics, and auction efficiency.

## Research Questions
1. How many bids per auction? What's the competition distribution?
2. How many winners per auction? Single-slot vs multi-slot auctions?
3. Who pays? Payment concentration and clearing prices?
4. Winner concentration: Do same campaigns/vendors dominate?
5. Auction efficiency: Fill rates, competition quality?

## Data Period
September 2-8, 2025 (14 days), 0.1% user sample

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

print("Loading data with progress tracking...")
print("="*80)

df_auctions_results = pd.read_parquet('data/raw_auctions_results_20251011.parquet')
print(f"✓ Loaded AUCTIONS_RESULTS: {len(df_auctions_results):,} rows")

df_auctions_users = pd.read_parquet('data/raw_auctions_users_20251011.parquet')
print(f"✓ Loaded AUCTIONS_USERS: {len(df_auctions_users):,} rows")

df_impressions = pd.read_parquet('data/raw_impressions_20251011.parquet')
print(f"✓ Loaded IMPRESSIONS: {len(df_impressions):,} rows")

df_clicks = pd.read_parquet('data/raw_clicks_20251011.parquet')
print(f"✓ Loaded CLICKS: {len(df_clicks):,} rows")

print("\nMerging auction data...")
df_auctions_results_clean = df_auctions_results.drop(columns=['CREATED_AT'])
df = pd.merge(df_auctions_results_clean, 
              df_auctions_users[['AUCTION_ID', 'CREATED_AT', 'PLACEMENT', 'OPAQUE_USER_ID']], 
              on='AUCTION_ID', how='left')
print(f"✓ Merged dataset: {len(df):,} rows")

print("\nCreating derived features...")
df['datetime'] = pd.to_datetime(df['CREATED_AT'])
df['hour'] = df['datetime'].dt.hour
df['day_of_week'] = df['datetime'].dt.dayofweek
df['date'] = df['datetime'].dt.date
df['FINAL_BID_DOLLARS'] = df['FINAL_BID'] / 100
df['PRICE_DOLLARS'] = df['PRICE'] / 100

print(f"\nTotal bids: {len(df):,}")
print(f"Total auctions: {df['AUCTION_ID'].nunique():,}")
print(f"Mean bids per auction: {len(df) / df['AUCTION_ID'].nunique():.2f}")
print("\n" + "="*80)

Loading data with progress tracking...
✓ Loaded AUCTIONS_RESULTS: 18,838,670 rows
✓ Loaded AUCTIONS_USERS: 413,457 rows
✓ Loaded IMPRESSIONS: 533,146 rows
✓ Loaded CLICKS: 16,706 rows

Merging auction data...
✓ Merged dataset: 18,840,598 rows

Creating derived features...

Total bids: 18,840,598
Total auctions: 410,365
Mean bids per auction: 45.91



## Section 1: Auction Basics

In [2]:
print("="*80)
print("SECTION 1: AUCTION BASICS")
print("="*80)

# Auction-level aggregation
print("\nComputing auction-level statistics...")
auction_stats = df.groupby('AUCTION_ID').agg({
    'VENDOR_ID': 'count',  # total bids
    'IS_WINNER': ['sum', 'mean'],  # winners count and rate
    'FINAL_BID': ['mean', 'max', 'min'],
    'PRICE': 'sum',  # total payment
    'QUALITY': 'mean',
    'PLACEMENT': 'first',
    'CAMPAIGN_ID': 'nunique',
    'PRODUCT_ID': 'nunique'
}).reset_index()

auction_stats.columns = ['_'.join(col).strip('_') for col in auction_stats.columns.values]
auction_stats.columns = ['AUCTION_ID', 'n_bids', 'n_winners', 'win_rate', 
                         'mean_bid', 'max_bid', 'min_bid', 'total_payment',
                         'mean_quality', 'PLACEMENT', 'n_campaigns', 'n_products']

print("\n1. OVERALL AUCTION STATISTICS")
print("-" * 80)
print(f"Total auctions: {len(auction_stats):,}")
print(f"Total bids: {len(df):,}")
print(f"Total winners: {df['IS_WINNER'].sum():,} ({df['IS_WINNER'].mean()*100:.2f}%)")

print("\n2. BIDS PER AUCTION DISTRIBUTION")
print("-" * 80)
print(f"Mean bids per auction: {auction_stats['n_bids'].mean():.2f}")
print(f"Median bids per auction: {auction_stats['n_bids'].median():.0f}")
print(f"Std bids per auction: {auction_stats['n_bids'].std():.2f}")
print(f"Min bids per auction: {auction_stats['n_bids'].min():.0f}")
print(f"Max bids per auction: {auction_stats['n_bids'].max():.0f}")

print("\nBids per auction percentiles:")
for q in [0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]:
    print(f"  {q*100:5.1f}%: {auction_stats['n_bids'].quantile(q):.0f} bids")

print("\nBid density categories:")
print(f"  Very low (<5 bids):      {(auction_stats['n_bids'] < 5).sum():,} ({(auction_stats['n_bids'] < 5).mean()*100:.2f}%)")
print(f"  Low (5-20 bids):         {((auction_stats['n_bids'] >= 5) & (auction_stats['n_bids'] < 20)).sum():,} ({((auction_stats['n_bids'] >= 5) & (auction_stats['n_bids'] < 20)).mean()*100:.2f}%)")
print(f"  Medium (20-50 bids):     {((auction_stats['n_bids'] >= 20) & (auction_stats['n_bids'] < 50)).sum():,} ({((auction_stats['n_bids'] >= 20) & (auction_stats['n_bids'] < 50)).mean()*100:.2f}%)")
print(f"  High (50-100 bids):      {((auction_stats['n_bids'] >= 50) & (auction_stats['n_bids'] < 100)).sum():,} ({((auction_stats['n_bids'] >= 50) & (auction_stats['n_bids'] < 100)).mean()*100:.2f}%)")
print(f"  Very high (100+ bids):   {(auction_stats['n_bids'] >= 100).sum():,} ({(auction_stats['n_bids'] >= 100).mean()*100:.2f}%)")

print("\n3. WINNERS PER AUCTION")
print("-" * 80)
print(f"Mean winners per auction: {auction_stats['n_winners'].mean():.2f}")
print(f"Median winners per auction: {auction_stats['n_winners'].median():.0f}")
print(f"Max winners per auction: {auction_stats['n_winners'].max():.0f}")

print("\nWinners per auction distribution:")
winner_counts = auction_stats['n_winners'].value_counts().sort_index()
for n_winners in sorted(winner_counts.index[:20]):
    count = winner_counts[n_winners]
    pct = count / len(auction_stats) * 100
    print(f"  {n_winners:3.0f} winners: {count:8,} auctions ({pct:5.2f}%)")

if len(winner_counts) > 20:
    print(f"  ... ({len(winner_counts) - 20} more categories)")

print("\n4. AUCTION FILL RATE")
print("-" * 80)
auctions_with_winners = (auction_stats['n_winners'] > 0).sum()
fill_rate = auctions_with_winners / len(auction_stats)
print(f"Auctions with at least 1 winner: {auctions_with_winners:,} ({fill_rate*100:.2f}%)")
print(f"Auctions with no winners: {(auction_stats['n_winners'] == 0).sum():,} ({(1-fill_rate)*100:.2f}%)")

print("\n5. PLACEMENT-SPECIFIC METRICS")
print("-" * 80)
placement_summary = auction_stats.groupby('PLACEMENT').agg({
    'AUCTION_ID': 'count',
    'n_bids': 'mean',
    'n_winners': 'mean',
    'win_rate': 'mean',
    'mean_bid': 'mean'
}).round(2)
placement_summary.columns = ['n_auctions', 'avg_bids', 'avg_winners', 'avg_win_rate', 'avg_bid']
print("\nAuction metrics by placement:")
print(placement_summary.to_string())

print("\n" + "="*80)

SECTION 1: AUCTION BASICS

Computing auction-level statistics...

1. OVERALL AUCTION STATISTICS
--------------------------------------------------------------------------------
Total auctions: 410,365
Total bids: 18,840,598
Total winners: 15,510,672 (82.33%)

2. BIDS PER AUCTION DISTRIBUTION
--------------------------------------------------------------------------------
Mean bids per auction: 45.91
Median bids per auction: 50
Std bids per auction: 16.29
Min bids per auction: 1
Max bids per auction: 594

Bids per auction percentiles:
   10.0%: 17 bids
   25.0%: 41 bids
   50.0%: 50 bids
   75.0%: 58 bids
   90.0%: 58 bids
   95.0%: 58 bids
   99.0%: 66 bids

Bid density categories:
  Very low (<5 bids):      19,658 (4.79%)
  Low (5-20 bids):         24,541 (5.98%)
  Medium (20-50 bids):     119,107 (29.02%)
  High (50-100 bids):      247,043 (60.20%)
  Very high (100+ bids):   16 (0.00%)

3. WINNERS PER AUCTION
---------------------------------------------------------------------------

## Section 2: Competition Dynamics

In [None]:
print("="*80)
print("SECTION 2: COMPETITION DYNAMICS")
print("="*80)

print("\n1. COMPETITION INTENSITY BY PLACEMENT")
print("-" * 80)
for placement in sorted(auction_stats['PLACEMENT'].unique()):
    placement_data = auction_stats[auction_stats['PLACEMENT'] == placement]
    print(f"\nPlacement {placement}:")
    print(f"  Auctions: {len(placement_data):,}")
    print(f"  Mean bids: {placement_data['n_bids'].mean():.2f}")
    print(f"  Median bids: {placement_data['n_bids'].median():.0f}")
    print(f"  Mean winners: {placement_data['n_winners'].mean():.2f}")
    print(f"  P90 bids: {placement_data['n_bids'].quantile(0.90):.0f}")
    print(f"  P99 bids: {placement_data['n_bids'].quantile(0.99):.0f}")

print("\n2. CAMPAIGN DIVERSITY PER AUCTION")
print("-" * 80)
print(f"Mean campaigns per auction: {auction_stats['n_campaigns'].mean():.2f}")
print(f"Median campaigns per auction: {auction_stats['n_campaigns'].median():.0f}")
print(f"Mean products per auction: {auction_stats['n_products'].mean():.2f}")
print(f"Median products per auction: {auction_stats['n_products'].median():.0f}")

print("\nCampaign diversity distribution:")
for q in [0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]:
    print(f"  {q*100:5.1f}%: {auction_stats['n_campaigns'].quantile(q):.0f} campaigns")

print("\n3. COMPETITION CONCENTRATION (HHI)")
print("-" * 80)
print("Computing Herfindahl-Hirschman Index for sample auctions...")

sample_auctions = auction_stats.sample(min(5000, len(auction_stats)), random_state=42)['AUCTION_ID'].values
hhi_scores = []

for auction_id in tqdm(sample_auctions, desc="Computing HHI"):
    auction_bids = df[df['AUCTION_ID'] == auction_id]
    vendor_counts = auction_bids['VENDOR_ID'].value_counts()
    shares = (vendor_counts / vendor_counts.sum()) ** 2
    hhi = shares.sum()
    hhi_scores.append(hhi)

hhi_scores = np.array(hhi_scores)
print(f"\nMean HHI: {hhi_scores.mean():.4f}")
print(f"Median HHI: {np.median(hhi_scores):.4f}")
print(f"\nInterpretation:")
print(f"  HHI < 0.15: Competitive (n={( hhi_scores < 0.15).sum()}, {(hhi_scores < 0.15).mean()*100:.2f}%)")
print(f"  0.15-0.25: Moderate concentration (n={(( hhi_scores >= 0.15) & (hhi_scores < 0.25)).sum()}, {((hhi_scores >= 0.15) & (hhi_scores < 0.25)).mean()*100:.2f}%)")
print(f"  HHI > 0.25: High concentration (n={( hhi_scores >= 0.25).sum()}, {(hhi_scores >= 0.25).mean()*100:.2f}%)")

print("\n4. TEMPORAL COMPETITION PATTERNS")
print("-" * 80)
auction_with_time = pd.merge(auction_stats, 
                             df_auctions_users[['AUCTION_ID', 'CREATED_AT']], 
                             on='AUCTION_ID')
auction_with_time['datetime'] = pd.to_datetime(auction_with_time['CREATED_AT'])
auction_with_time['hour'] = auction_with_time['datetime'].dt.hour
auction_with_time['day_of_week'] = auction_with_time['datetime'].dt.dayofweek

print("\nCompetition by hour of day:")
hourly_competition = auction_with_time.groupby('hour')['n_bids'].mean().round(2)
peak_hour = hourly_competition.idxmax()
low_hour = hourly_competition.idxmin()
print(f"  Peak hour: {peak_hour}:00 ({hourly_competition[peak_hour]:.2f} avg bids)")
print(f"  Low hour: {low_hour}:00 ({hourly_competition[low_hour]:.2f} avg bids)")
print(f"  Range: {hourly_competition.max() - hourly_competition.min():.2f} bids")

print("\nCompetition by day of week:")
daily_competition = auction_with_time.groupby('day_of_week')['n_bids'].mean().round(2)
days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
for day_num, avg_bids in daily_competition.items():
    if day_num < 7:
        print(f"  {days[int(day_num)]}: {avg_bids:.2f} avg bids")

print("\n5. COMPETITIVE VS NON-COMPETITIVE AUCTIONS")
print("-" * 80)
auction_stats['competitive'] = auction_stats['n_bids'] >= 10
competitive = auction_stats[auction_stats['competitive']]
non_competitive = auction_stats[~auction_stats['competitive']]

print(f"Competitive auctions (≥10 bids): {len(competitive):,} ({len(competitive)/len(auction_stats)*100:.2f}%)")
print(f"  Mean bids: {competitive['n_bids'].mean():.2f}")
print(f"  Mean winners: {competitive['n_winners'].mean():.2f}")
print(f"  Mean campaigns: {competitive['n_campaigns'].mean():.2f}")
print(f"  Win rate: {competitive['win_rate'].mean()*100:.2f}%")

print(f"\nNon-competitive auctions (<10 bids): {len(non_competitive):,} ({len(non_competitive)/len(auction_stats)*100:.2f}%)")
print(f"  Mean bids: {non_competitive['n_bids'].mean():.2f}")
print(f"  Mean winners: {non_competitive['n_winners'].mean():.2f}")
print(f"  Mean campaigns: {non_competitive['n_campaigns'].mean():.2f}")
print(f"  Win rate: {non_competitive['win_rate'].mean()*100:.2f}%")

print("\n" + "="*80)

SECTION 2: COMPETITION DYNAMICS

1. COMPETITION INTENSITY BY PLACEMENT
--------------------------------------------------------------------------------

Placement 1:
  Auctions: 38,591
  Mean bids: 38.93
  Median bids: 50
  Mean winners: 29.42
  P90 bids: 58
  P99 bids: 58

Placement 2:
  Auctions: 143,349
  Mean bids: 34.90
  Median bids: 40
  Mean winners: 29.89
  P90 bids: 49
  P99 bids: 66

Placement 3:
  Auctions: 61,202
  Mean bids: 49.68
  Median bids: 50
  Mean winners: 39.68
  P90 bids: 58
  P99 bids: 58

Placement 4:
  Auctions: 6,980
  Mean bids: 25.61
  Median bids: 19
  Mean winners: 20.48
  P90 bids: 50
  P99 bids: 58

Placement 5:
  Auctions: 160,243
  Mean bids: 56.89
  Median bids: 58
  Mean winners: 46.92
  P90 bids: 58
  P99 bids: 58

2. CAMPAIGN DIVERSITY PER AUCTION
--------------------------------------------------------------------------------
Mean campaigns per auction: 30.91
Median campaigns per auction: 35
Mean products per auction: 45.89
Median products per a

Computing HHI:   9%|▊         | 431/5000 [03:32<39:10,  1.94it/s]  

## Section 3: Payment Analysis

In [None]:
print("="*80)
print("SECTION 3: PAYMENT ANALYSIS")
print("="*80)

winners = df[df['IS_WINNER'] == True].copy()

print("\n1. PAYMENT POPULATION")
print("-" * 80)
print(f"Total winners: {len(winners):,}")
print(f"Winners with PRICE: {winners['PRICE'].notna().sum():,} ({winners['PRICE'].notna().mean()*100:.2f}%)")
print(f"Winners without PRICE: {winners['PRICE'].isna().sum():,} ({winners['PRICE'].isna().mean()*100:.2f}%)")

winners_with_price = winners[winners['PRICE'].notna()]

print("\n2. PAYMENT DISTRIBUTION (WINNERS WITH PRICE)")
print("-" * 80)
print(f"Mean payment: ${winners_with_price['PRICE_DOLLARS'].mean():.4f}")
print(f"Median payment: ${winners_with_price['PRICE_DOLLARS'].median():.4f}")
print(f"Total revenue: ${winners_with_price['PRICE_DOLLARS'].sum():,.2f}")
print(f"Std payment: ${winners_with_price['PRICE_DOLLARS'].std():.4f}")

print("\nPayment percentiles:")
for q in [0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]:
    print(f"  {q*100:5.1f}%: ${winners_with_price['PRICE_DOLLARS'].quantile(q):.4f}")

print("\n3. AUCTION-LEVEL REVENUE")
print("-" * 80)
auctions_with_revenue = auction_stats[auction_stats['total_payment'] > 0]
print(f"Auctions with revenue: {len(auctions_with_revenue):,} ({len(auctions_with_revenue)/len(auction_stats)*100:.2f}%)")
print(f"Mean revenue per auction: ${(auctions_with_revenue['total_payment']/100).mean():.4f}")
print(f"Median revenue per auction: ${(auctions_with_revenue['total_payment']/100).median():.4f}")
print(f"Total revenue: ${auction_stats['total_payment'].sum()/100:,.2f}")

print("\nRevenue per auction percentiles:")
for q in [0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]:
    print(f"  {q*100:5.1f}%: ${(auctions_with_revenue['total_payment']/100).quantile(q):.4f}")

print("\n4. PAYMENT BY PLACEMENT")
print("-" * 80)
payment_by_placement = winners_with_price.groupby('PLACEMENT').agg({
    'PRICE_DOLLARS': ['count', 'mean', 'sum']
}).round(4)
payment_by_placement.columns = ['n_payments', 'avg_payment', 'total_revenue']
print("\nPayment metrics by placement:")
print(payment_by_placement.to_string())

print("\n5. CLEARING PRICE ANALYSIS")
print("-" * 80)
winners_with_price['bid_price_diff'] = winners_with_price['FINAL_BID'] - winners_with_price['PRICE']
winners_with_price['discount_rate'] = winners_with_price['bid_price_diff'] / winners_with_price['FINAL_BID']

print(f"Mean clearing price: ${winners_with_price['PRICE_DOLLARS'].mean():.4f}")
print(f"Mean submitted bid: ${winners_with_price['FINAL_BID_DOLLARS'].mean():.4f}")
print(f"Mean discount: ${(winners_with_price['bid_price_diff']/100).mean():.4f}")
print(f"Mean discount rate: {winners_with_price['discount_rate'].mean()*100:.2f}%")

# Auction types
tolerance = 0.5
first_price = (winners_with_price['bid_price_diff'].abs() <= tolerance).sum()
second_price = (winners_with_price['bid_price_diff'] > tolerance).sum()
anomaly = (winners_with_price['bid_price_diff'] < -tolerance).sum()

print("\nAuction type distribution:")
print(f"  First-price (BID ≈ PRICE): {first_price:,} ({first_price/len(winners_with_price)*100:.2f}%)")
print(f"  Second-price (BID > PRICE): {second_price:,} ({second_price/len(winners_with_price)*100:.2f}%)")
print(f"  Anomaly (PRICE > BID): {anomaly:,} ({anomaly/len(winners_with_price)*100:.2f}%)")

print("\n6. PAYMENT CONCENTRATION")
print("-" * 80)
campaign_payments = winners_with_price.groupby('CAMPAIGN_ID')['PRICE_DOLLARS'].sum().sort_values(ascending=False)
vendor_payments = winners_with_price.groupby('VENDOR_ID')['PRICE_DOLLARS'].sum().sort_values(ascending=False)

total_revenue = campaign_payments.sum()
top_10_campaigns = campaign_payments.head(10).sum()
top_100_campaigns = campaign_payments.head(100).sum()
top_10_vendors = vendor_payments.head(10).sum()

print(f"Total revenue: ${total_revenue:,.2f}")
print(f"\nCampaign concentration:")
print(f"  Top 10 campaigns: ${top_10_campaigns:,.2f} ({top_10_campaigns/total_revenue*100:.2f}%)")
print(f"  Top 100 campaigns: ${top_100_campaigns:,.2f} ({top_100_campaigns/total_revenue*100:.2f}%)")
print(f"  Total campaigns paying: {len(campaign_payments):,}")

print(f"\nVendor concentration:")
print(f"  Top 10 vendors: ${top_10_vendors:,.2f} ({top_10_vendors/total_revenue*100:.2f}%)")
print(f"  Total vendors paying: {len(vendor_payments):,}")

print("\nTop 10 paying campaigns:")
for i, (campaign_id, revenue) in enumerate(campaign_payments.head(10).items(), 1):
    print(f"  {i:2d}. {campaign_id[:20]}... : ${revenue:10,.2f}")

print("\n7. REVENUE BY TIME")
print("-" * 80)
revenue_by_hour = winners_with_price.groupby('hour')['PRICE_DOLLARS'].sum().round(2)
peak_revenue_hour = revenue_by_hour.idxmax()
low_revenue_hour = revenue_by_hour.idxmin()
print(f"Peak revenue hour: {peak_revenue_hour}:00 (${revenue_by_hour[peak_revenue_hour]:,.2f})")
print(f"Low revenue hour: {low_revenue_hour}:00 (${revenue_by_hour[low_revenue_hour]:,.2f})")
print(f"Total daily revenue: ${revenue_by_hour.sum():,.2f}")

print("\n" + "="*80)

## Section 4: Winner Concentration

In [None]:
print("="*80)
print("SECTION 4: WINNER CONCENTRATION")
print("="*80)

print("\n1. CAMPAIGN WIN STATISTICS")
print("-" * 80)
campaign_wins = df.groupby('CAMPAIGN_ID').agg({
    'IS_WINNER': ['sum', 'count', 'mean'],
    'AUCTION_ID': 'nunique'
}).reset_index()
campaign_wins.columns = ['CAMPAIGN_ID', 'n_wins', 'n_bids', 'win_rate', 'n_auctions']

print(f"Total campaigns: {len(campaign_wins):,}")
print(f"Campaigns with at least 1 win: {(campaign_wins['n_wins'] > 0).sum():,} ({(campaign_wins['n_wins'] > 0).mean()*100:.2f}%)")
print(f"Mean wins per campaign: {campaign_wins['n_wins'].mean():.2f}")
print(f"Median wins per campaign: {campaign_wins['n_wins'].median():.0f}")

print("\nWin distribution:")
for q in [0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]:
    print(f"  {q*100:5.1f}%: {campaign_wins['n_wins'].quantile(q):.0f} wins")

print("\n2. WIN CONCENTRATION")
print("-" * 80)
sorted_wins = campaign_wins['n_wins'].sort_values(ascending=False)
total_wins = sorted_wins.sum()
top_10 = sorted_wins.head(10).sum()
top_100 = sorted_wins.head(100).sum()
top_1000 = sorted_wins.head(1000).sum()

print(f"Total wins: {total_wins:,}")
print(f"Top 10 campaigns: {top_10:,} wins ({top_10/total_wins*100:.2f}%)")
print(f"Top 100 campaigns: {top_100:,} wins ({top_100/total_wins*100:.2f}%)")
print(f"Top 1000 campaigns: {top_1000:,} wins ({top_1000/total_wins*100:.2f}%)")

print("\n3. VENDOR WIN STATISTICS")
print("-" * 80)
vendor_wins = df.groupby('VENDOR_ID').agg({
    'IS_WINNER': ['sum', 'count', 'mean'],
    'CAMPAIGN_ID': 'nunique',
    'AUCTION_ID': 'nunique'
}).reset_index()
vendor_wins.columns = ['VENDOR_ID', 'n_wins', 'n_bids', 'win_rate', 'n_campaigns', 'n_auctions']

print(f"Total vendors: {len(vendor_wins):,}")
print(f"Mean wins per vendor: {vendor_wins['n_wins'].mean():.2f}")
print(f"Mean campaigns per vendor: {vendor_wins['n_campaigns'].mean():.2f}")

sorted_vendor_wins = vendor_wins['n_wins'].sort_values(ascending=False)
top_10_vendors = sorted_vendor_wins.head(10).sum()
top_100_vendors = sorted_vendor_wins.head(100).sum()

print(f"\nVendor win concentration:")
print(f"Top 10 vendors: {top_10_vendors:,} wins ({top_10_vendors/total_wins*100:.2f}%)")
print(f"Top 100 vendors: {top_100_vendors:,} wins ({top_100_vendors/total_wins*100:.2f}%)")

print("\n4. MONOPOLISTIC VS COMPETITIVE AUCTIONS")
print("-" * 80)
print("Analyzing winner diversity in sample auctions...")

sample_auctions = auction_stats.sample(min(5000, len(auction_stats)), random_state=42)['AUCTION_ID'].values
auction_types = []

for auction_id in tqdm(sample_auctions, desc="Analyzing auction winner diversity"):
    auction_winners = df[(df['AUCTION_ID'] == auction_id) & (df['IS_WINNER'] == True)]
    if len(auction_winners) > 0:
        n_unique_vendors = auction_winners['VENDOR_ID'].nunique()
        n_unique_campaigns = auction_winners['CAMPAIGN_ID'].nunique()
        n_winners = len(auction_winners)
        
        if n_winners == 1:
            auction_type = 'Single winner'
        elif n_unique_vendors == 1:
            auction_type = 'Monopolistic (1 vendor)'
        elif n_unique_vendors == n_winners:
            auction_type = 'Fully diverse'
        else:
            auction_type = 'Partially diverse'
        
        auction_types.append({
            'type': auction_type,
            'n_winners': n_winners,
            'n_vendors': n_unique_vendors,
            'n_campaigns': n_unique_campaigns
        })

auction_types_df = pd.DataFrame(auction_types)
type_counts = auction_types_df['type'].value_counts()

print(f"\nAuction winner diversity (n={len(auction_types_df):,} auctions):")
for auction_type, count in type_counts.items():
    pct = count / len(auction_types_df) * 100
    print(f"  {auction_type:30s}: {count:6,} ({pct:5.2f}%)")

print("\n5. REPEAT WINNERS")
print("-" * 80)
high_volume_campaigns = campaign_wins[campaign_wins['n_bids'] >= 100]
print(f"Campaigns with 100+ bids: {len(high_volume_campaigns):,}")
print(f"  Mean win rate: {high_volume_campaigns['win_rate'].mean()*100:.2f}%")
print(f"  Median win rate: {high_volume_campaigns['win_rate'].median()*100:.2f}%")
print(f"  Max win rate: {high_volume_campaigns['win_rate'].max()*100:.2f}%")

dominant_campaigns = high_volume_campaigns[high_volume_campaigns['win_rate'] > 0.95]
print(f"\nDominant campaigns (>95% win rate, 100+ bids): {len(dominant_campaigns):,}")
print(f"  Total wins: {dominant_campaigns['n_wins'].sum():,} ({dominant_campaigns['n_wins'].sum()/total_wins*100:.2f}% of all wins)")

print("\n" + "="*80)

## Section 5: Auction Efficiency Metrics

In [None]:
print("="*80)
print("SECTION 5: AUCTION EFFICIENCY METRICS")
print("="*80)

print("\n1. FILL RATE BY PLACEMENT")
print("-" * 80)
placement_fill = auction_stats.groupby('PLACEMENT').agg({
    'AUCTION_ID': 'count',
    'n_winners': lambda x: (x > 0).sum()
})
placement_fill['fill_rate'] = placement_fill['n_winners'] / placement_fill['AUCTION_ID'] * 100
placement_fill.columns = ['n_auctions', 'auctions_with_winners', 'fill_rate_%']
print("\nFill rate by placement:")
print(placement_fill.to_string())

print("\n2. WINNER SLOT UTILIZATION")
print("-" * 80)
# Estimate available slots from max winners per placement
max_winners_by_placement = auction_stats.groupby('PLACEMENT')['n_winners'].max()
mean_winners_by_placement = auction_stats.groupby('PLACEMENT')['n_winners'].mean()

print("\nSlot utilization by placement:")
for placement in sorted(auction_stats['PLACEMENT'].unique()):
    max_slots = max_winners_by_placement[placement]
    mean_filled = mean_winners_by_placement[placement]
    utilization = (mean_filled / max_slots * 100) if max_slots > 0 else 0
    print(f"  Placement {placement}:")
    print(f"    Max observed winners: {max_slots:.0f}")
    print(f"    Mean winners: {mean_filled:.2f}")
    print(f"    Utilization: {utilization:.2f}%")

print("\n3. BID QUALITY METRICS")
print("-" * 80)
print("\nMean metrics by bid density:")
auction_stats['density_cat'] = pd.cut(auction_stats['n_bids'], 
                                       bins=[0, 5, 20, 50, 100, float('inf')],
                                       labels=['Very Low (<5)', 'Low (5-20)', 'Medium (20-50)', 
                                              'High (50-100)', 'Very High (100+)'])

density_quality = auction_stats.groupby('density_cat').agg({
    'AUCTION_ID': 'count',
    'mean_bid': 'mean',
    'mean_quality': 'mean',
    'win_rate': 'mean',
    'n_winners': 'mean'
}).round(4)
density_quality.columns = ['n_auctions', 'avg_bid', 'avg_quality', 'avg_win_rate', 'avg_winners']
print(density_quality.to_string())

print("\n4. AUCTION EFFICIENCY SCORE")
print("-" * 80)
print("\nComputing efficiency score = (n_winners / n_bids) × fill_indicator...")
auction_stats['has_winner'] = (auction_stats['n_winners'] > 0).astype(int)
auction_stats['efficiency_score'] = (auction_stats['n_winners'] / auction_stats['n_bids']) * auction_stats['has_winner']

print(f"Mean efficiency score: {auction_stats['efficiency_score'].mean():.4f}")
print(f"Median efficiency score: {auction_stats['efficiency_score'].median():.4f}")

print("\nEfficiency by placement:")
placement_efficiency = auction_stats.groupby('PLACEMENT')['efficiency_score'].mean().round(4)
for placement, efficiency in placement_efficiency.items():
    print(f"  Placement {placement}: {efficiency:.4f}")

print("\n5. OVERALL MARKETPLACE HEALTH")
print("-" * 80)
overall_fill = (auction_stats['n_winners'] > 0).mean()
overall_utilization = auction_stats['n_winners'].mean() / auction_stats['n_winners'].max()
mean_competition = auction_stats['n_bids'].mean()
mean_diversity = auction_stats['n_campaigns'].mean()

print(f"Overall fill rate: {overall_fill*100:.2f}%")
print(f"Overall slot utilization: {overall_utilization*100:.2f}%")
print(f"Mean competition (bids/auction): {mean_competition:.2f}")
print(f"Mean campaign diversity: {mean_diversity:.2f}")

print("\nMarketplace health indicators:")
if overall_fill > 0.95:
    print("  ✓ EXCELLENT fill rate (>95%)")
elif overall_fill > 0.85:
    print("  ✓ GOOD fill rate (85-95%)")
else:
    print("  ⚠ SUBOPTIMAL fill rate (<85%)")

if mean_competition > 30:
    print("  ✓ HEALTHY competition (>30 bids/auction)")
elif mean_competition > 15:
    print("  ✓ MODERATE competition (15-30 bids/auction)")
else:
    print("  ⚠ LOW competition (<15 bids/auction)")

if mean_diversity > 20:
    print("  ✓ HIGH campaign diversity (>20 campaigns/auction)")
elif mean_diversity > 10:
    print("  ✓ MODERATE diversity (10-20 campaigns/auction)")
else:
    print("  ⚠ LOW diversity (<10 campaigns/auction)")

print("\n" + "="*80)
print("ANALYSIS COMPLETE")
print("="*80)