# Analyzing OpenRTB Bids

This notebook analyzes bid request/response pairs from OpenRTB data to extract placement IDs with their corresponding bid prices from each demand source. The following fields are commonly used to identify placement IDs in OpenRTB:

1. `imp.ext.gpid` - Global Placement ID
2. `imp.tagid` - Tag ID
3. `imp.ext.data.pbadslot` - Publisher Ad Slot
4. `imp.id` - Impression ID
5. `imp.ext.dfp_id` - DFP Ad Unit Code

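We'll extract these placement identifiers along with the bid prices for each demand source. Note that the numbering above is not the lookup order: `extract_placement_id` (defined below) tries `imp.ext.gpid` first, then the other `imp.ext` fields, and only falls back to `imp.tagid` and finally `imp.id` when none of those is present.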

In [1]:
# json and glob ship with Python itself, so they are imported directly: they
# can never raise ImportError, and there is nothing for pip to install under
# those names.
import json
import subprocess
import sys
from glob import glob


def install_package(package):
    """Install ``package`` with pip into the interpreter running this kernel.

    pip is invoked as ``sys.executable -m pip`` so the package lands in the
    same environment the notebook is executing in.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])


# pandas is the only third-party dependency of this notebook; install it on
# demand if the kernel's environment does not have it yet.
try:
    import pandas as pd
except ImportError:
    install_package('pandas')
    import pandas as pd

In [2]:
# Find all bid data files in the bids directory.
# glob() yields matches in whatever order the filesystem returns them, so the
# list is sorted to keep the processing order (and therefore the row order of
# the tables built from it) reproducible across machines and runs.
bid_files = sorted(glob("../bids/*.json"))
print(f"Found {len(bid_files)} bid data files")

Found 1 bid data files


In [3]:
# Function to extract placement ID from an impression object
def extract_placement_id(imp):
    """Return the best available placement identifier for one impression.

    The candidate field paths below are tried in order and the first one that
    resolves to a non-empty string wins. If none matches, every key directly
    under ``imp['ext']`` whose name contains "placement", "slot" or "tag"
    (case-insensitive) is tried the same way.

    Args:
        imp: one entry of a bid request's ``imp`` array (a dict). Any other
            type is treated as an impression without a placement identifier.

    Returns:
        The identifier string, or ``"unknown_placement_id"`` when nothing
        usable is found.
    """
    # A null or otherwise malformed impression has nothing to look up.
    if not isinstance(imp, dict):
        return "unknown_placement_id"

    # Common placement ID fields in OpenRTB, in lookup order.
    # NOTE(review): 'tid', 'ae_ad_type' and 'ae_a_id' are labelled below as
    # transaction / ad-type / advertiser values rather than placements, yet
    # they take precedence over 'tagid' and 'id' -- confirm this is intended.
    placement_fields = (
        ('ext', 'gpid'),              # Global Placement ID
        ('ext', 'data', 'pbadslot'),  # Publisher ad slot
        ('ext', 'tid'),               # Transaction ID
        ('ext', 'dfp_id'),            # DFP Ad Unit Code
        ('ext', 'divid'),             # Div ID
        ('ext', 'ae_ad_id'),          # Ad Exchange Ad ID
        ('ext', 'ae_ad_type'),        # Ad Exchange Ad Type
        ('ext', 'ae_a_id'),           # Ad Exchange Advertiser ID
        ('tagid',),                   # Tag ID
        ('id',),                      # Impression ID
    )

    for field_path in placement_fields:
        current = imp
        try:
            for field in field_path:
                current = current[field]
        except (KeyError, TypeError):
            # Path is absent, or an intermediate value is not subscriptable
            # by key.
            continue
        if current and isinstance(current, str):
            return current

    # Fallback: scan the keys directly under imp['ext'] for placement-like
    # names. Empty strings are skipped, matching the rule used above.
    ext = imp.get('ext')
    if isinstance(ext, dict):
        for key, value in ext.items():
            lowered = key.lower()
            if any(hint in lowered for hint in ('placement', 'slot', 'tag')):
                if value and isinstance(value, str):
                    return value

    return "unknown_placement_id"

# Function to extract bid information from bid responses
def extract_bids_from_responses(responses):
    """Flatten captured bid responses into one record per bid.

    Only entries that carry a ``body`` and whose ``statusCode`` equals 200 are
    considered. The body is JSON-decoded (an already-decoded dict is accepted
    as is); entries whose body cannot be decoded, or does not decode to a JSON
    object, are skipped rather than aborting the whole run.

    Args:
        responses: iterable of captured response dicts; the keys read are
            ``statusCode``, ``body`` and ``requestId``. ``None`` is treated
            as empty.

    Returns:
        A list of dicts with the keys ``request_id``, ``placement_id``,
        ``bid_price``, ``bid_currency``, ``demand_source``,
        ``advertiser_domain``, ``creative_id``, ``creative_width`` and
        ``creative_height``. ``placement_id`` holds the bid's ``impid`` at
        this stage; the caller is expected to map it to the real placement
        using the matching request.
    """
    bid_data = []

    for response in responses or []:
        if not isinstance(response, dict):
            continue
        # .get() so an entry without a recorded status is skipped instead of
        # raising KeyError.
        if 'body' not in response or response.get('statusCode') != 200:
            continue

        response_body = response['body']
        if not isinstance(response_body, dict):
            try:
                response_body = json.loads(response_body)
            except (TypeError, ValueError):
                # TypeError: body is None or another non-text value.
                # ValueError: covers json.JSONDecodeError for malformed text.
                continue
        if not isinstance(response_body, dict):
            # Valid JSON that is not an object (null, list, number, ...).
            continue

        # Get request ID to match with requests
        request_id = response.get('requestId', 'unknown')
        # The currency is declared once per response, not per bid.
        bid_currency = response_body.get('cur', 'USD')

        # "or []" tolerates a missing key as well as an explicit JSON null.
        for seatbid in response_body.get('seatbid') or []:
            if not isinstance(seatbid, dict):
                continue
            demand_source = seatbid.get('seat', 'unknown_seat')

            for bid in seatbid.get('bid') or []:
                if not isinstance(bid, dict):
                    continue

                domains = bid.get('adomain')
                if isinstance(domains, (list, tuple)):
                    adomain = ', '.join(str(domain) for domain in domains)
                elif isinstance(domains, str):
                    # A bare string is kept whole instead of being joined
                    # character by character.
                    adomain = domains
                else:
                    adomain = 'unknown_adomain'

                bid_data.append({
                    'request_id': request_id,
                    # impid for now; resolved to a placement by the caller.
                    'placement_id': bid.get('impid', 'unknown_placement'),
                    'bid_price': bid.get('price', 0),
                    'bid_currency': bid_currency,
                    'demand_source': demand_source,
                    'advertiser_domain': adomain,
                    'creative_id': bid.get('crid', 'unknown_creative'),
                    'creative_width': bid.get('w', 0),
                    'creative_height': bid.get('h', 0)
                })

    return bid_data

# Function to extract placement IDs from bid requests
def extract_placements_from_requests(requests):
    """Build a ``{request_id: {imp_id: placement_id}}`` lookup from requests.

    Each request's ``body`` is JSON-decoded (an already-decoded dict is
    accepted as is). Requests without a body, with a body that cannot be
    decoded, or whose body is not a JSON object are skipped. For every
    impression in the body's ``imp`` array the placement identifier is
    resolved with ``extract_placement_id`` and stored under the impression's
    ``id``.

    Args:
        requests: iterable of captured request dicts; the keys read are
            ``body`` and ``requestId``. ``None`` is treated as empty.

    Returns:
        A dict keyed by request ID (``'unknown'`` when the request has no
        ``requestId``) whose values map impression IDs (``'unknown_imp'`` when
        absent) to placement IDs. Requests that contribute no impressions do
        not appear in the result.
    """
    placement_data = {}

    for request in requests or []:
        if not isinstance(request, dict) or 'body' not in request:
            continue

        request_body = request['body']
        if not isinstance(request_body, dict):
            try:
                request_body = json.loads(request_body)
            except (TypeError, ValueError):
                # TypeError: body is None or another non-text value.
                # ValueError: covers json.JSONDecodeError for malformed text.
                continue
        if not isinstance(request_body, dict):
            # Valid JSON that is not an object (null, list, number, ...).
            continue

        # Get request ID
        request_id = request.get('requestId', 'unknown')

        # "or []" tolerates a missing key as well as an explicit JSON null.
        for imp in request_body.get('imp') or []:
            if not isinstance(imp, dict):
                continue
            placement_id = extract_placement_id(imp)
            imp_id = imp.get('id', 'unknown_imp')

            # Store mapping from impression ID to placement ID
            placement_data.setdefault(request_id, {})[imp_id] = placement_id

    return placement_data

In [4]:
# Process all bid data files
# The accumulator is reset here so re-running this cell does not duplicate rows.
all_bid_data = []

for bid_file in bid_files:
    print(f"Processing bid data file: {bid_file}")

    # Load the bid data. The encoding is given explicitly so decoding does not
    # depend on the platform's locale default.
    with open(bid_file, 'r', encoding='utf-8') as f:
        bid_data = json.load(f)

    # Extract requests and responses
    requests = bid_data.get('requests', [])
    responses = bid_data.get('responses', [])

    print(f"Found {len(requests)} requests with placement information")
    print(f"Found {len(responses)} bid responses")

    # Extract placement IDs from requests: {request_id: {imp_id: placement_id}}
    placement_map = extract_placements_from_requests(requests)

    # Extract bids from responses
    bids = extract_bids_from_responses(responses)

    # Add placement IDs to bid data by matching request and impression IDs
    for bid in bids:
        req_id = bid['request_id']
        imp_id = bid['placement_id']  # This is actually the impid from the bid

        # Look up the actual placement ID from our mapping; a bid whose
        # request or impression is not in the mapping keeps its impid.
        bid['placement_id'] = placement_map.get(req_id, {}).get(imp_id, imp_id)

    all_bid_data.extend(bids)
    print(f"Extracted {len(bids)} bid records")

Processing bid data file: ../bids/openrtb-bid-data-2025-10-01T01-35-07.json
Found 18 requests with placement information
Found 18 bid responses
Extracted 13 bid records


In [5]:
# Turn the collected bid records into a table, one row per bid.
print(f"Creating DataFrame with {len(all_bid_data)} records")
df = pd.DataFrame(all_bid_data)

# With no records the frame would have no columns at all; swap in an empty
# frame that still carries the expected column names.
df = df if not df.empty else pd.DataFrame(
    columns=[
        'request_id',
        'placement_id',
        'bid_price',
        'bid_currency',
        'demand_source',
        'advertiser_domain',
        'creative_id',
        'creative_width',
        'creative_height',
    ]
)

Creating DataFrame with 13 records


In [6]:
# Report the row count, then render descriptive statistics for the bid price
# column as the cell's output.
print(f"Total bid records: {len(df)}")
print("\nSummary statistics for bid prices:")
df.loc[:, ['bid_price']].describe()

Total bid records: 13

Summary statistics for bid prices:


Unnamed: 0,bid_price
count,13.0
mean,8.955477
std,9.632592
min,0.0285
25%,0.425335
50%,1.2296
75%,19.0
max,22.0


In [7]:
# Show the ten highest-priced bids, restricted to the identifying columns.
print("\nTop 10 highest bids:")
(
    df[['request_id', 'placement_id', 'bid_price', 'bid_currency', 'demand_source', 'advertiser_domain']]
    .sort_values('bid_price', ascending=False)
    .head(10)
)


Top 10 highest bids:


Unnamed: 0,request_id,placement_id,bid_price,bid_currency,demand_source,advertiser_domain
1,26424.669,/1001609/Discuss_Web_HOME_BBLive,22.0,USD,1,icmarkets.com
2,26424.669,/1001609/Discuss_Web_HOME_BBLive2,19.0,USD,1,icmarkets.com
3,26424.669,/1001609/Discuss_Web_HOME_BBLive3,19.0,USD,1,icmarkets.com
4,26424.669,/1001609/Discuss_Web_HOME_BBLive4,19.0,USD,1,icmarkets.com
5,26424.669,/1001609/Discuss_Web_HOME_BB1,19.0,USD,1,icmarkets.com
6,26424.669,/1001609/Discuss_Web_HOME_MR1,15.0,USD,4194632,hkexpress.com
9,13772.399,2,1.2296,USD,1495,interactivebrokers.com.hk
8,13772.399,1,1.0498,USD,1495,interactivebrokers.com.hk
0,26424.67,/1001609/Discuss_Web_HOME_MR1,0.481463,USD,3426,hkexpress.com
12,13772.401,/5129/SkyNews/home#ad-block-728x90-1,0.425335,USD,48503,adobe.com


In [8]:
print("\nTop 10 most frequent placement IDs:")
# Tally how many bid rows each placement ID received, most frequent first,
# and expose the tally as a two-column frame.
placement_counts = (
    df['placement_id']
    .value_counts()
    .rename_axis('placement_id')
    .reset_index(name='frequency')
)
placement_counts.head(10)


Top 10 most frequent placement IDs:


Unnamed: 0,placement_id,frequency
0,/1001609/Discuss_Web_HOME_MR1,3
1,/1001609/Discuss_Web_HOME_BBLive,1
2,/1001609/Discuss_Web_HOME_BBLive2,1
3,/1001609/Discuss_Web_HOME_BBLive3,1
4,/1001609/Discuss_Web_HOME_BBLive4,1
5,/1001609/Discuss_Web_HOME_BB1,1
6,1,1
7,2,1
8,3,1
9,4,1


In [9]:
print("\nAverage bid price by demand source (top 10):")
# Per demand source: mean bid price and the number of bid rows, with the
# output columns named directly in the aggregation.
demand_source_stats = (
    df.groupby('demand_source')
    .agg(
        avg_bid_price=('bid_price', 'mean'),
        bid_count=('placement_id', 'count'),
    )
    .reset_index()
)
demand_source_stats.sort_values('avg_bid_price', ascending=False).head(10)


Average bid price by demand source (top 10):


Unnamed: 0,demand_source,avg_bid_price,bid_count
0,1,19.6,5
3,4194632,15.0,1
1,1495,0.50288,5
2,3426,0.481463,1
4,48503,0.425335,1


In [10]:
print("\nBid price distribution by placement ID (top 5 placements with highest average bid price):")
# Per placement ID: mean, minimum and maximum bid price plus the number of
# bid rows. Naming the outputs in the aggregation yields flat column names.
placement_stats = (
    df.groupby('placement_id')
    .agg(
        avg_bid_price=('bid_price', 'mean'),
        min_bid=('bid_price', 'min'),
        max_bid=('bid_price', 'max'),
        bid_count=('request_id', 'count'),
    )
    .reset_index()
)
placement_stats.sort_values('avg_bid_price', ascending=False).head(5)


Bid price distribution by placement ID (top 5 placements with highest average bid price):


Unnamed: 0,placement_id,avg_bid_price,min_bid,max_bid,bid_count
1,/1001609/Discuss_Web_HOME_BBLive,22.0,22.0,22.0,1
0,/1001609/Discuss_Web_HOME_BB1,19.0,19.0,19.0,1
2,/1001609/Discuss_Web_HOME_BBLive2,19.0,19.0,19.0,1
3,/1001609/Discuss_Web_HOME_BBLive3,19.0,19.0,19.0,1
4,/1001609/Discuss_Web_HOME_BBLive4,19.0,19.0,19.0,1
