In [1]:
print("Hello World")

Hello World


# Practical Budget Pacing Algorithms, PID Controler and Simulation Test Bed for eBay Marketplace Sponsored Search

Recreation of the simulations and techniques used by Ebay in this paper http://papers.adkdd.org/2023/papers/adkdd23-nguyen-practical.pdf


This notebook adds:
- Adds a new PID control method and compares with the pacing algorithms used in the paper
- Simulates user-interaction data, as we don't have access to it. 



In [6]:
import pandas as pd


In [2]:
# Simulate 1440 minutes in a day
# Simulate bids for competing campaigns (3 campaigns with different budgets)
# Calculate Metrics for each campaign
# Use different pacing algorithms to spend budget
# Add PID to each campaign
# Build a nice UI


# Create dataframe that has
# Keyword, Item, Time, pCTR(probability of click through rate), bid




In [231]:
# Simulate 1440 minutes in a day, M campaigns competing for N keywords, pCTR, bid

# Define data
data = {
    # Each item is a book that represents a campaign
    'Item': ['Time Series Analysis: Forecasting and Control', 'Practical Statistics for Data Scientists', 'Designing Data-Intensive Applications'],
    # They are all bidding on the same keyword
    'Keyword': ['statistics book', 'statistics book', 'statistics book'],
    # Probability of click through rate (This is a guess as we don't have the actual data)
    'pCTR': [0.03, 0.04, 0.02],
    # Bid amount (this will be randomized around this central amount)
    'Bid': [0.5, 0.45, 0.55]
}

# Create DataFrame
items_keywords_df = pd.DataFrame(data)

# Print DataFrame
display(items_keywords_df)

Unnamed: 0,Item,Keyword,pCTR,Bid
0,Time Series Analysis: Forecasting and Control,statistics book,0.03,0.5
1,Practical Statistics for Data Scientists,statistics book,0.04,0.45
2,Designing Data-Intensive Applications,statistics book,0.02,0.55


In [None]:
# Simple Simulation with no budget constraints

# Simulate 1440 minutes in a day
    # Could have peaks of traffic around the day
    # Could have multiple impressions
    
# Simulate a bid around a keyword
 # Bid amount is a random number around the central bid amount
 # The click through rate is a random number around the central pCTR
 
  
 # Business Metrics
  # Total Clicks
  # Budget Spend
  # CTR
  # Cost Per Click
  
# Pacing Evaluation Metrics
 # Pacing Error. Smoothness of spend over the day


In [232]:
# Simulate a bid around a keyword
   # This should will take modifiers from control algorithms
import numpy as np
def auction(df):
    """
    Gets df with bidding information and simulates an auction
    
    Returns winner and price paid
    
    """
    bid_dict = {}
    for index, id in df.iterrows():
        bid_dict[index] = id['Bid']- np.random.uniform(0.0, 0.2)
    winner = max(bid_dict, key=bid_dict.get)
    # price paid is the bid of the second highest bidder
    price_paid = sorted(bid_dict.values(), reverse=True)[1]
    return winner, price_paid

winner, price_paid = auction(items_keywords_df)

print(f"The winner is {items_keywords_df.iloc[winner]['Item']}")
print(f"The price paid is {price_paid}")

The winner is Designing Data-Intensive Applications
The price paid is 0.3213853474297117


In [233]:
# Simulate unlimited budget over 1440 minutes
# iterate over 1440 minutes (flat traffic)
# Save the winner and price paid
# Build a dataframe with the results

simulation_results = []
for minute in range(1440):
    winner, price_paid = auction(items_keywords_df)
    result = {
        'Minute': minute,
        'Winner': items_keywords_df.iloc[winner]['Item'],
        'Price Paid': price_paid,
        "pCTR": items_keywords_df.iloc[winner]['pCTR'] + np.random.normal(-0.01, 0.01),
    }
    simulation_results.append(result)
    
simulation_results_df = pd.DataFrame(simulation_results)

In [234]:
simulation_results_df.groupby("Winner").agg({"Price Paid": "mean", "pCTR": "mean", "Minute": "count"})

Unnamed: 0_level_0,Price Paid,pCTR,Minute
Winner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Designing Data-Intensive Applications,0.396556,0.010336,974
Practical Statistics for Data Scientists,0.393527,0.030848,119
Time Series Analysis: Forecasting and Control,0.407834,0.019537,347


In [235]:

# Add number of clicks, total spend, CTR, CPC
def calc_clicks(pCTR, total_impressions = 1000):
    return total_impressions * pCTR

def calc_total_spend(clicks, price_paid):
    return clicks * price_paid

simulation_results_df['Clicks'] = simulation_results_df.apply(lambda x: calc_clicks(total_impressions=1000, pCTR=x['pCTR']), axis=1)
simulation_results_df['Total Spend'] = simulation_results_df.apply(lambda x: calc_total_spend(x['Clicks'], x['Price Paid']), axis=1)


simulation_results_df.groupby("Winner").agg({"Price Paid": "mean", "pCTR": "mean", "Minute": "count", "Clicks": "sum", "Total Spend": "sum"})

# add cost per click
simulation_results_df['Cost Per Click'] = simulation_results_df['Total Spend'] / simulation_results_df['Clicks']

simulation_results_df.groupby("Winner").agg({"Price Paid": "mean", "pCTR": "mean", "Minute": "count", "Clicks": "sum", "Total Spend": "sum", "Cost Per Click": "mean"})


# Cost per click are very similar for all campaigns
# Can introduce quality scores and impression charges?


Unnamed: 0_level_0,Price Paid,pCTR,Minute,Clicks,Total Spend,Cost Per Click
Winner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Designing Data-Intensive Applications,0.396556,0.010336,974,10067.269386,3978.784386,0.396556
Practical Statistics for Data Scientists,0.393527,0.030848,119,3670.941687,1443.947454,0.393527
Time Series Analysis: Forecasting and Control,0.407834,0.019537,347,6779.467485,2788.527621,0.407834
