In [1]:
print("Hello World")

Hello World


# Practical Budget Pacing Algorithms, PID Controler and Simulation Test Bed for eBay Marketplace Sponsored Search

Recreation of the simulations and techniques used by Ebay in this paper http://papers.adkdd.org/2023/papers/adkdd23-nguyen-practical.pdf


This notebook adds:
- Adds a new PID control method and compares with the pacing algorithms used in the paper
- Simulates user-interaction data, as we don't have access to it. 



In [6]:
import pandas as pd


In [2]:
# Simulate 1440 minutes in a day
# Simulate bids for competing campaigns (3 campaigns with different budgets)
# Calculate Metrics for each campaign
# Use different pacing algorithms to spend budget
# Add PID to each campaign
# Build a nice UI


# Create dataframe that has
# Keyword, Item, Time, pCTR(probability of click through rate), bid




In [334]:
# Simulate 1440 minutes in a day, M campaigns competing for N keywords, pCTR, bid

# Define data
data = {
    # Each item is a book that represents a campaign
    'Item': ['Time Series Analysis: Forecasting and Control', 'Practical Statistics for Data Scientists', 'Designing Data-Intensive Applications'],
    # They are all bidding on the same keyword
    'Keyword': ['statistics book', 'statistics book', 'statistics book'],
    # Probability of click through rate (This is a guess as we don't have the actual data)
    'pCTR': [0.03, 0.04, 0.02],
    # Bid amount (this will be randomized around this central amount)
    'Bid': [0.5, 0.48, 0.55],
    'Budget': [10000,5000,2000],
    'Spend': [0.0,0.0,0.0],
}

# Create DataFrame
items_keywords_df = pd.DataFrame(data)

# Print DataFrame
display(items_keywords_df)

Unnamed: 0,Item,Keyword,pCTR,Bid,Budget,Spend
0,Time Series Analysis: Forecasting and Control,statistics book,0.03,0.5,10000,0.0
1,Practical Statistics for Data Scientists,statistics book,0.04,0.48,5000,0.0
2,Designing Data-Intensive Applications,statistics book,0.02,0.55,2000,0.0


In [None]:
# Simple Simulation with no budget constraints

# Simulate 1440 minutes in a day
    # Could have peaks of traffic around the day
    # Could have multiple impressions
    
# Simulate a bid around a keyword
 # Bid amount is a random number around the central bid amount
 # The click through rate is a random number around the central pCTR
 
  
 # Business Metrics
  # Total Clicks
  # Budget Spend
  # CTR
  # Cost Per Click
  
# Pacing Evaluation Metrics
 # Pacing Error. Smoothness of spend over the day


In [332]:
# Simulate a bid around a keyword
   # This should will take modifiers from control algorithms
import numpy as np
def auction(df):
    """
    Gets df with bidding information and simulates an auction
    
    Returns winner and price paid
    
    """
    bid_dict = {}
    for index, id in df.iterrows():
        bid_dict[index] = id['Bid']- np.random.uniform(0.0, 0.2)
    winner = max(bid_dict, key=bid_dict.get)
    try:
        # price paid is the bid of the second highest bidder
        price_paid = sorted(bid_dict.values(), reverse=True)[1]
    except:
        # If there is only one bidder, the price paid is the bid of the winner
        price_paid = bid_dict[winner]
    return winner, price_paid

winner, price_paid = auction(items_keywords_df)

print(f"The winner is {items_keywords_df.loc[winner]['Item']}")
print(f"The price paid is {price_paid}")

{0: 0.44522564141361276, 1: 0.37873406042373675}
Winner is 0
Price paid is 0.37873406042373675
The winner is Time Series Analysis: Forecasting and Control
The price paid is 0.37873406042373675


In [333]:
items_keywords_df.loc[2]

KeyError: 2

In [335]:
# Simulate unlimited budget over 1440 minutes
# iterate over 1440 minutes (flat traffic)
# Save the winner and price paid
# Build a dataframe with the results

simulation_results = []
for minute in range(1440):
    
    # Remove winner if Spend is greater than Budget
    items_keywords_df = items_keywords_df[items_keywords_df['Spend'] < items_keywords_df['Budget']]
    
    
        
    winner, price_paid = auction(items_keywords_df)
    result = {
        'Minute': minute,
        'Winner': items_keywords_df.loc[winner]['Item'],
        'Price Paid': price_paid,
        "pCTR": items_keywords_df.loc[winner]['pCTR'] + np.random.normal(-0.01, 0.01),
    }
    
    # Update Spend so far for the winner
    items_keywords_df.loc[winner, 'Spend'] = items_keywords_df.loc[winner, 'Spend'] + calc_total_spend(calc_clicks(result['pCTR']), price_paid)
    
    simulation_results.append(result)
    
simulation_results_df = pd.DataFrame(simulation_results)

{0: 0.46781612874209666, 1: 0.43043200352126726, 2: 0.5132355583268766}
Winner is 2
Price paid is 0.46781612874209666
{0: 0.41712160178302127, 1: 0.4093328023841768, 2: 0.5291187577131824}
Winner is 2
Price paid is 0.41712160178302127
{0: 0.4698446873233565, 1: 0.34093633881797103, 2: 0.35031465354977864}
Winner is 0
Price paid is 0.35031465354977864
{0: 0.4867206711539973, 1: 0.3640682584267151, 2: 0.3677953755127163}
Winner is 0
Price paid is 0.3677953755127163
{0: 0.4169381032584073, 1: 0.3957164436153925, 2: 0.4236203643075027}
Winner is 2
Price paid is 0.4169381032584073
{0: 0.43761679800830494, 1: 0.47841915832186854, 2: 0.4516826054263545}
Winner is 1
Price paid is 0.4516826054263545
{0: 0.4522690175895163, 1: 0.46549801842425864, 2: 0.4894562501394801}
Winner is 2
Price paid is 0.46549801842425864
{0: 0.3540787913006023, 1: 0.3307024122782052, 2: 0.40663275568929536}
Winner is 2
Price paid is 0.3540787913006023
{0: 0.3709985603390823, 1: 0.47572875834189776, 2: 0.47076229051230

In [336]:
simulation_results_df.groupby("Winner").agg({"Price Paid": "mean", "pCTR": "mean", "Minute": "count"})

Unnamed: 0_level_0,Price Paid,pCTR,Minute
Winner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Designing Data-Intensive Applications,0.407584,0.009117,539
Practical Statistics for Data Scientists,0.379075,0.029962,349
Time Series Analysis: Forecasting and Control,0.378344,0.019842,552


In [337]:

# Add number of clicks, total spend, CTR, CPC
def calc_clicks(pCTR, total_impressions = 1000):
    return total_impressions * pCTR

def calc_total_spend(clicks, price_paid):
    return clicks * price_paid

simulation_results_df['Clicks'] = simulation_results_df.apply(lambda x: calc_clicks(total_impressions=1000, pCTR=x['pCTR']), axis=1)
simulation_results_df['Total Spend'] = simulation_results_df.apply(lambda x: calc_total_spend(x['Clicks'], x['Price Paid']), axis=1)


simulation_results_df.groupby("Winner").agg({"Price Paid": "mean", "pCTR": "mean", "Minute": "count", "Clicks": "sum", "Total Spend": "sum"})

# add cost per click
simulation_results_df['Cost Per Click'] = simulation_results_df['Total Spend'] / simulation_results_df['Clicks']

simulation_results_df.groupby("Winner").agg({"Price Paid": "mean", "pCTR": "mean", "Minute": "count", "Clicks": "sum", "Total Spend": "sum", "Cost Per Click": "mean"})


# Cost per click are very similar for all campaigns
# Can introduce quality scores and impression charges?


Unnamed: 0_level_0,Price Paid,pCTR,Minute,Clicks,Total Spend,Cost Per Click
Winner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Designing Data-Intensive Applications,0.407584,0.009117,539,4914.195901,2002.178603,0.407584
Practical Statistics for Data Scientists,0.379075,0.029962,349,10456.767335,3971.153331,0.379075
Time Series Analysis: Forecasting and Control,0.378344,0.019842,552,10952.940577,4131.759713,0.378344


In [338]:
items_keywords_df

Unnamed: 0,Item,Keyword,pCTR,Bid,Budget,Spend
0,Time Series Analysis: Forecasting and Control,statistics book,0.03,0.5,10000,4131.759713
1,Practical Statistics for Data Scientists,statistics book,0.04,0.48,5000,3971.153331


In [None]:
# Introduce budget constraints
# Introduce pacing algorithms


# Budget
# Remaining time left: minutes slots left
# Remaining Click opportunity: Depends on forecasting Model (SHOULD do this)

# Error: Budget rate error




In [None]:
# Implement Budget Cap (stops bidding when budget is reached)
# Total Spend is recorded