In [2]:
# Mount Google Drive inside the Colab VM; every dataset and prediction file
# read by the cells below lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import numpy as np
import pandas as pd
from sklearn import metrics
from tqdm import tqdm_notebook as tqdm
from itertools import product
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

In [0]:
# Load the three data splits from the mounted Drive folder.  The reads are
# independent of each other, so they are listed train -> validation -> test.
train_data = pd.read_csv("/content/drive/My Drive/MAAI/data/train.csv")
valid_data = pd.read_csv("/content/drive/My Drive/MAAI/data/validation.csv")
test_data = pd.read_csv("/content/drive/My Drive/MAAI/data/test.csv")

In [0]:
# Number of validation impressions whose `click` flag equals 1.
int((valid_data['click'].values == 1).sum())

202

### Strategies

In [0]:
def stategy_ortb_1(list_predictCTR, c = 50, lambd = 9e-5):
  """First ORTB-style bidding function: bid = sqrt(c / lambd * pCTR + c^2) - c.

  Args:
    list_predictCTR: predicted click-through rate(s); a NumPy array or scalar.
    c: constant of the bid curve.
    lambd: second tuning constant of the bid curve.

  Returns:
    Bid price(s) with the same shape as `list_predictCTR`.
  """
  under_root = c/lambd * list_predictCTR + c**2
  return np.sqrt(under_root) - c

In [0]:
def stategy_ortb_2(list_predictCTR, c = 50, lambd = 9e-5):
  """Second ORTB-style bidding function.

  With t = pCTR + sqrt(c^2 * lambd^2 + pCTR^2) the bid is
  c * ((t / (c * lambd))^(1/3) - ((c * lambd) / t)^(1/3)).

  Args:
    list_predictCTR: predicted click-through rate(s); a NumPy array or scalar.
    c: constant of the bid curve.
    lambd: second tuning constant of the bid curve.

  Returns:
    Bid price(s) with the same shape as `list_predictCTR`.
  """
  scale = c * lambd
  radicand = c ** 2 * (lambd ** 2) + np.power(list_predictCTR, 2)
  numer = list_predictCTR + np.sqrt(radicand)
  grow = np.power(numer/scale, 1/3)
  shrink = np.power(scale/numer, 1/3)
  return c * (grow - shrink)

In [0]:
# Largest pay price in the validation log.  It is bound as the default of
# `max_price` below when the function is defined, so later changes to this
# global do not affect the default.
max_price = valid_data['payprice'].max()
def stategy_ortb_d(list_predictCTR, max_price = max_price, volumn = 277191, budget = 6250):
  """Bid = 2 * pCTR * (budget * max_price^2 / volumn)^1.5.

  Args:
    list_predictCTR: predicted click-through rate(s); a NumPy array or scalar.
    max_price: price cap used in the scale factor.
    volumn: presumably the number of impressions the budget is spread over
      (TODO confirm where 277191 comes from).
    budget: total budget entering the scale factor.

  Returns:
    Bid price(s) with the same shape as `list_predictCTR`.
  """
  scale = np.power(budget * max_price**2 / volumn, 1.5)
  return 2 * list_predictCTR * scale

In [0]:
# Mean pay price over the validation log; default base price of the strategy.
average_pay_price = valid_data['payprice'].sum() / len(valid_data['payprice'])
def stategy_linear(list_predictCTR, base_price = average_pay_price):
  """Linear bidding: bid = base_price * pCTR / average validation CTR.

  Args:
    list_predictCTR: predicted click-through rate(s); a NumPy array or scalar.
    base_price: bid placed for an impression whose pCTR equals the average CTR.

  Returns:
    Bid price(s) with the same shape as `list_predictCTR`.
  """
  # Count clicks with a vectorised comparison: this function is called once
  # per grid-search step, and a Python-level loop over every validation row
  # on each call dominated its runtime.
  clicks = valid_data['click'].values
  average_CTR = np.count_nonzero(clicks == 1) / len(valid_data)
  return base_price * list_predictCTR / average_CTR

In [0]:
# Mean pay price over the validation log; default base price of the strategy.
average_pay_price = valid_data['payprice'].sum() / len(valid_data['payprice'])
def clipped_stategy_linear(list_predictCTR, base_price = average_pay_price, click_threshold = 0.0005):
  """Linear bidding that bids nothing on impressions with a low pCTR.

  Args:
    list_predictCTR: predicted click-through rates (array-like); left unmodified.
    base_price: forwarded to `stategy_linear`.
    click_threshold: pCTR values strictly below this are treated as 0.

  Returns:
    Bid prices from `stategy_linear` computed on the clipped pCTR values.
  """
  predicted = np.asarray(list_predictCTR)
  # np.where builds a new array, so the caller's data is never written to.
  clipped = np.where(predicted < click_threshold, 0, predicted)
  return stategy_linear(clipped, base_price = base_price)

In [0]:
# Mean pay price over the validation log; default base price of the strategy.
average_pay_price = valid_data['payprice'].sum() / len(valid_data['payprice'])
def stategy_quadratic_power2(list_predictCTR, base_price = average_pay_price):
  """Quadratic bidding: bid = base_price * (100 * pCTR / average CTR)^2.

  Args:
    list_predictCTR: predicted click-through rate(s); a NumPy array or scalar.
    base_price: multiplier applied to the squared, scaled CTR ratio.

  Returns:
    Bid price(s) with the same shape as `list_predictCTR`.
  """
  # Vectorised click count instead of a Python loop over every validation row
  # on each call (the grid search calls this once per candidate base price).
  clicks = valid_data['click'].values
  average_CTR = np.count_nonzero(clicks == 1) / len(valid_data)
  return base_price * np.power(100 * list_predictCTR / average_CTR, 2)

In [0]:
# Mean pay price over the validation log; default base price of the strategy.
average_pay_price = valid_data['payprice'].sum() / len(valid_data['payprice'])
def stategy_quadratic_power2_unscaled(list_predictCTR, base_price = average_pay_price):
  """Quadratic bidding without the x100 scale: bid = base_price * (pCTR / average CTR)^2.

  Args:
    list_predictCTR: predicted click-through rate(s); a NumPy array or scalar.
    base_price: bid placed for an impression whose pCTR equals the average CTR.

  Returns:
    Bid price(s) with the same shape as `list_predictCTR`.
  """
  # Vectorised click count instead of a Python loop over every validation row
  # on each call.
  clicks = valid_data['click'].values
  average_CTR = np.count_nonzero(clicks == 1) / len(valid_data)
  return base_price * np.power(list_predictCTR / average_CTR, 2)

### Re-Calibration

In [0]:
def re_calibration(pred_y_list, negative_downsampling_rate):
  """Map probabilities p to p / (p + (1 - p) / w), with w the downsampling rate.

  Args:
    pred_y_list: predicted probabilities; a NumPy array or scalar.
    negative_downsampling_rate: the rate w; w = 1 leaves predictions unchanged.

  Returns:
    Re-calibrated probabilities with the same shape as `pred_y_list`.
  """
  rescaled_negative = (1 - pred_y_list) / negative_downsampling_rate
  return pred_y_list / (pred_y_list + rescaled_negative)

### Competition Simulator

In [0]:
class Agent:
  """Base class holding the running statistics of one bidding agent.

  Subclasses supply `bidding_function` (the bid for an auction) and
  `after_winning_updates` (bookkeeping after a won auction).
  """

  def __init__(self, budget):
    # Money: `ori_budgent` keeps the starting amount, `budget` is what remains.
    self.ori_budgent = budget
    self.budget = budget
    self.cost = 0
    # Outcome counters, all starting at zero.
    self.num_clicks = 0
    self.num_impressions = 0
    self.ctr = 0

  def bidding_function(self, bid, required_price):
    """Return this agent's bid; must be overridden."""
    raise NotImplementedError

  def after_winning_updates(self):
    """Refresh the statistics after a win; must be overridden."""
    raise NotImplementedError

class GivenPriceAgent(Agent):
  """Agent whose bid for every auction is looked up in a precomputed list."""

  def __init__(self, budget, price_list):
    super().__init__(budget)
    # price_list[i] is the bid placed for the auction at index i.
    self.price_list = price_list

  def bidding_function(self, bid_index):
    """Return the precomputed bid for auction `bid_index`."""
    return self.price_list[bid_index]

  def after_winning_updates(self):
    """Record one more won impression and refresh cost and CTR.

    The caller is expected to have updated `num_clicks` and `budget` already.
    """
    self.cost = self.ori_budgent - self.budget
    self.num_impressions = self.num_impressions + 1
    self.ctr = self.num_clicks / self.num_impressions

In [0]:
def multi_agents_bidding_given_values(agents_list, required_prices, is_clicked, required_preces_per_impression):
  """Replay a logged auction sequence against one or more agents.

  For every auction the highest bidder wins if its bid is at least the
  required price and it can still afford the per-impression cost.  If the
  highest bidder fails either check, nobody wins that auction.

  Args:
    agents_list: agents exposing `bidding_function(index)`, `budget`,
      `num_clicks`, `num_impressions` and `after_winning_updates()`.
    required_prices: price a bid has to reach to win auction i.
    is_clicked: click indicator of auction i, added to the winner's clicks.
    required_preces_per_impression: amount charged to the winner of auction i.

  Returns:
    (agents_list, ctr, num_clicks, num_impressions), where the last three are
    aggregated over all agents.  `ctr` is total clicks / total impressions
    (0 when no impression was won).  It was previously the sum of per-agent
    CTRs, which is only meaningful for a single agent; single-agent results
    are unchanged.
  """
  for index in tqdm(range(len(required_prices))):
    bid_prices = [agent.bidding_function(index) for agent in agents_list]
    winner_index = np.argmax(bid_prices)
    winner = agents_list[winner_index]
    charge = required_preces_per_impression[index]
    if required_prices[index] <= bid_prices[winner_index] and winner.budget >= charge:
      # Clicks and budget must be updated before the agent refreshes its
      # derived statistics.
      winner.num_clicks += is_clicked[index]
      winner.budget -= charge
      winner.after_winning_updates()
  num_clicks = sum(agent.num_clicks for agent in agents_list)
  num_impressions = sum(agent.num_impressions for agent in agents_list)
  ctr = num_clicks / num_impressions if num_impressions else 0
  return agents_list, ctr, num_clicks, num_impressions

### Logistic Regression (Single Layer MLP) CTR estimator related
(Currently best result)

In [0]:
# Raw binary dump of the LR model's validation predictions, read as float64.
valid_y_pred_1 = np.fromfile(
    "/content/drive/My Drive/MAAI/click_predictions/validation_predictions1_76.bin",
    dtype=np.float64,
)

In [0]:
# Hard 0/1 labels: 1 where the predicted probability exceeds 0.5.
valid_y_pred_1_classfication = np.where(valid_y_pred_1 > 0.5, 1, 0)

In [0]:
# Confusion matrix of the raw LR predictions (rows: true label, columns: predicted).
confusion_matrix(
    y_true=valid_data['click'].values,
    y_pred=valid_y_pred_1_classfication,
)

array([[297704,   6019],
       [   126,     76]])

In [0]:
# Re-calibrate the LR predictions; 0.01 is presumably the negative
# downsampling rate used when training this model (TODO confirm).
valid_y_pred_1_calibred = re_calibration(
    valid_y_pred_1,
    negative_downsampling_rate=0.01,
)

In [0]:
# Hard 0/1 labels from the re-calibrated LR probabilities (cut-off 0.5).
valid_y_pred_1_calibred_classfication = np.where(valid_y_pred_1_calibred > 0.5, 1, 0)

In [0]:
# Confusion matrix after re-calibration (rows: true label, columns: predicted).
confusion_matrix(
    y_true=valid_data['click'].values,
    y_pred=valid_y_pred_1_calibred_classfication,
)

array([[303592,    131],
       [   186,     16]])

#### Linear Strategy base bidding price grid search

In [0]:
# Coarse grid search (300..999, step 1) of the linear strategy's base price on
# the re-calibrated LR predictions, replaying the validation log per candidate.
clicks_res = []
ctr_res = []
cost_res = []
cpm_res = []
cpc_res = []
best_click = -1
best_click_baseprice = -1
# The logged auctions do not depend on the base price: read them once.
required_prices = valid_data['payprice'].values
is_clicked = valid_data['click'].values
required_preces_per_impression = required_prices / 1000
for baseprice in tqdm(np.arange(300, 1000, 1)):
  valid_linear_1 = stategy_linear(valid_y_pred_1_calibred, baseprice)
  agents_list = [GivenPriceAgent(6250, valid_linear_1)]
  agents_list, ctr, num_clicks, num_impressions = multi_agents_bidding_given_values(agents_list, required_prices, is_clicked, required_preces_per_impression)
  total_cost = agents_list[0].cost
  clicks_res.append(num_clicks)
  ctr_res.append(ctr)
  cost_res.append(total_cost)
  # CPM is the cost per *thousand* impressions.  Both ratios are NaN when the
  # denominator is zero instead of raising ZeroDivisionError or yielding inf.
  cpm_res.append(1000 * total_cost / num_impressions if num_impressions else np.nan)
  cpc_res.append(total_cost / num_clicks if num_clicks else np.nan)
  if num_clicks > best_click:
    best_click = num_clicks
    best_click_baseprice = baseprice

In [0]:
# Fine grid search (75..80, step 0.1) of the linear strategy's base price on
# the re-calibrated LR predictions, replaying the validation log per candidate.
clicks_res = []
ctr_res = []
cost_res = []
cpm_res = []
cpc_res = []
best_click = -1
best_click_baseprice = -1
# The logged auctions do not depend on the base price: read them once.
required_prices = valid_data['payprice'].values
is_clicked = valid_data['click'].values
required_preces_per_impression = required_prices / 1000
for baseprice in tqdm(np.arange(75, 80, 0.1)):
  valid_linear_1 = stategy_linear(valid_y_pred_1_calibred, baseprice)
  agents_list = [GivenPriceAgent(6250, valid_linear_1)]
  agents_list, ctr, num_clicks, num_impressions = multi_agents_bidding_given_values(agents_list, required_prices, is_clicked, required_preces_per_impression)
  total_cost = agents_list[0].cost
  clicks_res.append(num_clicks)
  ctr_res.append(ctr)
  cost_res.append(total_cost)
  # CPM is the cost per *thousand* impressions.  Both ratios are NaN when the
  # denominator is zero instead of raising ZeroDivisionError or yielding inf.
  cpm_res.append(1000 * total_cost / num_impressions if num_impressions else np.nan)
  cpc_res.append(total_cost / num_clicks if num_clicks else np.nan)
  if num_clicks > best_click:
    best_click = num_clicks
    best_click_baseprice = baseprice

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))




In [0]:
# Display the total cost recorded for each base price of the last grid search.
cost_res

[3742.7890000002844,
 3746.7030000002874,
 3749.284000000288,
 3752.612000000287,
 3754.7950000002875,
 3757.814000000289,
 3760.5230000002916,
 3762.6400000002895,
 3765.8370000002906,
 3769.13200000029,
 3772.845000000285,
 3776.2050000002837,
 3779.245000000283,
 3782.5860000002795,
 3784.678000000279,
 3786.9850000002803,
 3789.70300000028,
 3793.5570000002804,
 3796.008000000282,
 3799.5600000002814,
 3802.3220000002852,
 3804.4720000002853,
 3806.7590000002833,
 3810.526000000285,
 3813.404000000284,
 3816.0850000002856,
 3819.4270000002844,
 3820.8800000002843,
 3823.8920000002827,
 3826.4980000002834,
 3829.6220000002827,
 3831.656000000283,
 3833.9180000002834,
 3835.9570000002846,
 3839.461000000282,
 3842.016000000285,
 3845.150000000283,
 3848.2490000002836,
 3850.9790000002854,
 3854.1780000002855,
 3856.6000000002855,
 3860.342000000285,
 3861.549000000286,
 3863.9560000002825,
 3866.738000000283,
 3868.2820000002844,
 3871.620000000284,
 3874.4110000002875,
 3876.8400000

In [0]:
# Collect the per-base-price metrics of the LR grid search into one table and
# save it.  The base_price column must enumerate the grid that filled the
# result lists.  The latest LR search iterates np.arange(75, 80, 0.1), so the
# former np.arange(300, 1000, 1) made pandas reject the mismatched column
# lengths.  Slicing to len(clicks_res) keeps an interrupted search saveable.
searched_base_prices = np.arange(75, 80, 0.1)[:len(clicks_res)]
lr_linear_stategy_grid_search2 = pd.DataFrame({
     "base_price": searched_base_prices,
     "num_clicks_on_valid": clicks_res,
     "ctr": ctr_res,
     "cost": cost_res,
     "cpm": cpm_res,
     "cpc": cpc_res
})
lr_linear_stategy_grid_search2.to_csv("/content/drive/My Drive/MAAI/bid_predictions/lr_linear_bidding_stategy_base_price_grid_search2.csv", sep=',', encoding='utf-8')

### Neural Network (4 hidden layers) CTR estimator related

In [0]:
# Raw binary dump of the 4-hidden-layer network's validation predictions,
# read as float64.
nn4_valid_y_pred_1 = np.fromfile(
    "/content/drive/My Drive/MAAI/click_predictions/validation_predictions_0875.bin",
    dtype=np.float64,
)

In [0]:
# Hard 0/1 labels: 1 where the predicted probability exceeds 0.5.
nn4_valid_y_pred_1_classification = np.where(nn4_valid_y_pred_1 > 0.5, 1, 0)

In [0]:
# Confusion matrix of the raw NN4 predictions (rows: true label, columns: predicted).
confusion_matrix(
    y_true=valid_data['click'].values,
    y_pred=nn4_valid_y_pred_1_classification,
)

array([[301828,   1895],
       [   132,     70]])

In [0]:
# Re-calibrate the NN4 predictions; 0.01 is presumably the negative
# downsampling rate used when training this model (TODO confirm).
nn4_valid_y_pred_1_calibred = re_calibration(
    nn4_valid_y_pred_1,
    negative_downsampling_rate=0.01,
)

In [0]:
# Hard 0/1 labels from the re-calibrated NN4 probabilities (cut-off 0.5).
nn4_valid_y_pred_1_calibred_classfication = np.where(nn4_valid_y_pred_1_calibred > 0.5, 1, 0)

In [0]:
# Confusion matrix after re-calibration (rows: true label, columns: predicted).
confusion_matrix(
    y_true=valid_data['click'].values,
    y_pred=nn4_valid_y_pred_1_calibred_classfication,
)

array([[303681,     42],
       [   195,      7]])

#### Linear Strategy base bidding price grid search

In [30]:
# Grid search (0..199, step 1) of the linear strategy's base price on the
# re-calibrated NN4 predictions, replaying the validation log per candidate.
clicks_res = []
ctr_res = []
cost_res = []
cpm_res = []
cpc_res = []
best_click = -1
best_click_baseprice = -1
# The logged auctions do not depend on the base price: read them once.
required_prices = valid_data['payprice'].values
is_clicked = valid_data['click'].values
required_preces_per_impression = required_prices / 1000
for baseprice in tqdm(np.arange(0, 200, 1)):
  valid_linear_1 = stategy_linear(nn4_valid_y_pred_1_calibred, baseprice)
  agents_list = [GivenPriceAgent(6250, valid_linear_1)]
  agents_list, ctr, num_clicks, num_impressions = multi_agents_bidding_given_values(agents_list, required_prices, is_clicked, required_preces_per_impression)
  total_cost = agents_list[0].cost
  clicks_res.append(num_clicks)
  ctr_res.append(ctr)
  cost_res.append(total_cost)
  # CPM is the cost per *thousand* impressions.  The grid starts at base
  # price 0, where every bid is 0 and possibly nothing is won, so both ratios
  # fall back to NaN instead of dividing by zero.
  cpm_res.append(1000 * total_cost / num_impressions if num_impressions else np.nan)
  cpc_res.append(total_cost / num_clicks if num_clicks else np.nan)
  if num_clicks > best_click:
    best_click = num_clicks
    best_click_baseprice = baseprice

HBox(children=(IntProgress(value=0, max=200), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))



HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

KeyboardInterrupt: ignored

In [0]:
# Display the highest click count found by the last grid search.
best_click

In [0]:
# Display the base price that achieved `best_click` in the last grid search.
best_click_baseprice

In [0]:
# Save the NN4 linear-strategy grid search.  Two copy-paste defects are fixed:
#  * base_price now enumerates the grid searched above, np.arange(0, 200, 1),
#    sliced to the number of completed steps so an interrupted search can
#    still be saved.  The former np.arange(300, 1000, 1) never matched the
#    length of the result lists.
#  * the results go to their own nn4_* file instead of overwriting the
#    logistic-regression results CSV.
searched_base_prices = np.arange(0, 200, 1)[:len(clicks_res)]
nn4_linear_stategy_grid_search = pd.DataFrame({
     "base_price": searched_base_prices,
     "num_clicks_on_valid": clicks_res,
     "ctr": ctr_res,
     "cost": cost_res,
     "cpm": cpm_res,
     "cpc": cpc_res
})
# Old (misleading) name kept bound for any later cell that still reads it.
lr_linear_stategy_grid_search2 = nn4_linear_stategy_grid_search
nn4_linear_stategy_grid_search.to_csv("/content/drive/My Drive/MAAI/bid_predictions/nn4_linear_bidding_stategy_base_price_grid_search.csv", sep=',', encoding='utf-8')

### Neural Network (2 hidden layers) CTR estimator related

In [0]:
# Raw binary dump of the 2-hidden-layer network's validation predictions,
# read as float64.
nn2_valid_y_pred_2 = np.fromfile(
    "/content/drive/My Drive/MAAI/click_predictions/validation_predictions_new0.bin",
    dtype=np.float64,
)

In [0]:
# Hard 0/1 labels: 1 where the predicted probability exceeds 0.5.
nn2_valid_y_pred_2_classification = np.where(nn2_valid_y_pred_2 > 0.5, 1, 0)

In [0]:
# Confusion matrix of the raw NN2 predictions (rows: true label, columns: predicted).
confusion_matrix(
    y_true=valid_data['click'].values,
    y_pred=nn2_valid_y_pred_2_classification,
)

array([[303643,     80],
       [   151,     51]])

In [0]:
# Re-calibrate the NN2 predictions; 0.125 is presumably the negative
# downsampling rate used when training this model (TODO confirm).
nn2_valid_y_pred_2_calibred = re_calibration(
    nn2_valid_y_pred_2,
    negative_downsampling_rate=0.125,
)

In [0]:
# Hard 0/1 labels from the re-calibrated NN2 probabilities (cut-off 0.5).
nn2_valid_y_pred_2_calibred_classfication = np.where(nn2_valid_y_pred_2_calibred > 0.5, 1, 0)

In [0]:
# Confusion matrix after re-calibration (rows: true label, columns: predicted).
confusion_matrix(
    y_true=valid_data['click'].values,
    y_pred=nn2_valid_y_pred_2_calibred_classfication,
)

array([[303719,      4],
       [   196,      6]])

#### Linear Strategy base bidding price grid search

In [0]:
# Grid search (1..199, step 1) of the linear strategy's base price on the
# re-calibrated NN2 predictions, replaying the validation log per candidate.
clicks_res = []
ctr_res = []
cost_res = []
cpm_res = []
cpc_res = []
best_click = -1
best_click_baseprice = -1
# The logged auctions do not depend on the base price: read them once.
required_prices = valid_data['payprice'].values
is_clicked = valid_data['click'].values
required_preces_per_impression = required_prices / 1000
for baseprice in tqdm(np.arange(1, 200, 1)):
  valid_linear_1 = stategy_linear(nn2_valid_y_pred_2_calibred, baseprice)
  agents_list = [GivenPriceAgent(6250, valid_linear_1)]
  agents_list, ctr, num_clicks, num_impressions = multi_agents_bidding_given_values(agents_list, required_prices, is_clicked, required_preces_per_impression)
  total_cost = agents_list[0].cost
  clicks_res.append(num_clicks)
  ctr_res.append(ctr)
  cost_res.append(total_cost)
  # CPM is the cost per *thousand* impressions.  Both ratios are NaN when the
  # denominator is zero instead of raising ZeroDivisionError or yielding inf.
  cpm_res.append(1000 * total_cost / num_impressions if num_impressions else np.nan)
  cpc_res.append(total_cost / num_clicks if num_clicks else np.nan)
  if num_clicks > best_click:
    best_click = num_clicks
    best_click_baseprice = baseprice

HBox(children=(IntProgress(value=0, max=199), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

In [0]:
# Display the base price that achieved `best_click` in the last grid search.
best_click_baseprice

136

In [0]:
# Display the highest click count found by the last grid search.
best_click

162

In [0]:
# Save the NN2 linear-strategy grid search.  Two copy-paste defects are fixed:
#  * base_price now enumerates the grid searched above, np.arange(1, 200, 1),
#    sliced to the number of completed steps.  The former
#    np.arange(300, 1000, 1) has 700 entries against 199 results, which
#    pandas rejects.
#  * the results go to their own nn2_* file instead of overwriting the
#    logistic-regression results CSV.
searched_base_prices = np.arange(1, 200, 1)[:len(clicks_res)]
nn2_linear_stategy_grid_search = pd.DataFrame({
     "base_price": searched_base_prices,
     "num_clicks_on_valid": clicks_res,
     "ctr": ctr_res,
     "cost": cost_res,
     "cpm": cpm_res,
     "cpc": cpc_res
})
# Old (misleading) name kept bound for any later cell that still reads it.
lr_linear_stategy_grid_search2 = nn2_linear_stategy_grid_search
nn2_linear_stategy_grid_search.to_csv("/content/drive/My Drive/MAAI/bid_predictions/nn2_linear_bidding_stategy_base_price_grid_search.csv", sep=',', encoding='utf-8')

 #### Quadratic Strategy base bidding price grid search

In [0]:
# Grid-search the base price of the quadratic (power-2) bidding strategy on the
# validation set, using the calibrated NN CTR predictions. Each candidate is
# scored by one call to multi_agents_bidding_given_values with a single
# GivenPriceAgent, and its metrics are appended to the *_res lists.
#
# The auction inputs do not depend on the candidate, so they are built once
# here instead of on every iteration.
# NOTE(review): assumes multi_agents_bidding_given_values does not modify
# these arrays in place — confirm.
required_prices = valid_data['payprice'].values
is_clicked = valid_data['click'].values
required_preces_per_impression = required_prices / 1000

clicks_res = []
ctr_res = []
cost_res = []
cpm_res = []
cpc_res = []
best_click = -1
best_click_baseprice = -1
for baseprice in tqdm(np.arange(0.017, 0.019, 0.0001)):
  # NOTE(review): despite its name, valid_linear_1 holds the quadratic-strategy bids.
  valid_linear_1 = stategy_quadratic_power2(nn2_valid_y_pred_2_calibred, baseprice)
  # 6250 is the first GivenPriceAgent argument (presumably the budget — confirm).
  agents_list = [GivenPriceAgent(6250, valid_linear_1)]
  agents_list, ctr, num_clicks, num_impressions = multi_agents_bidding_given_values(agents_list, required_prices, is_clicked, required_preces_per_impression)
  clicks_res.append(num_clicks)
  ctr_res.append(ctr)
  cost_res.append(agents_list[0].cost)
  cpm_res.append(agents_list[0].cost / num_impressions)
  cpc_res.append(agents_list[0].cost / num_clicks)
  # Strict ">" keeps the first (lowest) base price among ties.
  if num_clicks > best_click:
    best_click = num_clicks
    best_click_baseprice = baseprice

HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))




In [0]:
best_click_baseprice

0.0173

In [0]:
best_click

160

In [0]:
clicks_res

[159,
 159,
 159,
 160,
 160,
 160,
 160,
 160,
 160,
 160,
 160,
 159,
 158,
 158,
 157,
 157,
 156,
 156,
 156,
 156]

In [0]:
# Save the metrics collected by the quadratic-strategy grid search above.
# "base_price" must be the grid that loop iterated over,
# np.arange(0.017, 0.019, 0.0001): the result lists hold one entry per grid
# point, so the previous hard-coded np.arange(300, 1000, 1) (700 entries) made
# pd.DataFrame raise ValueError for mismatched column lengths.
# NOTE(review): the variable name and output path still say "lr_linear" and are
# shared with other grid-search cells, so each run overwrites the same CSV —
# confirm the intended file name.
lr_linear_stategy_grid_search2 = pd.DataFrame({
     "base_price":np.arange(0.017, 0.019, 0.0001),
     "num_clicks_on_valid":clicks_res,
     "ctr": ctr_res,
     "cost": cost_res,
     "cpm": cpm_res,
     "cpc": cpc_res
})
lr_linear_stategy_grid_search2.to_csv("/content/drive/My Drive/MAAI/bid_predictions/lr_linear_bidding_stategy_base_price_grid_search2.csv", sep=',', encoding='utf-8')

In [0]:
  # One validation run of the unscaled quadratic strategy (base price 176) on
  # the calibrated NN predictions, with a single agent.
  required_prices = valid_data['payprice'].values
  is_clicked = valid_data['click'].values
  required_preces_per_impression = required_prices / 1000

  valid_quadratic_2 = stategy_quadratic_power2_unscaled(nn2_valid_y_pred_2_calibred, 176)
  agents_list = [GivenPriceAgent(6250, valid_quadratic_2)]
  agents_list, ctr, num_clicks, num_impressions = multi_agents_bidding_given_values(
      agents_list, required_prices, is_clicked, required_preces_per_impression)

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))




In [0]:
num_clicks

160

### XGBoosting (predictor 347, max depth 5) CTR estimator related

In [0]:
# Load the XGBoost (predictor 347, max depth 5) validation-set CTR predictions
# from Drive as a float array.
xgb_valid_y_pred_2 = np.genfromtxt("/content/drive/My Drive/MAAI/click_predictions/Luke_xgb_predictor347_maxdepth5_ctr_prediction_valid.csv", dtype=float)

In [0]:
# Hard 0/1 click labels from the raw XGBoost scores at a 0.5 threshold.
# One vectorised comparison replaces the per-element Python loop; scores that
# are not strictly greater than 0.5 (including NaN) map to 0, as before.
xgb_valid_y_pred_2_classfication = np.where(xgb_valid_y_pred_2 > 0.5, 1, 0)

In [43]:
confusion_matrix(valid_data['click'].values, xgb_valid_y_pred_2_classfication)

array([[302429,   1294],
       [   121,     81]])

In [0]:
# Re-calibrate the raw XGBoost validation predictions.
# NOTE(review): re_calibration is defined outside this view; 0.01 is its second
# argument (presumably the negative down-sampling rate used in training — confirm).
xgb_valid_y_pred_2_calibred = re_calibration(xgb_valid_y_pred_2, 0.01)

In [0]:
# Hard 0/1 click labels from the calibrated XGBoost scores at a 0.0005
# threshold. One vectorised comparison replaces the per-element Python loop;
# np.asarray keeps this working if re_calibration returned a plain sequence.
xgb_valid_y_pred_2_calibred_classfication = np.where(np.asarray(xgb_valid_y_pred_2_calibred) > 0.0005, 1, 0)

In [0]:
confusion_matrix(valid_data['click'].values, xgb_valid_y_pred_2_calibred_classfication)

array([[233082,  70641],
       [    30,    172]])

#### Linear Strategy base bidding price grid search

In [12]:
# Grid-search the base price of the linear bidding strategy on the validation
# set, using the calibrated XGBoost CTR predictions. Each candidate is scored
# by one call to multi_agents_bidding_given_values with a single
# GivenPriceAgent, and its metrics are appended to the *_res lists.
#
# The auction inputs do not depend on the candidate, so they are built once
# here instead of on every iteration.
# NOTE(review): assumes multi_agents_bidding_given_values does not modify
# these arrays in place — confirm.
required_prices = valid_data['payprice'].values
is_clicked = valid_data['click'].values
required_preces_per_impression = required_prices / 1000

clicks_res = []
ctr_res = []
cost_res = []
cpm_res = []
cpc_res = []
best_click = -1
best_click_baseprice = -1
for baseprice in tqdm(np.arange(0, 100, 1)):
  xgb_valid_linear_2 = stategy_linear(xgb_valid_y_pred_2_calibred, baseprice)
  # 6250 is the first GivenPriceAgent argument (presumably the budget — confirm).
  agents_list = [GivenPriceAgent(6250, xgb_valid_linear_2)]
  agents_list, ctr, num_clicks, num_impressions = multi_agents_bidding_given_values(agents_list, required_prices, is_clicked, required_preces_per_impression)
  clicks_res.append(num_clicks)
  ctr_res.append(ctr)
  cost_res.append(agents_list[0].cost)
  # NOTE(review): baseprice 0 makes every bid 0; if that wins no impressions
  # or clicks, the two ratios below divide by zero — confirm how the simulator
  # reports that case.
  cpm_res.append(agents_list[0].cost / num_impressions)
  cpc_res.append(agents_list[0].cost / num_clicks)
  # Strict ">" keeps the first (lowest) base price among ties.
  if num_clicks > best_click:
    best_click = num_clicks
    best_click_baseprice = baseprice

HBox(children=(IntProgress(value=0), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))



HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))




In [13]:
best_click

161

In [17]:
best_click_baseprice

96

In [0]:
# Save the metrics collected by the XGBoost linear-strategy grid search above.
# "base_price" must be the grid that loop iterated over, np.arange(0, 100, 1):
# the result lists hold one entry per grid point, so the previous hard-coded
# np.arange(300, 1000, 1) (700 entries) made pd.DataFrame raise ValueError for
# mismatched column lengths.
xgb_linear_stategy_grid_search2 = pd.DataFrame({
    "base_price":np.arange(0, 100, 1),
    "num_clicks_on_valid":clicks_res,
    "ctr": ctr_res,
    "cost": cost_res,
    "cpm": cpm_res,
    "cpc": cpc_res
})
xgb_linear_stategy_grid_search2.to_csv("/content/drive/My Drive/MAAI/bid_predictions/xgb_linear_bidding_stategy_base_price_grid_search2.csv", sep=',', encoding='utf-8')

 #### Unscaled Quadratic Strategy base bidding price grid search

In [0]:
# Grid-search the base price of the unscaled quadratic bidding strategy on the
# validation set, using the calibrated XGBoost CTR predictions. Each candidate
# is scored by one call to multi_agents_bidding_given_values with a single
# GivenPriceAgent, and its metrics are appended to the *_res lists.
#
# The auction inputs do not depend on the candidate, so they are built once
# here instead of on every iteration.
# NOTE(review): assumes multi_agents_bidding_given_values does not modify
# these arrays in place — confirm.
required_prices = valid_data['payprice'].values
is_clicked = valid_data['click'].values
required_preces_per_impression = required_prices / 1000

clicks_res = []
ctr_res = []
cost_res = []
cpm_res = []
cpc_res = []
best_click = -1
best_click_baseprice = -1
for baseprice in tqdm(np.arange(150, 200, 1)):
  valid_quadratic_unscaled_1 = stategy_quadratic_power2_unscaled(xgb_valid_y_pred_2_calibred, baseprice)
  # 6250 is the first GivenPriceAgent argument (presumably the budget — confirm).
  agents_list = [GivenPriceAgent(6250, valid_quadratic_unscaled_1)]
  agents_list, ctr, num_clicks, num_impressions = multi_agents_bidding_given_values(agents_list, required_prices, is_clicked, required_preces_per_impression)
  clicks_res.append(num_clicks)
  ctr_res.append(ctr)
  cost_res.append(agents_list[0].cost)
  cpm_res.append(agents_list[0].cost / num_impressions)
  cpc_res.append(agents_list[0].cost / num_clicks)
  # Strict ">" keeps the first (lowest) base price among ties.
  if num_clicks > best_click:
    best_click = num_clicks
    best_click_baseprice = baseprice

In [0]:
best_click_baseprice

179

In [0]:
best_click

174

In [0]:
# Save the metrics collected by the unscaled-quadratic grid search above.
# "base_price" must be the grid that loop iterated over, np.arange(150, 200, 1):
# the result lists hold one entry per grid point, so the previous hard-coded
# np.arange(300, 1000, 1) (700 entries) made pd.DataFrame raise ValueError for
# mismatched column lengths.
# NOTE(review): the variable name and output path still say "lr_linear" although
# these results come from the XGBoost / unscaled-quadratic search, and the same
# CSV is written by other grid-search cells — confirm the intended file name.
lr_linear_stategy_grid_search2 = pd.DataFrame({
     "base_price":np.arange(150, 200, 1),
     "num_clicks_on_valid":clicks_res,
     "ctr": ctr_res,
     "cost": cost_res,
     "cpm": cpm_res,
     "cpc": cpc_res
})
lr_linear_stategy_grid_search2.to_csv("/content/drive/My Drive/MAAI/bid_predictions/lr_linear_bidding_stategy_base_price_grid_search2.csv", sep=',', encoding='utf-8')

### Multi-agent competition

In [46]:
'''
agents_list = [GivenPriceAgent(6250, stategy_ortb_1(valid_y_pred_1_calibred)),
               GivenPriceAgent(6250, stategy_ortb_2(valid_y_pred_1_calibred)),
               
               GivenPriceAgent(6250, 20 * stategy_linear(valid_y_pred_1_calibred)),
               GivenPriceAgent(6250, stategy_linear(valid_y_pred_1_calibred) + 100000),
               
               GivenPriceAgent(6250, 20 * stategy_linear(nn2_valid_y_pred_2_calibred)),
               GivenPriceAgent(6250, stategy_linear(nn2_valid_y_pred_2_calibred) + 100000),
               
               GivenPriceAgent(6250, 20 * stategy_quadratic_power2_unscaled(xgb_valid_y_pred_2_calibred)),
               GivenPriceAgent(6250, stategy_quadratic_power2_unscaled(xgb_valid_y_pred_2_calibred) + 100000),
                
               GivenPriceAgent(6250, stategy_linear(valid_y_pred_1_calibred, 206)),
               GivenPriceAgent(6250, stategy_linear(nn2_valid_y_pred_2_calibred, 136)),
               GivenPriceAgent(6250, stategy_quadratic_power2_unscaled(xgb_valid_y_pred_2_calibred, 179)),
               GivenPriceAgent(6250, nn2_xgb_2_blend_valid_pred)]
'''
# Validation-set auction inputs.
required_prices = valid_data['payprice'].values
is_clicked = valid_data['click'].values
required_preces_per_impression = required_prices / 1000

# Only one agent is entered here: unscaled quadratic bids on the calibrated
# XGBoost predictions with base price 179.
agents_list = [
    GivenPriceAgent(6250, stategy_quadratic_power2_unscaled(xgb_valid_y_pred_2_calibred, 179)),
]
agents_list, ctr, num_clicks, num_impressions = multi_agents_bidding_given_values(
    agents_list, required_prices, is_clicked, required_preces_per_impression)

HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

In [47]:
[agent.num_clicks for agent in agents_list]

[174]

### Write to documents

In [0]:
# Load the first model's raw test-set predictions (binary float dump),
# re-calibrate them with 0.005 as the second re_calibration argument, and turn
# them into linear-strategy bids with base price 206.
# NOTE(review): re_calibration is defined outside this view.
test_y_pred_1 = np.fromfile("/content/drive/My Drive/MAAI/click_predictions/test_predictions1.bin", dtype=float)
test_y_pred_1_calibred = re_calibration(test_y_pred_1, 0.005)
test1_linear2_base206 = stategy_linear(test_y_pred_1_calibred, 206)

In [0]:
# Reload and re-calibrate the same test predictions, then compute ORTB-2 bids.
# The second positional argument of stategy_ortb_2 is `c`, so 100000000 is the
# value of c here (lambd keeps its default).
test_y_pred_1 = np.fromfile("/content/drive/My Drive/MAAI/click_predictions/test_predictions1.bin", dtype=float)
test_y_pred_1_calibred = re_calibration(test_y_pred_1, 0.005)
test1_ortb2_100000000 = stategy_ortb_2(test_y_pred_1_calibred, 100000000)

In [0]:
# Load and re-calibrate the first model's test predictions (the same two
# statements also open the two cells above).
test_y_pred_1 = np.fromfile("/content/drive/My Drive/MAAI/click_predictions/test_predictions1.bin", dtype=float)
test_y_pred_1_calibred = re_calibration(test_y_pred_1, 0.005)

In [0]:
# Load the "test_predictions_0875" test-set predictions and re-calibrate them
# with 0.01 as the second re_calibration argument.
nn_test_y_pred_1 = np.fromfile("/content/drive/My Drive/MAAI/click_predictions/test_predictions_0875.bin", dtype=float)
nn_test_y_pred_1_calibred = re_calibration(nn_test_y_pred_1, 0.01)

In [0]:
# Load the "test_predictions_new0" test-set predictions and re-calibrate them
# with 0.125 as the second re_calibration argument.
nn_test_y_pred_2 = np.fromfile("/content/drive/My Drive/MAAI/click_predictions/test_predictions_new0.bin", dtype=float)
nn_test_y_pred_2_calibred = re_calibration(nn_test_y_pred_2, 0.125)

In [0]:
# Load the XGBoost test-set CTR predictions (header-less CSV read as floats),
# re-calibrate the underlying array with 0.01, and squeeze out length-1 axes.
xgb_test_y_pred_2 = pd.read_csv("/content/drive/My Drive/MAAI/click_predictions/Luke_xgb_predictor347_maxdepth5_ctr_prediction_test.csv", dtype=float, header=None)
xgb_test_y_pred_2_calibred = re_calibration(xgb_test_y_pred_2.values, 0.01).squeeze()

In [0]:
# Read the bid prices of the saved "23th" submission and multiply every bid by 20.
xgb_test_y_pred_182clicks_mul_20 = 20 * pd.read_csv("/content/drive/My Drive/MAAI/bid_predictions/23th_pred_unscaled_power_xgb_2.csv").bidprice.values.squeeze()

In [0]:
# Read back the bid prices of the saved 15th submission as a plain array.
# NOTE(review): a later cell binds this same name to the DataFrame that writes
# that CSV, so the meaning of the name depends on execution order.
sub15th_pred_linear_nn_new0 = pd.read_csv("/content/drive/My Drive/MAAI/bid_predictions/15th_pred_linear_nn_new0.csv").bidprice.values.squeeze()

In [10]:
np.mean(sub15th_pred_linear_nn_new0)

147.80054456899077

In [0]:
# Rescale the 15th-submission bids to 1.2x plus a flat 10.
# The former `*= 1.2 + 10` evaluated the right-hand side first and therefore
# multiplied every bid by 11.2 instead.
# NOTE(review): intent inferred from the "01_2times_plus10" submission name
# used when these bids are written out — confirm.
# NOTE: re-running this cell rescales the already-rescaled bids again.
sub15th_pred_linear_nn_new0 = sub15th_pred_linear_nn_new0 * 1.2 + 10

In [0]:
# Submission file: test bid ids paired with the current contents of
# sub15th_pred_linear_nn_new0.
# NOTE(review): the transformation implied by the file name is not visible in
# this cell; the array must already hold the intended bids — confirm.
sub25th_pred_linear_nn_0before100000_after_doubled = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": sub15th_pred_linear_nn_new0,
})
sub25th_pred_linear_nn_0before100000_after_doubled.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/sub25th_pred_linear_nn_0before100000_after_doubled.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: test bid ids paired with the current contents of
# sub15th_pred_linear_nn_new0.
# NOTE(review): the transformation implied by the file name is not visible in
# this cell; the array must already hold the intended bids — confirm.
sub28th_pred_linear_nn_0before225000_after_1andhalf = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": sub15th_pred_linear_nn_new0,
})
sub28th_pred_linear_nn_0before225000_after_1andhalf.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/28th_pred_linear_nn_0before225000_after_1andhalf.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: test bid ids paired with the current contents of
# sub15th_pred_linear_nn_new0 (expected to have been rescaled beforehand).
sub29th_pred_linear_nn_01_2times_plus10 = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": sub15th_pred_linear_nn_new0,
})
sub29th_pred_linear_nn_01_2times_plus10.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/sub29th_pred_linear_nn_01_2times_plus10.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: test bid ids paired with the bids held in `linear`.
# NOTE(review): `linear` is not defined in the visible part of the notebook — confirm.
first_linear_valid_aver_plus_001 = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": linear,
})
first_linear_valid_aver_plus_001.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/first_linear_valid_aver_plus_001.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: test bid ids paired with the bids held in test1_ortb1_2.
# NOTE(review): test1_ortb1_2 is not defined in the visible part of the
# notebook, and its name says "ortb1" while the file says "ortb2" — confirm.
third_pred_ortb2 = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": test1_ortb1_2,
})
third_pred_ortb2.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/3rd_pred_ortb2.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: test bid ids paired with the bids held in test1_linear.
# NOTE(review): test1_linear is not defined in the visible part of the notebook — confirm.
fourth_pred_linear = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": test1_linear,
})
fourth_pred_linear.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/4th_pred_linear.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: test bid ids paired with the bids held in test1_linear2_base108.
# NOTE(review): test1_linear2_base108 is not defined in the visible part of the notebook — confirm.
fifth_pred_linear = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": test1_linear2_base108,
})
fifth_pred_linear.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/5th_pred_linear_base108.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: test bid ids paired with the bids held in test1_linear2_base196.
# NOTE(review): test1_linear2_base196 is not defined in the visible part of the notebook — confirm.
sixth_pred_linear = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": test1_linear2_base196,
})
sixth_pred_linear.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/6th_pred_linear_base196.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: test bid ids paired with the bids held in test1_ortb2_100000.
# NOTE(review): the visible part of the notebook only defines
# test1_ortb2_100000000 (c = 100000000), not test1_ortb2_100000 — confirm
# which array is meant.
seventh_pred_ortb2_100000 = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": test1_ortb2_100000,
})
seventh_pred_ortb2_100000.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/7th_pred_ortb2_100000.csv",
    sep=',', encoding='utf-8')

In [0]:
# Current 2nd for criteria 2
# Submission file: test bid ids paired with the linear-strategy bids at base
# price 206 (test1_linear2_base206).
eieghth_pred_linear = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": test1_linear2_base206,
})
eieghth_pred_linear.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/8th_pred_linear_base206.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: clipped linear bids with an extremely large base price.
# clipped_stategy_linear zeroes predictions below its click_threshold before
# bidding, so the remaining impressions receive very large bids.
sub11th_pred_linear = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": clipped_stategy_linear(test_y_pred_1_calibred, 1000000000001),
})
sub11th_pred_linear.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/11th_minclip_linear_extr_large_base.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: linear-strategy bids (base price 325) on the calibrated
# nn_test_y_pred_1 predictions.
sub12th_pred_linear_nn0875 = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": stategy_linear(nn_test_y_pred_1_calibred, 325),
})
sub12th_pred_linear_nn0875.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/12th_pred_linear_nn0875.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: linear-strategy bids on the calibrated nn_test_y_pred_1
# predictions, using stategy_linear's default base price (the average
# validation pay price).
sub13th_pred_linear_nn0875_base_average = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": stategy_linear(nn_test_y_pred_1_calibred),
})
sub13th_pred_linear_nn0875_base_average.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/13th_pred_linear_nn0875_baseprice_average.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: the lower/upper-clipped linear test bids shifted by 45
# (lr_upper_and_lower_clipped_linear_test, built in the exploratory section
# further down the notebook, so that cell has to be run first).
sub14th_pred_linear_lclip_uclip_baised45 = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": lr_upper_and_lower_clipped_linear_test,
})
sub14th_pred_linear_lclip_uclip_baised45.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/14th_pred_linear_lclip_uclip_baised45.csv",
    sep=',', encoding='utf-8')

In [0]:
# Current best for criteria 2
# Submission file: linear-strategy bids (base price 136) on the calibrated
# nn_test_y_pred_2 predictions.
# NOTE(review): this name is also used elsewhere in the notebook for the bid
# array read back from this CSV; here it is bound to the DataFrame.
sub15th_pred_linear_nn_new0 = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": stategy_linear(nn_test_y_pred_2_calibred, 136),
})
sub15th_pred_linear_nn_new0.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/15th_pred_linear_nn_new0.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: unscaled quadratic bids (base price 175) on the calibrated
# nn_test_y_pred_2 predictions.
sub17th_pred_unscaled_power_nn_new0 = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": stategy_quadratic_power2_unscaled(nn_test_y_pred_2_calibred, 175),
})
sub17th_pred_unscaled_power_nn_new0.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/17th_pred_unscaled_power_nn_new0.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: unscaled quadratic bids (base price 179, the best value from
# the validation grid search) on the calibrated XGBoost test predictions.
sub18th_pred_unscaled_power_xgb_2 = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": stategy_quadratic_power2_unscaled(xgb_test_y_pred_2_calibred, 179),
})
sub18th_pred_unscaled_power_xgb_2.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/18th_pred_unscaled_power_xgb_2.csv",
    sep=',', encoding='utf-8')

In [0]:
# Submission file: the "23th" submission's bids multiplied by 20
# (xgb_test_y_pred_182clicks_mul_20).
sub24th_pred_unscaled_power_xgb_click182_mul20 = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": xgb_test_y_pred_182clicks_mul_20,
})
sub24th_pred_unscaled_power_xgb_click182_mul20.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/24th_pred_unscaled_power_xgb_click182_mul20.csv",
    sep=',', encoding='utf-8')

###  Grid Search for ORTB Parameter

#### ORTB 1

In [0]:
# Grid-search the ORTB-1 `lambd` parameter on the validation set: each
# candidate is scored by one call to multi_agents_bidding_given_values with a
# single GivenPriceAgent, and the click count is recorded in clicks_res.
#
# The auction inputs do not depend on the candidate, so they are built once
# here instead of on every iteration.
# NOTE(review): assumes multi_agents_bidding_given_values does not modify
# these arrays in place — confirm.
required_prices = valid_data['payprice'].values
is_clicked = valid_data['click'].values
required_preces_per_impression = required_prices / 1000

clicks_res = []
best_click = -1
best_click_lambd = -1
search = np.arange(1e-9, 1e-1, 1e-4)
print(len(search))
# Iterate over the candidates directly; tqdm already reports progress, so the
# per-iteration print of the loop index (which flooded the output) is gone.
for lambd in tqdm(search):
  valid_ortb_1 = stategy_ortb_1(valid_y_pred_1_calibred, lambd = lambd)
  agents_list = [GivenPriceAgent(6250, valid_ortb_1)]
  agents_list, ctr, num_clicks, num_impressions = multi_agents_bidding_given_values(agents_list, required_prices, is_clicked, required_preces_per_impression)
  clicks_res.append(num_clicks)
  # Strict ">" keeps the first (smallest) lambd among ties.
  if num_clicks > best_click:
    best_click = num_clicks
    best_click_lambd = lambd

In [0]:
best_click_lambd

0.00010000100000000001

In [0]:
best_click

63

In [0]:
# Pair every searched lambd with the click count it achieved and save the
# table to Drive. Requires the ORTB-1 loop to have run to completion so both
# columns have the same length.
ortb1_stategy_grid_search = pd.DataFrame({
    "ortb_1_lambd": search,
    "clicks": clicks_res,
})
ortb1_stategy_grid_search.to_csv(
    "/content/drive/My Drive/MAAI/data/ortb1_stategy_grid_search.csv",
    sep=',', encoding='utf-8')

#### ORTB 2

In [0]:
# Grid-search the ORTB-2 `lambd` parameter on the validation set: each
# candidate is scored by one call to multi_agents_bidding_given_values with a
# single GivenPriceAgent, and the click count is recorded in clicks_res.
#
# The auction inputs do not depend on the candidate, so they are built once
# here instead of on every iteration.
# NOTE(review): assumes multi_agents_bidding_given_values does not modify
# these arrays in place — confirm.
required_prices = valid_data['payprice'].values
is_clicked = valid_data['click'].values
required_preces_per_impression = required_prices / 1000

clicks_res = []
best_click = -1
best_click_lambd = -1
search = np.arange(1e-9, 1e-1, 1e-4)
print(len(search))
# Iterate over the candidates directly; tqdm already reports progress, so the
# per-iteration print of the loop index (which flooded the output) is gone.
for lambd in tqdm(search):
  valid_ortb_2 = stategy_ortb_2(valid_y_pred_1_calibred, lambd = lambd)
  agents_list = [GivenPriceAgent(6250, valid_ortb_2)]
  agents_list, ctr, num_clicks, num_impressions = multi_agents_bidding_given_values(agents_list, required_prices, is_clicked, required_preces_per_impression)
  clicks_res.append(num_clicks)
  # Strict ">" keeps the first (smallest) lambd among ties.
  if num_clicks > best_click:
    best_click = num_clicks
    best_click_lambd = lambd

1000


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

0


HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

1


HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

2


HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

3


HBox(children=(IntProgress(value=0, max=303925), HTML(value='')))

In [0]:
best_click_lambd

In [0]:
best_click

In [0]:
clicks_res

[60,
 61,
 54,
 48,
 41,
 34,
 32,
 31,
 30,
 28,
 26,
 25,
 24,
 22,
 22,
 21,
 21,
 21,
 21,
 21,
 21,
 19,
 17,
 17,
 17,
 16,
 16,
 15,
 15,
 15,
 15,
 14,
 13,
 12,
 12,
 11,
 11,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 9,
 9,
 9,
 8,
 7,
 7,
 7,
 6,
 6,
 6,
 6,
 6,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 3,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,

In [0]:
# Pair every evaluated lambd with the click count it achieved and save the
# table to Drive.
# clicks_res only holds results for the candidates evaluated so far; if the
# search loop was stopped early it is shorter than `search`, and pd.DataFrame
# raises ValueError for mismatched column lengths. Slicing `search` to the
# number of collected results keeps the two columns aligned (and is a no-op
# when the loop ran to completion).
# NOTE(review): the column is still labelled "ortb_1_lambd" although these are
# ORTB-2 results; kept unchanged so readers of the CSV are not broken — confirm.
ortb2_stategy_grid_search = pd.DataFrame({
    "ortb_1_lambd":search[:len(clicks_res)],
    "clicks":clicks_res
})
ortb2_stategy_grid_search.to_csv("/content/drive/My Drive/MAAI/data/ortb2_stategy_grid_search.csv", sep=',', encoding='utf-8')

## Exploratory Strategy

In [0]:
# Read back the saved 4th submission (linear strategy) for inspection.
sub4 = pd.read_csv("/content/drive/My Drive/MAAI/bid_predictions/4th_pred_linear.csv", sep=',', encoding='utf-8')

In [0]:
sum(sub4['bidprice'].values)- sum(sub4['bidprice'].values[sub4['bidprice'].values > 300])

15979279.363143334

In [0]:
valid_data['bidprice'][valid_data['bidprice'].values > 300]

Series([], Name: bidprice, dtype: int64)

In [0]:
train_data['bidprice'][train_data['bidprice'].values > 300]

Series([], Name: bidprice, dtype: int64)

In [0]:
# Lower-clipped linear bids on the validation predictions (predictions under
# the click threshold bid 0) ...
lr_upper_and_lower_clipped_linear_valid = clipped_stategy_linear(valid_y_pred_1_calibred)
# ... then capped: bids below 300 are kept, every other bid is set to 301.
lr_upper_and_lower_clipped_linear_valid = np.where(
    lr_upper_and_lower_clipped_linear_valid < 300,
    lr_upper_and_lower_clipped_linear_valid,
    301,
)

In [0]:
sum(clipped_stategy_linear(valid_y_pred_1_calibred))

58935695.89651259

In [0]:
(sum(clipped_stategy_linear(valid_y_pred_1_calibred)) - sum(lr_upper_and_lower_clipped_linear_valid)) / len(valid_y_pred_1_calibred)

160.1740134785774

In [0]:
# Shift every capped bid by a flat amount. 160.1740134785774 is the value shown
# by the cell just before this one: (sum of the uncapped clipped-linear bids -
# sum of the capped bids) / number of validation predictions.
argumented_lr_upper_and_lower_clipped_linear_valid = lr_upper_and_lower_clipped_linear_valid + 160.1740134785774

In [0]:
# Plain linear bids on the validation predictions (default base price) ...
lr_upper_clipped_linear_valid = stategy_linear(valid_y_pred_1_calibred)
# ... then capped: bids below 300 are kept, every other bid is set to 301.
lr_upper_clipped_linear_valid = np.where(
    lr_upper_clipped_linear_valid < 300,
    lr_upper_clipped_linear_valid,
    301,
)

In [0]:
argumented_lr_upper_and_lower_clipped_linear_valid

array([160.17401348, 160.17401348, 160.17401348, ..., 160.17401348,
       160.17401348, 325.84915544])

In [0]:
# Lower-clipped linear bids on the test predictions ...
lr_upper_and_lower_clipped_linear_test = clipped_stategy_linear(test_y_pred_1_calibred)
# ... capped (bids below 300 kept, everything else set to 301) and then
# shifted up by a flat 45.
lr_upper_and_lower_clipped_linear_test = 45 + np.where(
    lr_upper_and_lower_clipped_linear_test < 300,
    lr_upper_and_lower_clipped_linear_test,
    301,
)

In [0]:
lr_upper_and_lower_clipped_linear_test

array([216.18188755, 346.        ,  45.        , ...,  45.        ,
       199.59347124,  45.        ])

### Blending Attempts

#### XGB (predictor 347, max depth 5, features 886) / Power unscaled + NN (2 layer) / Linear

In [0]:
# Validation-set predictions of the two models, each re-calibrated with its
# own second re_calibration argument.
xgb_valid_y_pred_2 = np.genfromtxt(
    "/content/drive/My Drive/MAAI/click_predictions/Luke_xgb_predictor347_maxdepth5_ctr_prediction_valid.csv",
    dtype=float)
xgb_valid_y_pred_2_calibred = re_calibration(xgb_valid_y_pred_2, 0.01)
nn2_valid_y_pred_2 = np.fromfile(
    "/content/drive/My Drive/MAAI/click_predictions/validation_predictions_new0.bin",
    dtype=float)
nn2_valid_y_pred_2_calibred = re_calibration(nn2_valid_y_pred_2, 0.125)

# Blend the bids (not the CTRs): 10% NN linear bids at base price 136 plus
# 90% XGBoost unscaled-quadratic bids at base price 179.
nn2_xgb_2_blend_valid_pred = (
    0.1 * stategy_linear(nn2_valid_y_pred_2_calibred, 136)
    + 0.9 * stategy_quadratic_power2_unscaled(xgb_valid_y_pred_2_calibred, 179)
)

In [0]:
# Test-set bids of the two saved submissions being blended.
sub18th_xgb_test_y_pred_2_bidding_res = pd.read_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/18th_pred_unscaled_power_xgb_2.csv",
    sep=',').bidprice.values.squeeze()
sub15th_nn2_test_y_pred_2_bidding_res = pd.read_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/15th_pred_linear_nn_new0.csv",
    sep=',').bidprice.values.squeeze()

# Same 10% / 90% weighting as the validation blend.
nn2_xgb_2_blend_test_pred = (
    0.1 * sub15th_nn2_test_y_pred_2_bidding_res
    + 0.9 * sub18th_xgb_test_y_pred_2_bidding_res
)

In [0]:
# Submission file: test bid ids paired with the blended NN/XGBoost bids.
sub20th_pred_nn_1_xgb_9_blend_1 = pd.DataFrame({
    "bidid": test_data['bidid'].values,
    "bidprice": nn2_xgb_2_blend_test_pred,
})
sub20th_pred_nn_1_xgb_9_blend_1.to_csv(
    "/content/drive/My Drive/MAAI/bid_predictions/20th_pred_nn_1_xgb_9_blend_1.csv",
    sep=',', encoding='utf-8')