# Import Data

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pickle
import time
import os.path
import math
from xgboost import XGBClassifier
from sklearn.externals import joblib

n_bytes = 2**31
max_bytes = 2**31 - 1

class Time_Tracking():
    
    start_time = None
    
    def start_tracking(self):
        
        self.start_time = time.time()
    
    def stop_tracking(self):
        
        print("Time used:", round(((time.time() - self.start_time)/60),2), ' minutes')


def load_pickle(file_path):
    
    bytes_in = bytearray(0)
    input_size = os.path.getsize(file_path)
    with open(file_path, 'rb') as f_in:
        for _ in range(0, input_size, max_bytes):
            bytes_in += f_in.read(max_bytes)
    
    return pickle.loads(bytes_in)


In [2]:
model = joblib.load("xgb.pickle.dat")

train = load_pickle('train_df_after_preprocessing.pkl')
valid = load_pickle('valid_df_after_preprocessing.pkl')
test = load_pickle('test_df_after_preprocessing.pkl')

valid_click = valid['click']
valid_bidprice = valid['bidprice']
valid_payprice = valid['payprice']
valid_X = valid.drop(['click', 'bidprice', 'payprice'], axis = 1)

### 暂时没时间 tune xgboost, 先 tune 各种strategy

In [3]:
# strategy 1: linear bidding(base_line)
# strategy 2: OCTR
# strategy 3: threshold linear bidding 
# strategy 4: linear bidding with floor price 
# strategy 5: sigmoid threshold bid 

# threshold 可能效果不是很好，因为 threshold = 0.5 的时候，failed = 61, 

In [3]:
valid_ctr_prediction = model.predict_proba(valid_X)

In [5]:
def LinearBidding(pCTR, basePrice, avgCTR):
    return basePrice*pCTR/avgCTR

In [6]:
def EvaluateClick( pCTR, budget, base_price, avg_CTR, valid_payprice, valid_click):
    
    bid_price = LinearBidding(pCTR, base_price, avg_CTR)
    auction = bid_price >= valid_payprice
    win_bid_ix = [index for index, auction in enumerate(auction) if auction]

    clicks = sum(valid_click[win_bid_ix])
    spend = sum(valid_payprice[win_bid_ix])

    sorted_pCTR_index = sorted(range(len(pCTR)), key=lambda k: pCTR[k])

    if spend > budget:
        print('overspend')
        for bid_id in sorted_pCTR_index:

            spend -= valid_bidprice[bid_id]
            clicks -= valid_click[bid_id]

            if spend <= budget:
                break

    print('spend:', spend, ' click:', clicks)
    return spend,clicks

EvaluateClick( valid_ctr_prediction[:,1], 6250000, 5, 0.0007, valid_payprice, valid_click)

overspend
spend: 6249848  click: 202


(6249848, 202)

In [9]:
# search the best base_price
base_prices = [0.001, 0.01, 0.1, 1, 10, 100,1000]
spend = []
clicks = []

for base in base_prices:
    print(base)
    s,c = EvaluateClick( valid_ctr_prediction[:,1], 6250000, base, 0.0007, valid_payprice, valid_click)
    spend.append(s)
    clicks.append(c)

0.001
spend: 19  click: 0
0.01
spend: 21056  click: 4
0.1
spend: 3151105  click: 111
1
overspend
spend: 6249851  click: 202
10
overspend
spend: 6249895  click: 202
100
overspend
spend: 6249895  click: 202
1000
overspend
spend: 6249895  click: 202


### The best base_price is 1 with average CTR = 0.0007

# Prediction

In [15]:
test_prediction = model.predict_proba(test)[:,1]

In [20]:
budget = 6250000
bid_price = test_prediction/0.0007
sorted_pCTR_index = sorted(range(len(bid_price)), key=lambda k: bid_price[k])
spend = sum(bid_price)

if spend > budget:
    print('overspend')
    for bid_id in sorted_pCTR_index:

        spend -= bid_price[bid_id]
        bid_price[bid_id] = 0
        
        if spend <= budget:
            break

print('spend:', spend)

overspend
spend: 6249846.81995


In [17]:
test.head()

Unnamed: 0,weekday,hour,slotprice,OS_android,OS_ios,OS_linux,OS_mac,OS_other,OS_windows,browser_chrome,...,slotvisibility_ThirdView,advertiser_1458,advertiser_2259,advertiser_2261,advertiser_2821,advertiser_2997,advertiser_3358,advertiser_3386,advertiser_3427,advertiser_3476
0,0,12,10,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,1,0
1,3,14,5,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,5,19,0,0,0,0,0,0,1,0,...,0,1,0,0,0,0,0,0,0,0
3,0,21,30,0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
4,2,20,50,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0


In [33]:
import pandas as pd

submission = pd.read_csv('Group_xx.csv')
submission['bidprice'] = bid_price
submission.to_csv('Group_9.csv')