In [1]:
import pandas as pd
import numpy as np
import os
import random
import matplotlib.pyplot as plt
import operator
from sklearn.pipeline import Pipeline
from sklearn.pipeline import _name_estimators
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve,auc
from sklearn.metrics import log_loss
from sklearn.model_selection import cross_val_score
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from sklearn.externals import six
from sklearn.base import clone
from sklearn.linear_model import LogisticRegression  
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
import xgboost
from xgboost import XGBClassifier

In [3]:
from sklearn.externals import joblib

In [4]:
# Load the pre-computed training matrix, training labels and validation matrix.
# NOTE: joblib.load unpickles its input, so only point it at files you trust.
X_resampled, y_resampled, X_valid = (
    joblib.load(pkl_path)
    for pkl_path in (
        "pCTR_X_train_resampled.pkl",
        "pCTR_y_train_resampled.pkl",
        "pCTR_X_validation.pkl",
    )
)

In [5]:
# Raw train / validation / test tables; later cells read `train.click`,
# `validation.bidid`, `validation.payprice` and `validation.click`.
train, validation, test = (
    pd.read_csv(csv_name)
    for csv_name in ("train.csv", "validation.csv", "test.csv")
)

### XGBoost bid price

In [9]:
# Configure the XGBoost classifier that the next cell fits on the resampled
# training data. random_state is pinned so repeated fits use the same seed.
xgb_params = {
    "n_estimators": 700,
    "max_depth": 5,
    "learning_rate": 0.1,
    "random_state": 100,
}
xgb_model = XGBClassifier(**xgb_params)

In [10]:
# Fit the XGBoost model on the resampled training set, wrapped in a DataFrame
# so that the columns carry the stored feature names.
# `estimator` is a shared name: later cells rebind it to the other models.
features_total_names = joblib.load("total_features_basebid.pkl")
X_train, y_train = (
    pd.DataFrame(data=X_resampled, columns=features_total_names),
    y_resampled,
)
estimator = xgb_model
estimator.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=5, min_child_weight=1, missing=None, n_estimators=700,
       n_jobs=1, nthread=None, objective='binary:logistic',
       random_state=100, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=None, silent=True, subsample=1)

In [11]:
# Hard class predictions of the currently bound `estimator` on the validation
# features. In the cells shown, `preds` is only reassigned later, never read.
preds = estimator.predict(X_valid)

In [15]:
# Turn the classifier's positive-class probabilities into `valid_score`.
# The model was fitted on the resampled set, and p / (p + (1 - p) / w) rescales
# its output; presumably this undoes the resampling, with 40000 and 1786 being
# the resampled-set size and its click count (TODO confirm these constants).
pred_proba = estimator.predict_proba(X_valid)
n_train_rows = train.shape[0]
w = (40000 - 1786) / n_train_rows
avgCTR = sum(train.click) / n_train_rows  # mean of train.click

p_click = pred_proba[:, 1]
valid_score = p_click / (p_click + (1 - p_click) / w)

In [18]:
# One bid per validation row: sqrt(c / lam * score + c**2) - c.
# c and lam are hard-coded; presumably tuned elsewhere (TODO confirm source).
c, lam = 93.9455, 5e-06
XGB_bid = [
    np.sqrt(c / lam * valid_score[i] + c ** 2) - c
    for i in range(validation.shape[0])
]

### LR bid price

In [21]:
# L2-regularised logistic regression with regularisation parameter C = 1.0.
lr_settings = {"penalty": "l2", "C": 1.0}
lr_model = LogisticRegression(**lr_settings)

In [22]:
# Fit the logistic-regression model on the same resampled training data.
# This rebinds the shared `estimator` name to `lr_model`.
features_total_names = joblib.load("total_features_basebid.pkl")
X_train, y_train = (
    pd.DataFrame(data=X_resampled, columns=features_total_names),
    y_resampled,
)
estimator = lr_model
estimator.fit(X_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [23]:
# Hard class predictions of the currently bound `estimator` on the validation
# features. `preds1` is not read again in the cells shown.
preds1 = estimator.predict(X_valid)

In [24]:
# Same rescaling as for the other models: p / (p + (1 - p) / w) applied to the
# positive-class probability. Overwrites the shared `valid_score`, `w` and
# `avgCTR`. The constants 40000 and 1786 are hard-coded (TODO confirm meaning).
pred_proba1 = estimator.predict_proba(X_valid)
n_train_rows = train.shape[0]
w = (40000 - 1786) / n_train_rows
avgCTR = sum(train.click) / n_train_rows  # mean of train.click

p_click = pred_proba1[:, 1]
valid_score = p_click / (p_click + (1 - p_click) / w)

In [25]:
# One bid per validation row: sqrt(c / lam * score + c**2) - c, using the
# `valid_score` currently in scope. c and lam are hard-coded; presumably tuned
# elsewhere (TODO confirm source).
c, lam = 74.7727, 5e-06
LR_bid = [
    np.sqrt(c / lam * valid_score[i] + c ** 2) - c
    for i in range(validation.shape[0])
]

### LightGBM bid price

In [30]:
import lightgbm

# Hyper-parameters for the LightGBM classifier. `objective` is not passed, so
# the library default applies.
# NOTE(review): the fitted repr in this notebook shows subsample_freq=0; per the
# LightGBM parameter docs, bagging only happens when that frequency is non-zero,
# so subsample=0.8 may currently have no effect. Confirm before relying on it.
lgbm_params = dict(
    learning_rate=0.1,
    n_estimators=700,
    max_depth=5,
    min_child_weight=7,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_alpha=0.01,
)
lgbm = lightgbm.LGBMClassifier(**lgbm_params)

In [31]:
# Fit the LightGBM model on the same resampled training data.
# This rebinds the shared `estimator` name to `lgbm`.
features_total_names = joblib.load("total_features_basebid.pkl")
X_train, y_train = (
    pd.DataFrame(data=X_resampled, columns=features_total_names),
    y_resampled,
)
estimator = lgbm
estimator.fit(X_train, y_train)

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=0.8,
        importance_type='split', learning_rate=0.1, max_depth=5,
        min_child_samples=20, min_child_weight=7, min_split_gain=0.0,
        n_estimators=700, n_jobs=-1, num_leaves=31, objective=None,
        random_state=None, reg_alpha=0.01, reg_lambda=0.0, silent=True,
        subsample=0.8, subsample_for_bin=200000, subsample_freq=0)

In [32]:
# Hard class predictions of the currently bound `estimator` on the validation
# features. Rebinds `preds`, which an earlier cell also assigned; it is not
# read again in the cells shown.
preds = estimator.predict(X_valid)

In [None]:
# Same rescaling as for the other models, applied to whatever `estimator` is
# bound to when this cell runs.
# NOTE(review): this cell has no execution count in the saved notebook, yet the
# following cell reads `valid_score` and `avgCTR`. Run it after fitting `lgbm`,
# or the bids will be built from another model's scores.
pred_proba = estimator.predict_proba(X_valid)
n_train_rows = train.shape[0]
w = (40000 - 1786) / n_train_rows
avgCTR = sum(train.click) / n_train_rows  # mean of train.click

p_click = pred_proba[:, 1]
valid_score = p_click / (p_click + (1 - p_click) / w)

In [34]:
# One bid per validation row, proportional to the score relative to avgCTR.
# The multiplier is hard-coded; presumably tuned elsewhere (TODO confirm).
lgbm_bid_scale = 76.701110
LGBM_bid = [
    lgbm_bid_scale * valid_score[i] / avgCTR
    for i in range(validation.shape[0])
]

### Competition

In [36]:
# Side-by-side table of the three agents' bids and the logged pay price,
# keyed by bid id. The row index comes from the `validation` columns.
multi_columns = ['bidid', 'LR_bid', 'XGB_bid', 'LGBM_bid', 'payprice']
Multi = pd.DataFrame(
    {
        'bidid': validation['bidid'],
        'LR_bid': LR_bid,
        'XGB_bid': XGB_bid,
        'LGBM_bid': LGBM_bid,
        'payprice': validation['payprice'],
    },
    columns=multi_columns,
)

In [44]:
# Persist the bid table. The row index is written too, which is why reading
# the file back yields the extra 'Unnamed: 0' column that a later cell drops.
Multi.to_csv('multiagent.csv')

In [45]:
# Reload the saved bid table from disk into `df`.
df = pd.read_csv('multiagent.csv')

In [50]:
# Remove the index column that to_csv wrote into the file. errors='ignore'
# keeps this cell safe to re-run: without it a second run, or a file saved
# without the index, raises KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [2]:
# Re-read the saved bid table. This rebinds `df`, so any column drop applied
# to the earlier `df` is not carried over.
df = pd.read_csv("multiagent.csv")

In [28]:
# Preview the first rows of the bid table.
df.head()

Unnamed: 0,bidid,LR_bid,XGB_bid,LGBM_bid,payprice
0,bbcb813b6166538503d8b33a5602d7d72f6019dc,63.096608,59.209892,93.300709,23
1,5a07316c49477cb5d9b4d5aa39c27d6c3be7f92d,23.088078,15.573007,27.716773,75
2,f6ece71dae81d6b16bfb24ad6dd5611472d4c673,10.399805,1.083546,11.567147,65
3,b4d5c57c9b38ff5a12954fa01e11931b4e6bfbbb,23.26198,24.841125,27.95367,6
4,0899bf144249458ea9c89188473694bf44c7ca15,15.947828,28.263443,18.353193,5


In [3]:
# For every auction, gather the three model bids plus the logged pay price and
# take the second-highest of those four values as the price the winner pays.
# `temp` (the per-row value lists) is kept because another cell reads it.
bid_columns = ['LR_bid', 'XGB_bid', 'LGBM_bid', 'payprice']
temp = [list(row) for row in df[bid_columns].values]
payprice = [np.sort(row)[-2] for row in temp]

In [7]:
# Highest of the three model bids and the logged pay price for every auction.
# The original built `temp1` but then read `temp` from a different cell, so it
# only worked if that cell had already run. This version uses its own data.
bid_columns = ['LR_bid', 'XGB_bid', 'LGBM_bid', 'payprice']
bid_matrix = df[bid_columns].values   # shape: (len(df), 4)
temp1 = bid_matrix.tolist()           # kept for compatibility: per-row value lists
winner = list(bid_matrix.max(axis=1))

In [12]:
# Replay the auctions for the XGB agent: on every row where its bid equals
# winner[i] it is charged payprice[i] and collects that row's click, until the
# next charge would push cumulative spend over the budget.
multi = pd.DataFrame(columns=['bid','Imps','spend','clicks'])
budget = 6250*1000
flag = True          # stays True unless the budget runs out
spend = 0
num_click = 0
Imps = 0

# zip() truncates silently, so insist that all inputs describe the same rows.
assert len(df) == len(winner) == len(payprice) == len(validation), \
    "df, winner, payprice and validation must have the same length"

for agent_bid, top_bid, cost, clicked in zip(
        df['XGB_bid'].values, winner, payprice, validation.click.values):
    if agent_bid != top_bid:
        continue                      # this agent did not win the auction
    if spend + cost > budget:
        flag = False                  # budget exhausted: stop without buying
        break
    spend = spend + cost
    num_click = num_click + clicked
    Imps = Imps + 1

# Write the summary once, after the replay (the original rewrote these rows on
# every loop iteration). Each scalar is broadcast across all four columns.
multi.loc['strategy'] = "XGB strategy"
multi.loc['clicks'] = num_click
multi.loc['spend'] = spend/1000
multi.loc['Imps'] = Imps

In [13]:
# Display the XGB agent's summary table.
multi

Unnamed: 0,bid,Imps,spend,clicks
strategy,XGB strategy,XGB strategy,XGB strategy,XGB strategy
clicks,39,39,39,39
spend,3699.39,3699.39,3699.39,3699.39
Imps,48435,48435,48435,48435


In [21]:
# Replay the auctions for the LGBM agent: on every row where its bid equals
# winner[i] it is charged payprice[i] and collects that row's click, until the
# next charge would push cumulative spend over the budget.
multi1 = pd.DataFrame(columns=['1'])
budget = 6250*1000
flag = True          # stays True unless the budget runs out
spend = 0
num_click = 0
Imps = 0

# zip() truncates silently, so insist that all inputs describe the same rows.
assert len(df) == len(winner) == len(payprice) == len(validation), \
    "df, winner, payprice and validation must have the same length"

for agent_bid, top_bid, cost, clicked in zip(
        df['LGBM_bid'].values, winner, payprice, validation.click.values):
    if agent_bid != top_bid:
        continue                      # this agent did not win the auction
    if spend + cost > budget:
        flag = False                  # budget exhausted: stop without buying
        break
    spend = spend + cost
    num_click = num_click + clicked
    Imps = Imps + 1

# Write the summary once, after the replay (the original rewrote these rows on
# every loop iteration).
multi1.loc['strategy'] = "LGBM strategy"
multi1.loc['clicks'] = num_click
multi1.loc['spend'] = spend/1000
multi1.loc['Imps'] = Imps

In [22]:
# Display the LGBM agent's summary table.
multi1

Unnamed: 0,1
strategy,LGBM strategy
clicks,41
spend,6249.83
Imps,39477


In [23]:
# Replay the auctions for the LR agent: on every row where its bid equals
# winner[i] it is charged payprice[i] and collects that row's click, until the
# next charge would push cumulative spend over the budget.
multi2 = pd.DataFrame(columns=['1'])
budget = 6250*1000
flag = True          # stays True unless the budget runs out
spend = 0
num_click = 0
Imps = 0

# zip() truncates silently, so insist that all inputs describe the same rows.
assert len(df) == len(winner) == len(payprice) == len(validation), \
    "df, winner, payprice and validation must have the same length"

for agent_bid, top_bid, cost, clicked in zip(
        df['LR_bid'].values, winner, payprice, validation.click.values):
    if agent_bid != top_bid:
        continue                      # this agent did not win the auction
    if spend + cost > budget:
        flag = False                  # budget exhausted: stop without buying
        break
    spend = spend + cost
    num_click = num_click + clicked
    Imps = Imps + 1

# Write the summary once, after the replay (the original rewrote these rows on
# every loop iteration).
multi2.loc['strategy'] = "LR strategy"
multi2.loc['clicks'] = num_click
multi2.loc['spend'] = spend/1000
multi2.loc['Imps'] = Imps

In [24]:
# Display the LR agent's summary table.
multi2

Unnamed: 0,1
strategy,LR strategy
clicks,0
spend,0
Imps,0


In [26]:
# Replay the auctions for the logged ("original") pay price: on every row where
# df['payprice'] equals winner[i], payprice[i] is charged and that row's click
# is collected, until the next charge would push cumulative spend over budget.
multi3 = pd.DataFrame(columns=['1'])
budget = 6250*1000
flag = True          # stays True unless the budget runs out
spend = 0
num_click = 0
Imps = 0

# zip() truncates silently, so insist that all inputs describe the same rows.
assert len(df) == len(winner) == len(payprice) == len(validation), \
    "df, winner, payprice and validation must have the same length"

for logged_price, top_bid, cost, clicked in zip(
        df['payprice'].values, winner, payprice, validation.click.values):
    if logged_price != top_bid:
        continue                      # the logged price was out-bid on this row
    if spend + cost > budget:
        flag = False                  # budget exhausted: stop without buying
        break
    spend = spend + cost
    num_click = num_click + clicked
    Imps = Imps + 1

# Write the summary once, after the replay (the original rewrote these rows on
# every loop iteration).
multi3.loc['strategy'] = "original"
multi3.loc['clicks'] = num_click
multi3.loc['spend'] = spend/1000
multi3.loc['Imps'] = Imps

In [27]:
# Display the summary table for the logged ("original") pay price.
multi3

Unnamed: 0,1
strategy,original
clicks,25
spend,5755.95
Imps,126932
