In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import math
import random
from sklearn.utils import resample
from sklearn.externals import joblib
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import precision_score

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
#import Data
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the object has been read.
    """
    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only use this on files produced by a trusted pipeline.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Pre-processed train / validation matrices and their labels
X_train = _load_pickle("X_train_features.pkl")
y_train = _load_pickle("y_train_features.pkl")
X_validation = _load_pickle("X_validation_features.pkl")
y_validation = _load_pickle("y_validation_features.pkl")

# Collection of selected feature columns (used below to index X_train / X_validation)
Lin_features = _load_pickle('bestFeatures.pkl')


In [3]:
# Report how many of the original columns survive feature selection
n_selected = len(Lin_features)
n_original = len(X_train.columns)
print("%s of the original %s are now being used" % (n_selected, n_original))

240 of the original 580 are now being used


In [None]:
#Limit the input data to the selected feature columns and convert to plain arrays
feature_X_train = X_train[Lin_features].values
feature_X_val = X_validation[Lin_features].values

# Sanity check: column count of the full validation frame (before feature selection)
print(len(X_validation.columns))

580


In [None]:
# Load the raw (unprocessed) training and validation logs from CSV
traindf, validationdf = [
    pd.read_csv(csv_name) for csv_name in ("train.csv", "validation.csv")
]

In [None]:
# Pay prices of the training impressions that were clicked
clicked_payprice = traindf.loc[traindf["click"] == 1, "payprice"]

# Share of clicked impressions, expressed in percent (note the *100)
# NOTE(review): downstream code divides a probability by this percentage - confirm the units are intended
avgCTR = traindf["click"].sum()*100/len(traindf)

# Mean pay price over clicked impressions only
avgBid_training = clicked_payprice.mean()

summary_template = "In the training data, the average CTR is: %.4f , and the average BID is: $%.2f"
print(summary_template % (avgCTR, avgBid_training))

# Candidate base bids: from the cheapest clicked pay price up to (excluding) 300, in steps of 5
bid_list = list(range(clicked_payprice.min(), 300, 5))

In the training data, the average CTR is: 0.0738 , and the average BID is: $105.46


In [None]:
# Fit the click model on the selected training features
clf = LogisticRegression(C = 0.1, random_state = 123)
clf.fit(feature_X_train, y_train)

# Score the validation set once; `predict` is kept as an alias of the same
# array so any code that still refers to it keeps working.
predict_proba_list = clf.predict_proba(feature_X_val)
predict = predict_proba_list

# Second column = probability of the second class in clf.classes_
# (presumably click == 1 - confirm against clf.classes_)
validationdf['click_proba'] = predict_proba_list[:, 1]

# NOTE(review): clicks are very rare in this data, so plain accuracy is close to
# 1.00 even for a model that never predicts a click; treat it as a sanity check only.
print("Accuracty level of the Logistic Regression model :::: %.2f" % 
      clf.score(feature_X_val, y_validation))

Accuracty level of the Logistic Regression model :::: 1.00


In [None]:

def lin_bid(base_bid, budget = 6250000, data = None, avg_ctr = None):
    """Simulate a linear bidding strategy and count the clicks / impressions won.

    Every impression is bid ``base_bid * click_proba / avg_ctr``. An auction
    is won when the bid is strictly greater than the impression's
    ``payprice``; the winner pays ``payprice``. Impressions are processed in
    row order and the simulation stops at the first won auction that the
    remaining budget cannot pay for, so the budget is never overspent.

    Parameters
    ----------
    base_bid : number
        Bid placed on an impression whose predicted click probability
        equals ``avg_ctr``.
    budget : number, optional
        Total amount that may be spent, in the same unit as ``payprice``.
    data : pandas.DataFrame, optional
        Frame with ``payprice``, ``click`` and ``click_proba`` columns.
        Defaults to the notebook-level ``validationdf``.
    avg_ctr : number, optional
        Normalising click-through rate. Defaults to the notebook-level
        ``avgCTR``.

    Returns
    -------
    (clicks, imp) : tuple of int
        Number of clicks obtained and number of impressions won.

    Raises
    ------
    ValueError
        If ``avg_ctr`` is not strictly positive.
    """
    if data is None:
        data = validationdf
    if avg_ctr is None:
        # NOTE(review): the notebook computes avgCTR in percent (x100) while
        # click_proba is a probability in [0, 1]. Left as is because the base
        # bid sweep appears to be tuned to this scale - confirm the units.
        avg_ctr = avgCTR
    if avg_ctr <= 0:
        raise ValueError("avg_ctr must be strictly positive")

    # Plain arrays give positional access, independent of the frame's index labels
    pay_price = data["payprice"].values
    click = data["click"].values
    bid = base_bid * data["click_proba"].values / avg_ctr

    won = pay_price < bid

    # Running spend if every won auction were paid for, in row order
    spend = np.cumsum(np.where(won, pay_price, 0))
    over_budget = spend > budget

    # argmax returns the first True: the first purchase that would overspend
    cutoff = int(np.argmax(over_budget)) if over_budget.any() else len(data)

    won = won[:cutoff]
    clicks = int(click[:cutoff][won].sum())
    imp = int(won.sum())
    return clicks, imp

In [None]:
# Evaluate the bidding simulation for every candidate base bid,
# keeping the clicks and impressions in lists aligned with bid_list
clicks_list, imp_list = [], []

for bid in bid_list:
    total_clicks, total_imp = lin_bid(bid)
    clicks_list.append(total_clicks)
    imp_list.append(total_imp)
    print("Bidding $%s scores:: %s clicks" % (bid, total_clicks))

Bidding $4 scores:: 7 clicks
Bidding $9 scores:: 39 clicks
Bidding $14 scores:: 60 clicks
Bidding $19 scores:: 73 clicks
Bidding $24 scores:: 82 clicks
Bidding $29 scores:: 96 clicks
Bidding $34 scores:: 103 clicks
Bidding $39 scores:: 115 clicks
Bidding $44 scores:: 119 clicks
Bidding $49 scores:: 122 clicks
Bidding $54 scores:: 128 clicks
Bidding $59 scores:: 134 clicks
Bidding $64 scores:: 139 clicks
Bidding $69 scores:: 146 clicks
Bidding $74 scores:: 146 clicks
Bidding $79 scores:: 150 clicks
Bidding $84 scores:: 156 clicks
Bidding $89 scores:: 142 clicks
Bidding $94 scores:: 138 clicks
Bidding $99 scores:: 136 clicks
Bidding $104 scores:: 129 clicks
Bidding $109 scores:: 125 clicks
Bidding $114 scores:: 123 clicks
Bidding $119 scores:: 116 clicks
Bidding $124 scores:: 113 clicks
Bidding $129 scores:: 106 clicks
Bidding $134 scores:: 100 clicks
Bidding $139 scores:: 100 clicks
Bidding $144 scores:: 99 clicks
Bidding $149 scores:: 95 clicks
Bidding $154 scores:: 92 clicks
Bidding $

In [None]:
# Impressions won as a function of the base bid (top panel of a 3x1 grid)
imp_fig = plt.figure(figsize = (10,8))
imp_ax = imp_fig.add_subplot(3,1,1)

imp_ax.plot(bid_list, imp_list, "b-" )
imp_ax.set_xlabel("Base Bid")
imp_ax.set_ylabel("# of Impressions")

In [None]:
# Clicks obtained as a function of the base bid (top panel of a 3x1 grid)
click_fig = plt.figure(figsize = (10,8))
click_ax = click_fig.add_subplot(3,1,1)

click_ax.plot(bid_list, clicks_list, "r")
click_ax.set_xlabel("Base Bid")
click_ax.set_ylabel("# of Clicks")

In [None]:
# Pick the base bid with the most clicks; on ties the earliest (lowest) bid wins,
# because max() returns the first maximal element it encounters
bid_index, max_click = max(enumerate(clicks_list), key=lambda pair: pair[1])
best_bid = bid_list[bid_index]

print("The best bid for this model is::: $%s" % best_bid)