# Neural Network

## Enviroment

In [27]:
import os
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import random
import math

import sys
sys.path.append('../')

from Preprocessing import preprocessing
from Preprocessing.single_set import SingleSet

In [2]:
%matplotlib inline

## Data Preprocessing

Features: data.data_features
Targets: data.data_targets (click, bidprice, payprice)

In [61]:
train_data_path = '/Data/train.csv'
train_data = SingleSet(relative_path=train_data_path,use_numerical_labels=True)

val_data_path = '/Data/validation.csv'
val_data = SingleSet(relative_path=val_data_path,use_numerical_labels=True)

test_data_path = '/Data/test.csv'
test_data = SingleSet(relative_path=test_data_path,use_numerical_labels=True)

-- data loaded --
-- data loaded --
-- data loaded --


In [62]:
def pandas_to_numpy(data):

    ## features
    features = np.asarray(data.data_features.values)

    ## targets
    if hasattr(data, "data_targets"):
        labels = np.asarray(data.data_targets.values)
        
    return features, labels


## drop unnecessary features
def drop_features(data):
    
    if 'userid' in data.data_features:
        data.data_features.drop('userid', axis=1, inplace = True)
        
    if 'urlid' in data.data_features:
        data.data_features.drop('bidid', axis=1, inplace = True)


drop_features(train_data)
drop_features(val_data)
drop_features(test_data)

x_train, y_train = pandas_to_numpy(train_data)
x_val, y_val = pandas_to_numpy(val_data)
x_test, y_test = pandas_to_numpy(test_data)


### Data Input Shapes

In [63]:
input_shape = x_train.shape[1]
print("input_shape", input_shape)
output_shape = 1

# targets_________________________________________________

# clicks
y_train_clicks = np.reshape(y_train[:,0], (y_train.shape[0], 1))  # get first column (clicks)
y_val_clicks = np.reshape(y_val[:,0], (y_val.shape[0], 1))  # get first column (clicks)

# payprice
y_train_payprice = np.reshape(y_train[:,2], (y_train.shape[0], 1))  # get third column (payprice)
y_val_payprice = np.reshape(y_val[:,2], (y_val.shape[0], 1))  # get third column (payprice)

input_shape 20


### Scale Data

In [22]:
# normalize the data attributes
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler

#normalized_X = preprocessing.normalize(x_train)



## features
feature_scaler = MinMaxScaler(feature_range=(0, 1))
feature_scaler.fit(np.concatenate((x_train, x_val, x_test), axis = 0))       

x_train = feature_scaler.transform(x_train)
x_val = feature_scaler.transform(x_val)
x_test = feature_scaler.transform(x_test)

## Targets________________________________________

# payprice
#payprice_scaler = MinMaxScaler(feature_range=(0, 1))
#payprice_scaler.fit(np.concatenate((y_train_payprice, y_val_payprice), axis = 0))   

#y_train_payprice = payprice_scaler.transform(y_train_payprice)
#y_val_payprice = payprice_scaler.transform(y_val_payprice)



# Model - Neural Networks

## "Click" - Binary Classification

Train Baseline Accuracy "Clicks": 0.9992618932746251%
#of 0:    2429188     # of 1:       1793


Val Baseline Accuracy "Clicks": 0.9993349203056733%
#of 0:    303723     # of 1:       202


## Upsampling

sample up "1"s for more balanced classification

--> default accuracy: 50%

In [41]:
def upsampling(x, y):

    xy = np.concatenate((x, y), axis = 1)

    zeros = xy[xy[:,-1] == 0]
    ones = xy[xy[:,-1] == 1]

    ones_upsampled = np.repeat(ones, math.ceil(len(zeros)/len(ones)), axis=0)

    # cut at length of zeros.shape 2429188
    ones_upsampled = ones_upsampled[:zeros.shape[0]]

    xy_upsampled  = np.concatenate((ones_upsampled, zeros), axis = 0) # combine
    np.random.shuffle(xy_upsampled)                                   # shuffle

    x_upsampled = xy_upsampled[:,:-1]   # features
    y_upsampled = xy_upsampled[:,-1:]   # targets
    
    return x_upsampled, y_upsampled


x_train_up, y_train_clicks_up = upsampling(x_train, y_train_clicks)

(4858376, 19)


### Integer / Categorical One-Hot Encoding

In [24]:
#y_train_clicks_up = keras.utils.to_categorical(y_train_clicks_up, 2)
#y_val_clicks = keras.utils.to_categorical(y_val_clicks, 2)

y_train_clicks_up = y_train_clicks_up.astype(int)
y_val_clicks = y_val_clicks.astype(int)

### Binary Model Compile

In [9]:
#from sklearn.model_selection import train_test_split, StratifiedKFold
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from keras import regularizers, initializers
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.models import Model, Input
from keras import optimizers

from random import shuffle
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import model_from_json

import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
import warnings
warnings.filterwarnings('ignore', '.*do not.*',)



def create_model_clicks(input_shape, output_shape):

    ## sequential model
    model = Sequential()
    model.add(Dense(24, input_dim=input_shape, activation='relu'))#, kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(24, activation='relu'))#, kernel_regularizer=regularizers.l2(0.01)))


    #model.add(Dropout(0.2))
    #model.add(Dense(6, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    #model.add(Dropout(0.2))
    # working model___________
    #model.add(Dense(3, input_dim=input_shape, activation='relu'))
    #model.add(Dense(12, activation='relu'))
    #model.add(Dense(12, activation='relu'))


    #model.add(Dropout(0.5))
    #model.add(Dropout(0.1))
    model.add(Dense(output_shape, activation='sigmoid'))

    ## other way of defining model
    
    #inputs = Input(shape=(input_shape,))
    #outputs = Dense(output_shape, activation="relu", kernel_regularizer=regularizers.l2(5e-4),kernel_initializer=initializers.he_normal(seed=13))(inputs)
    #model = Model(inputs=inputs, outputs=outputs, name='bidder')

    return model


def plot_curves_clicks(history):

    # Loss Curves
    plt.figure(figsize=[8, 6])
    plt.plot(history.history['loss'],'cadetblue', linewidth=3.0)
    plt.plot(history.history['val_loss'],'midnightblue', linewidth=3.0)
    plt.legend(['Training loss', 'Validation Loss'], fontsize=12)
    plt.xlabel('Epochs ', fontsize=12)
    plt.ylabel('Loss', fontsize=12)
    plt.title('Loss Curves', fontsize=16, fontweight="bold")
    #plt.savefig('Results/neural_network/loss_curve.png')
    #plt.show()

    # Accuracy Curves
    plt.figure(figsize=[8, 6])
    plt.plot(history.history['acc'],'cadetblue', linewidth=3.0)
    plt.plot(history.history['val_acc'], 'midnightblue', linewidth=3.0)
    plt.legend(['Training Accuracy', 'Validation Accuracy'], fontsize=12)
    plt.xlabel('Epochs ', fontsize=12)
    plt.ylabel('Accuracy', fontsize=12)
    plt.title('Accuracy Curves', fontsize=16, fontweight="bold")
    #plt.savefig('Results/neural_network/accuracy_curve.png')
    plt.show()


Using TensorFlow backend.


In [60]:
print("Click Classifier - features: ", input_shape, "targets: ", output_shape)

# Clear model, and create it
model_clicks = create_model_clicks(input_shape, output_shape)
print(model_clicks.summary())
    

## hyperparameters_______________________________________
    

## model compile and train
model_clicks.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# specify learning rate
#sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

#model.compile(loss='mean_squared_error', optimizer=sgd, metrics=['accuracy'])


# Set callback functions to early stop training and save the best model so far
callbacks = [EarlyStopping(monitor='val_loss', patience=3),
             ModelCheckpoint('Results/neural_network/' + 'trained_weights.h5', monitor='loss', save_best_only=True)]


# Unbalanced Model weights
class_weight = {0: 0.5, 1: 0.5} #{0: (1793 / 2429188), 1: (1-(1793 / 2429188))}

# Fit the model on the batches
batch_size = 16
epochs = 16
history = model_clicks.fit(x=x_train_up, y=y_train_clicks_up, batch_size=batch_size, class_weight = class_weight,
                    epochs=epochs, validation_data=(x_val, y_val_clicks), shuffle=True, callbacks=callbacks, verbose=1)

## model evaluate
plot_curves_clicks(history)


## save model to disk_____________________________________

# serialize model to JSON
model_json = model_clicks.to_json()

if not os.path.exists('Results/neural_network'):
    os.makedirs('Results/neural_network/')

with open('Results/neural_network/click_architecture.json', "w") as json_file:
    json_file.write(model_json)
    
# serialize weights to HDF5
model_clicks.save_weights('Results/neural_network/click_weights.h5')
print("saved model to disk")



Click Classifier - features:  20 targets:  1
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_31 (Dense)             (None, 24)                504       
_________________________________________________________________
dense_32 (Dense)             (None, 24)                600       
_________________________________________________________________
dense_33 (Dense)             (None, 1)                 25        
Total params: 1,129
Trainable params: 1,129
Non-trainable params: 0
_________________________________________________________________
None


ValueError: Error when checking input: expected dense_31_input to have shape (20,) but got array with shape (19,)

### Prediction

In [11]:
## Load Model for Testing_______________________________________________________________

# Model reconstruction from JSON file
#with open('Results/neural_network/' + 'model_architecture.json', 'r') as f:
#    model_test = model_from_json(f.read())

# Load weights into the new model
#model_test.load_weights('Results/neural_network/' + 'trained_weights.h5')

#model_test.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


click_predictions = model_clicks.predict_classes(x_val, verbose=1)
click_predictions_df = pd.DataFrame(click_predictions, columns= ['click'])

print(click_predictions_df['click'].value_counts())

from sklearn import metrics
print("\nF1_score:", metrics.f1_score(y_val_clicks, click_predictions, average='weighted'))
print("Accuracy:", metrics.accuracy_score(y_val_clicks, click_predictions))


#test_score = model.evaluate(x = x_val, y = y_val)
#print('\ntest accuracy:', test_score[1])

0    237725
1     66200
Name: click, dtype: int64

F1_score: 0.877083388136186
Accuracy: 0.7820580735378794


## "Payprice" - Float Model

### Compile Model "Payprice"

In [25]:
from sklearn.model_selection import train_test_split, StratifiedKFold
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from keras import regularizers, initializers
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.models import Model, Input
from keras import optimizers

import matplotlib.pylab as plt
from random import shuffle
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import model_from_json

import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
import warnings
warnings.filterwarnings('ignore', '.*do not.*',)



def create_model_payprice(input_shape, output_shape):

    ## sequential model
    model = Sequential()
    model.add(Dense(24, input_dim=input_shape, activation='relu'))
    model.add(Dense(24, activation='relu'))
    #model.add(Dense(12, input_dim=input_shape, activation='relu'))

    # working model___________
    #model.add(Dense(3, input_dim=input_shape, activation='relu'))
    #model.add(Dense(12, activation='relu'))
    #model.add(Dense(12, activation='relu'))


    #model.add(Dropout(0.5))
    #model.add(Dropout(0.1))
    model.add(Dense(output_shape, activation='linear'))

    ## other way of defining model
    
    #inputs = Input(shape=(input_shape,))
    #outputs = Dense(output_shape, activation="relu", kernel_regularizer=regularizers.l2(5e-4),kernel_initializer=initializers.he_normal(seed=13))(inputs)
    #model = Model(inputs=inputs, outputs=outputs, name='bidder')

    return model

    
def plot_curves(history):
    
    # Loss Curves
    plt.figure(figsize=[8, 6])
    plt.plot(history.history['loss'],'cadetblue', linewidth=3.0)
    plt.plot(history.history['val_loss'],'midnightblue', linewidth=3.0)
    plt.legend(['Training loss', 'Validation Loss'], fontsize=12)
    plt.xlabel('Epochs ', fontsize=12)
    plt.ylabel('Loss', fontsize=12)
    plt.title('Loss Curves', fontsize=16, fontweight="bold")
    #plt.savefig('Results/neural_network/loss_curve.png')
    #plt.show()

    # Accuracy Curves
    plt.figure(figsize=[8, 6])
    plt.plot(history.history['mean_squared_error'],'cadetblue', linewidth=3.0)
    plt.plot(history.history['val_mean_squared_error'], 'midnightblue', linewidth=3.0)
    plt.legend(['Training MSE', 'Validation MSE'], fontsize=12)
    plt.xlabel('Epochs ', fontsize=12)
    plt.ylabel('Mean Squared Error', fontsize=12)
    plt.title('Mean Squared Error Curves', fontsize=16, fontweight="bold")
    #plt.savefig('Results/neural_network/accuracy_curve.png')
    plt.show()





In [None]:
print("features: ", input_shape, "targets: ", output_shape)

# Clear model, and create it
model_payprice = create_model_payprice(input_shape, output_shape)
print(model_payprice.summary())


## hyperparameters_______________________________________
    

## model compile and train
model_payprice.compile(optimizer='adam', loss='mse', metrics=['mse'])

# specify learning rate
#sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

#model.compile(loss='mean_squared_error', optimizer=sgd, metrics=['accuracy'])


# Set callback functions to early stop training and save the best model so far
callbacks = [EarlyStopping(monitor='val_loss', patience=3),
             ModelCheckpoint('Results/neural_network/' + 'trained_weights.h5', monitor='loss', save_best_only=True)]

# Fit the model on the batches
batch_size = 16
epochs = 16
history = model_payprice.fit(x=x_train, y=y_train_payprice, batch_size=batch_size, 
                    epochs=epochs, validation_data=(x_val, y_val_payprice), shuffle=True, callbacks=callbacks)
# verbose=1, shuffle=True, callbacks=callbacks

## model evaluate
plot_curves(history)


## save model to disk_____________________________________

# serialize model to JSON
model_json = model_payprice.to_json()

if not os.path.exists('Results/neural_network'):
    os.makedirs('Results/neural_network/')

with open('Results/neural_network/payprice_architecture.json', "w") as json_file:
    json_file.write(model_json)
    
# serialize weights to HDF5
model_payprice.save_weights('Results/neural_network/payprice_weights.h5')
print("saved model to disk")

features:  20 targets:  1
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_37 (Dense)             (None, 24)                504       
_________________________________________________________________
dense_38 (Dense)             (None, 24)                600       
_________________________________________________________________
dense_39 (Dense)             (None, 1)                 25        
Total params: 1,129
Trainable params: 1,129
Non-trainable params: 0
_________________________________________________________________
None
Train on 2430981 samples, validate on 303925 samples
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16

### Prediction

In [82]:
## Load Model for Testing_______________________________________________________________

# Model reconstruction from JSON file
#with open('Results/neural_network/' + 'model_architecture.json', 'r') as f:
#    model_test = model_from_json(f.read())

# Load weights into the new model
#model_test.load_weights('Results/neural_network/' + 'trained_weights.h5')

#model_test.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

payprice_predictions_scaled = model_payprice.predict(x_val, verbose=1)

# scale back
#payprice_predictions = payprice_scaler.inverse_transform(payprice_predictions_scaled)
payprice_predictions_df = pd.DataFrame(payprice_predictions_scaled, columns= ['payprice'])

from sklearn import metrics
print("\nMean Squared Error:", metrics.mean_squared_error(y_val_payprice, payprice_predictions_scaled))
print("\n", payprice_predictions_df)

#test_score = model.evaluate(x = x_val, y = y_val)
#print('\ntest accuracy:', test_score[1])


Mean Squared Error: 3297.598304984801

           payprice
0        65.272087
1        63.293770
2        64.296890
3        64.017441
4        63.370773
5        58.483658
6        72.513359
7        77.601212
8        64.276192
9        58.483658
10       91.474754
11       58.804523
12       58.705933
13       63.113182
14      134.359344
15       62.680466
16       63.365250
17       61.052784
18       59.031422
19       58.770767
20       58.483658
21       63.530148
22       63.306828
23       64.190689
24       58.483658
25       61.681015
26       81.997787
27       59.908932
28       58.483658
29       83.857376
...            ...
303895  141.173004
303896   58.483658
303897   58.483658
303898   63.297092
303899   62.879070
303900   58.483658
303901   62.643410
303902   60.305492
303903   58.809887
303904   58.483658
303905   58.483658
303906  103.571640
303907   62.748775
303908   84.069595
303909   60.344345
303910   81.940742
303911   64.722328
303912   81.983383
303913   

# Make Bidding

## Predict Clicks and Payprice

In [154]:
train_data_path = '/Data/train.csv'
train_data = SingleSet(relative_path=train_data_path,use_numerical_labels=True)

val_data_path = '/Data/validation.csv'
val_data = SingleSet(relative_path=val_data_path,use_numerical_labels=True)

test_data_path = '/Data/test.csv'
test_data = SingleSet(relative_path=test_data_path,use_numerical_labels=True)

-- data loaded --
-- data loaded --
-- data loaded --


In [53]:
def pandas_to_numpy(data):

    ## features
    features = np.asarray(data.data_features.values)

    ## targets
    if hasattr(data, "data_targets"):
        labels = np.asarray(data.data_targets.values)
        
    return features, labels



## drop unnecessary features
def drop_features(data):
    
    if 'userid' in data.data_features:
        data.data_features.drop('userid', axis=1, inplace = True)
        
    if 'urlid' in data.data_features:
        data.data_features.drop('urlid', axis=1, inplace = True)


drop_features(train_data)
drop_features(val_data)
drop_features(test_data)

x_train, y_train = pandas_to_numpy(train_data)
x_val, y_val = pandas_to_numpy(val_data)
x_test, y_test = pandas_to_numpy(test_data)

print(x_test.shape)

(303375, 20)


### Scale Data

In [50]:
# normalize the data attributes
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler

## features
feature_scaler = MinMaxScaler(feature_range=(0, 1))
feature_scaler.fit(np.concatenate((x_train, x_val, x_test), axis = 0))       

x_train = feature_scaler.transform(x_train)
x_val = feature_scaler.transform(x_val)
x_test = feature_scaler.transform(x_test)

## Load Models

In [86]:
## Load Models for Bidding _______________________________________________________________


## CLICKS___________

# Model reconstruction from JSON file
with open('Results/neural_network/click_architecture.json', 'r') as f:
    model_clicks = model_from_json(f.read())

# Load weights into the new model
model_clicks.load_weights('Results/neural_network/click_weights.h5')


## PAYPRICE___________

# Model reconstruction from JSON file
with open('Results/neural_network/payprice_architecture.json', 'r') as f:
    model_payprice = model_from_json(f.read())

# Load weights into the new model
model_payprice.load_weights('Results/neural_network/payprice_weights.h5')

## Predict

In [87]:
predict_data = x_val

## CLICKS___________

click_predictions = model_clicks.predict_classes(predict_data, verbose=1)

## PAYPRICE___________

payprice_predictions = model_payprice.predict(predict_data, verbose=1)
#payprice_predictions = payprice_scaler.inverse_transform(payprice_predictions_scaled)




## Make Bidding Decision

In [108]:
def set_bids(bids, click_predictions, payprice_predictions):

## 1.) Only bid for expected clicks!

    for p in range(0, len(bids)):

        if click_predictions[p] == 1:
            #bids[p] = math.floor(payprice_predictions[p])
            bids[p] = 71
            
    return bids



def set_bidprices(bids, budget, payprice_predictions):
    
    ## 2.) Prefer cheap payprice predictions
    
    budget = 6250000
    planned_bid_amount = sum(bids)
    exceed_budget = 100
    n_bids = len(bids[np.where(bids > 0)])

    

    ## (1) spend too much_______________________
    
    if planned_bid_amount - budget > 0:
        
        print("-- spend too much:", planned_bid_amount - budget)
        
        while (planned_bid_amount - budget > exceed_budget):

            #print(round(np.mean(bids)))
            #print(budget - planned_bid_amount)
            index, = list(np.where(bids == max(bids)))    # find expensive bids
            bids[index] = max(bids) - 1                   # set expensive bid lower
            planned_bid_amount = sum(bids)                # check new bidding amount

    
    ## (2) spend too little______________________

    else:
        
        print("-- spend too little:", budget - planned_bid_amount)
        
        while (budget - planned_bid_amount >  (-exceed_budget)):

            #print(round(np.mean(bids)))
            #print(budget - planned_bid_amount)
            index, = list(np.where(bids == 0)) 
            index = random.sample(list(index), 1000)
            bids[index] = 60              
            planned_bid_amount = sum(bids)                


    n_bids = len(bids[np.where(bids > 0)])  
    print("planned_bid_amount:", sum(bids), ", difference to budget:", (budget - sum(bids)), 
              ", number of bids:", n_bids, ", average bidprice:",round(np.mean(bids[np.where(bids > 0)])))
    
    return bids

    

In [109]:
budget = 6250000
bid_array = np.zeros((len(predict_data)))

bid_decisions = set_bids(bid_array, click_predictions, payprice_predictions)
bids = set_bidprices(bid_decisions, budget, payprice_predictions)

-- spend too little: 3454659.0
planned_bid_amount: 6275341.0 , difference to budget: -25341.0 , number of bids: 97371 , average bidprice: 64.0


## Test Decision in Auction

In [100]:
data_path = os.path.abspath(os.pardir + '/Data/validation.csv')
df = pd.read_csv(data_path, na_values=['Na', 'null']).fillna(0)

In [110]:
budget = 6250000

## Evaluation Stats_____________

bids_won = 0
earned_clicks = 0
ctr = 0                  # bids_won / earned_clicks
total_paid = 0
cpc = 0                  # cost per click


for index, row in df.iterrows():

    if bids[index] > budget: # check if budget is sufficient for bidprice
        bids[index] = budget
        #print("constant bid reduced to:", constant_bid, ", total_paid:", total_paid, ", bids_won:", bids_won, ", earned clicks:", earned_clicks, "\n")

    if budget <= 0:
        print("-- break after auction #", index)
        break

    # WON BID ______________________________________________

    if bids[index] >= row['payprice']:     

        bids_won += 1                        # won the bid
        total_paid += row['payprice']        # add amount to total_paid   
        budget = budget - row['payprice']    # substract money from budget

        #if constant_bid == row['bidprice']:      
            #budget = budget - row['payprice']    # substract money from budget

        #elif constant_bid > row['bidprice']:
        #    budget = budget - row['bidprice']    # substract money from budget

        # CLICK = 1 ______________________________________________

        if row['click'] == 1:    # only reduce money from budget if ad has been clicked

                earned_clicks += 1                   # earn the click
                #print("current budget:", budget, ", earned clicks:", earned_clicks, "\n")

    if index%100000 == 0:
        print("bid#", index, ", budget:", budget, ", payprice:", row['payprice'], ", bids_won:", bids_won, ", earned_clicks:", earned_clicks, "\n")



print("__________________________________\n")

if earned_clicks > 0:
    cpc = total_paid / earned_clicks
if bids_won > 0:
    ctr = earned_clicks / bids_won

print("left budget:", budget)
print("bids_won:", bids_won)
print("earned clicks:", earned_clicks)
print("CTR:", ctr)
print("cost per click:", cpc)



bid# 0 , budget: 6250000 , payprice: 23 , bids_won: 0 , earned_clicks: 0 

bid# 100000 , budget: 5695728 , payprice: 63 , bids_won: 16235 , earned_clicks: 12 

bid# 200000 , budget: 5145324 , payprice: 196 , bids_won: 32468 , earned_clicks: 17 

bid# 300000 , budget: 4593496 , payprice: 60 , bids_won: 48665 , earned_clicks: 30 

__________________________________

left budget: 4573175
bids_won: 49262
earned clicks: 30
CTR: 0.0006089886728106857
cost per click: 55894.166666666664


## CSV Submission

In [178]:
data_path_test = os.path.abspath(os.pardir + '/Data/test.csv')
df_test = pd.read_csv(data_path_test, na_values=['Na', 'null']).fillna(0)


In [223]:
bidprice_series = pd.Series(data = bids, name='bidprice')
submission_df = pd.DataFrame({'bidid': df_test['bidid'],'bidprice':bidprice_series})

# Group Token: QQri5ISZz4Kn
submission_df.to_csv('testing_bidding_price.csv', index = False)
