In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_score, GridSearchCV
from sklearn.metrics import confusion_matrix, precision_score, recall_score, roc_curve, auc, accuracy_score, roc_auc_score, fbeta_score
import numpy as np
from utils import *

In [None]:
# Read the gzip-compressed training CSV and rename columns C1, C14 and C15
# to descriptive names in one chained expression (no in-place mutation).
# NOTE(review): the path is absolute and user-specific, so this cell only runs
# on a machine that has the file at exactly this location.
df_train = pd.read_csv(
    '/Users/dysson/Downloads/train.gz',
    compression='gzip',
).rename(
    columns={
        'C1': 'search_engine_type',
        'C14': 'product_type',
        'C15': 'advertiser_type',
    }
)

In [None]:
# Feature matrix: every column of df_train except the target column `click`.
X = df_train.loc[:, df_train.columns != 'click']
# Target vector: the `click` column itself.
y = df_train['click']

In [None]:
# Subset of feature columns kept for modelling.
reduced_columns = [
    'hour',
    'search_engine_type',
    'banner_pos',
    'device_type',
    'device_conn_type',
    'product_type',
    'advertiser_type',
    'C16',
    'C17',
    'C18',
    'C19',
    'C20',
    'C21',
]
X_reduce = X.loc[:, reduced_columns]

In [None]:
# Hold out 20% of the reduced feature set (and matching targets) for testing;
# random_state = 0 keeps the partition repeatable between runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(X_reduce, y, random_state=0, test_size=0.2)

### MLP

In [None]:
# Split first, then scale, so the scaler's mean/variance are learned from the
# training rows only (fitting on all rows leaks test-set statistics).
# The reduced feature set X_reduce is used here: scaling the full `X` would
# include every non-target column of df_train, not just the selected features,
# and would leave X_reduce unused.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_reduce, y, test_size=.2, random_state=0)

scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# A later cell re-splits `X`, so keep the full scaled matrix under that name.
# It is derived from X_reduce (never from X itself), which makes this cell safe
# to re-run; splitting it with the same random_state reproduces the rows above.
X = scaler.transform(X_reduce)

In [None]:
# Create the classifier (one hidden layer of 8 units) and train it ONCE.
# Fitting separately for predict_proba and predict trained two models; without
# a seed those two models differ, so scores and labels were inconsistent.
clf = MLPClassifier(hidden_layer_sizes=(8,), max_iter=100, random_state=0)
clf.fit(X_train, y_train)

# Class-membership probabilities (one column per class in clf.classes_)
y_score = clf.predict_proba(X_test)
# Hard class predictions from the same fitted model
y_pred = clf.predict(X_test)

In [None]:
# Get accuracy and the area under the ROC curve.
# Column 1 of y_score is the probability of the second class in clf.classes_
# (presumably click == 1 — confirm the target is coded 0/1).
fpr, tpr, thresholds = roc_curve(y_test, y_score[:, 1])
roc_auc = auc(fpr, tpr)
print("Accuracy: %s" % (accuracy_score(y_test, y_pred),))
# Label fixed: the metric is the AUC of the ROC curve, not the "ROC of AUC".
print("AUC of ROC curve: %s" % (roc_auc,))

In [None]:
# Loop over various max_iter configurations
max_iter_list = [10, 20, 30]
for max_iter in max_iter_list:
    clf = MLPClassifier(hidden_layer_sizes=(4,),
                        max_iter=max_iter, random_state=0)
    # Train once per configuration; the original fitted twice, which doubled
    # the training time for an identical (seeded) model.
    clf.fit(X_train, y_train)

    # Extract relevant predictions from the single fitted model
    y_score = clf.predict_proba(X_test)
    y_pred = clf.predict(X_test)

    # Report accuracy and ROC AUC for this configuration
    print("Accuracy for max_iter = %s: %s" % (
        max_iter, accuracy_score(y_test, y_pred)))
    print("AUC for max_iter = %s: %s" % (
        max_iter, roc_auc_score(y_test, y_score[:, 1])))

In [None]:
# Create and loop over various hidden_layer_sizes configurations
hidden_layer_sizes_list = [(4,), (8,), (16,)]
for hidden_layer_sizes in hidden_layer_sizes_list:
    clf = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes,
                        max_iter=10, random_state=0)
    # Train once per configuration; the original fitted twice, which doubled
    # the training time for an identical (seeded) model.
    clf.fit(X_train, y_train)

    # Extract relevant predictions from the single fitted model
    y_score = clf.predict_proba(X_test)
    y_pred = clf.predict(X_test)

    # Report accuracy and ROC AUC for this configuration
    print("Accuracy for hidden_layer_sizes = %s: %s" % (
        hidden_layer_sizes, accuracy_score(y_test, y_pred)))
    print("AUC for hidden_layer_sizes = %s: %s" % (
        hidden_layer_sizes, roc_auc_score(y_test, y_score[:, 1])))

In [None]:
# Create list of hyperparameters
max_iter = [10, 20]
hidden_layer_sizes = [(8,), (16,)]
param_grid = {'max_iter': max_iter, 'hidden_layer_sizes': hidden_layer_sizes}

# Use grid search with cross-validation to find the best parameters, scoring
# each candidate by ROC AUC and running 4 jobs in parallel.
# The base estimator is seeded so repeated runs pick the same winner, matching
# the random_state = 0 used by the other MLP cells.
mlp = MLPClassifier(random_state=0)
clf = GridSearchCV(estimator=mlp, param_grid=param_grid,
                   scoring='roc_auc', n_jobs=4)
clf.fit(X_train, y_train)

# best_score_ is the mean cross-validated ROC AUC of the winning candidate
print("Best Score: ")
print(clf.best_score_)
print("Best Estimator: ")
print(clf.best_estimator_)

In [None]:
# Precision: ROI on ad spend through clicks
## Low precision --> little tangible ROI on clicks (the business may prioritize precision because its ROI is tangible)

# Recall: targeting the relevant audience
## Low recall --> missed ROI opportunities

# F-beta score: weighted harmonic mean of precision and recall, so it always lies between the two; for beta = 1 it sits closer to the smaller of the two values
## The beta coefficient controls how the two metrics are weighted:
# beta = 1 --> precision and recall are weighted equally
# 0 < beta < 1 --> precision is weighted more heavily than recall
# beta > 1 --> recall is weighted more heavily than precision

In [None]:
# Set up MLP classifier, train and predict.
# X_train / X_test / y_train / y_test from the scaled 80/20 split made earlier
# are reused as-is: calling train_test_split again on the same X and y with
# random_state = 0 reproduces exactly the same partition, so the re-split only
# duplicated a large array.
clf = MLPClassifier(hidden_layer_sizes=(16,),
                    max_iter=10, random_state=0)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Evaluate precision, recall and F-beta.
# average = 'weighted' averages the per-class scores weighted by class support.
# NOTE(review): if the intent is precision/recall of the click class only,
# the default average = 'binary' would be the metric to use — confirm.
prec = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
# beta = 0.5 weights precision more heavily than recall
fbeta = fbeta_score(y_test, y_pred, beta=0.5, average='weighted')
print("Precision: %s, Recall: %s, F-beta score: %s" % (prec, recall, fbeta))

In [None]:
# Get precision and total ROI
prec = precision_score(y_test, y_pred, average='weighted')
r = 0.2      # presumably the return earned per click — TODO confirm units
cost = 0.05  # presumably the cost paid per click — TODO confirm units
roi = prec * r / cost

# Get AUC from the classifier fitted for this analysis. The original read
# whatever y_score was left behind by an earlier cell (hidden state), which is
# only right while that cell happens to use the same model configuration.
y_score = clf.predict_proba(X_test)
roc_auc = roc_auc_score(y_test, y_score[:, 1])

print("Total ROI: %s, Precision: %s, AUC of ROC curve: %s" % (
    roi, prec, roc_auc))