In [1]:
import os
import sys
# Put the parent of the current working directory on the import path
# (presumably where the `src` package imported further down lives), but only
# once, so re-running this cell does not add duplicate entries.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# OPTIONAL: Load the "autoreload" extension so that edited modules can be reloaded without restarting the kernel
%load_ext autoreload

# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
%autoreload 2

In [2]:
import joblib
import pandas as pd
import numpy as np
import math
from src.models import eval_model, cross_validation
from sklearn.linear_model import LogisticRegression, PassiveAggressiveRegressor, LinearRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.svm import LinearSVR

In [3]:
# Load the two persisted objects that every experiment below is scored on
# (presumably the feature matrix and the target vector; verify against the
# step that wrote them).
# NOTE: joblib.load unpickles arbitrary objects, so only load trusted files.
X, y = joblib.load("../data/processed/X"), joblib.load("../data/processed/y")

In [4]:
# Best combined score seen so far; re-running this cell resets it to 0.0.
max_roc_auc_score = 0.0


def get_roc_auc_score(classifier):
    """Cross-validate ``classifier`` and save it when it beats the best score so far.

    The estimator is handed to ``cross_validation.cv`` together with the
    notebook-level ``X`` and ``y``; that helper is expected to return a
    ``(training score, validation score)`` pair. The two are folded into one
    number, ``validation * (1 - |training - validation|)``, so a large gap
    between training and validation lowers the result.

    When the combined score is strictly greater than the module-level
    ``max_roc_auc_score``, the estimator is dumped with joblib to
    ``../models/kpw_best_classifier`` and the running maximum is updated.
    Either way a one-line verdict is printed.

    Args:
        classifier: Estimator instance forwarded unchanged to
            ``cross_validation.cv`` and, if it wins, to ``joblib.dump``.

    Returns:
        None. Results are reported via ``print`` and the global maximum.
    """
    global max_roc_auc_score

    train_score, val_score = cross_validation.cv(classifier, X, y)
    # Shrink the validation score by the train/validation gap.
    gap = abs(train_score - val_score)
    combined_score = val_score * (1 - gap)

    # Guard clause: anything that is not a strict improvement is only reported.
    if not (max_roc_auc_score < combined_score):
        print(f"\U00002744 The score {combined_score!s} is not better \U00002744")
        return

    print(
        f"\U0001F525 The score {combined_score!s} is better than "
        f"{max_roc_auc_score!s} so save the model \U0001F525"
    )
    joblib.dump(classifier, "../models/kpw_best_classifier")
    # Update only after the dump, so a failed save does not raise the bar.
    max_roc_auc_score = combined_score

In [5]:
# Baseline: logistic regression with scikit-learn's default hyperparameters.
get_roc_auc_score(LogisticRegression())

Avg ROC AUC score of training set is: 0.7065996932722781
Avg ROC AUC score of valuation set is: 0.70259792948983
🔥 The score 0.6997862985419746 is better than 0.0 so save the model 🔥


In [6]:
# Elastic-net logistic regression weighted mostly toward L1 (l1_ratio=0.9) with
# C=0.7; 'saga' is the scikit-learn solver that supports the elasticnet penalty.
get_roc_auc_score(LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.9, C=0.7))

Avg ROC AUC score of training set is: 0.7055151844928407
Avg ROC AUC score of valuation set is: 0.7025492117358325
🔥 The score 0.7004654699133664 is better than 0.6997862985419746 so save the model 🔥


In [7]:
# Elastic-net logistic regression weighted mostly toward L2 (l1_ratio=0.1) with C=0.8.
get_roc_auc_score(LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.1, C=0.8))

Avg ROC AUC score of training set is: 0.7063133650101105
Avg ROC AUC score of valuation set is: 0.7025607469416709
❄ The score 0.6999243047885212 is not better ❄


In [8]:
# Logistic regression with the liblinear solver and C=1.2 (C is the inverse
# regularization strength, so larger C means weaker regularization).
get_roc_auc_score(LogisticRegression(solver='liblinear', C=1.2))

Avg ROC AUC score of training set is: 0.7067039045736678
Avg ROC AUC score of valuation set is: 0.7026227544659365
❄ The score 0.6997552455358533 is not better ❄


In [9]:
# Logistic regression with C=1.2 and the default solver.
get_roc_auc_score(LogisticRegression(C=1.2))

Avg ROC AUC score of training set is: 0.7067630863652148
Avg ROC AUC score of valuation set is: 0.7026413187044467
❄ The score 0.6997451944398912 is not better ❄


In [10]:
# Ordinary least-squares LinearRegression, which is a regressor rather than a classifier.
# NOTE(review): cross_validation.cv presumably scores its continuous predictions
# for ROC AUC — confirm. If it wins, it is saved as "kpw_best_classifier".
get_roc_auc_score(LinearRegression())

Avg ROC AUC score of training set is: 0.7062174262663696
Avg ROC AUC score of valuation set is: 0.6996658417938028
❄ The score 0.6950819219287211 is not better ❄


In [11]:
# Same least-squares regressor, but fitted without an intercept term.
get_roc_auc_score(LinearRegression(fit_intercept=False))

Avg ROC AUC score of training set is: 0.7033248869724663
Avg ROC AUC score of valuation set is: 0.6969978278954306
❄ The score 0.6925878814617707 is not better ❄


In [12]:
# Linear support-vector regressor with default hyperparameters.
# NOTE(review): the recorded scores for this run are about 0.49, i.e. roughly
# chance level for ROC AUC — possibly unscaled features or non-convergence; verify.
get_roc_auc_score(LinearSVR())

Avg ROC AUC score of training set is: 0.49110185598356715
Avg ROC AUC score of valuation set is: 0.4888657961376053
❄ The score 0.4877726629607978 is not better ❄


In [13]:
# get_roc_auc_score(PassiveAggressiveRegressor())

In [14]:
# get_roc_auc_score(GaussianProcessClassifier())

In [15]:
# get_roc_auc_score(RandomForestClassifier())

In [16]:
# get_roc_auc_score(RandomForestClassifier(n_estimators = 150, random_state = 8, max_depth = 5, min_samples_leaf = 2))

In [17]:
# get_roc_auc_score(RandomForestClassifier(n_estimators = 150, random_state = 8, max_depth = 3, min_samples_leaf = 2))

In [18]:
# get_roc_auc_score(GradientBoostingClassifier())

In [19]:
# get_roc_auc_score(GradientBoostingClassifier(learning_rate=0.1, max_depth = 5))

In [20]:
# get_roc_auc_score(GradientBoostingClassifier(learning_rate=0.05, max_depth = 5))

In [21]:
# get_roc_auc_score(GradientBoostingClassifier(learning_rate=0.01, max_depth = 5))

In [22]:
# get_roc_auc_score(GradientBoostingClassifier(learning_rate=0.005, max_depth = 5))

In [23]:
# get_roc_auc_score(AdaBoostClassifier())

In [24]:
# get_roc_auc_score(GaussianNB())