## Time to test a few ML models

In [1]:
import joblib
import pandas as pd

# ML algorithms
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

from sklearn.model_selection import GridSearchCV

In [2]:
X_train = pd.read_csv("Data\X_Train.csv")
y_train = pd.read_csv("Data\y_train.csv")

In [3]:
def print_results (model):
    print(f"Best parameters: {model.best_params_}\n")
    
    meanTestScores = model.cv_results_["mean_test_score"]
    stdTestScores = model.cv_results_["std_test_score"]
    paramsTested = model.cv_results_["params"]
    
    for mean, std, params in zip(meanTestScores, stdTestScores, paramsTested):
        print(f"{round(mean, 3)}, (+/-{round(std * 2, 3)} for {params})")

In [4]:
def save_results (model, name):
    joblib.dump(model.best_estimator_, fr"Models\\{name}.pkl")

## Logistic Regression

In [5]:
lr = LogisticRegression()
parameters_lr = {"C": [0.001, 0.01, 0.1, 1, 10, 100, 1000]}

cv = GridSearchCV(lr, parameters_lr, cv = 5)
cv.fit(X_train, y_train.values.ravel())

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [6]:
print_results(cv)

Best parameters: {'C': 1}

0.87, (+/-0.022 for {'C': 0.001})
0.87, (+/-0.022 for {'C': 0.01})
0.87, (+/-0.022 for {'C': 0.1})
0.924, (+/-0.072 for {'C': 1})
0.892, (+/-0.068 for {'C': 10})
0.886, (+/-0.093 for {'C': 100})
0.892, (+/-0.09 for {'C': 1000})


In [7]:
save_results(cv, "LR_model")

## Support Vector Machines

In [8]:
svc = SVC()
parameters_svc = {"C": [0.1, 1, 10, 100], 
                  "gamma": [1, 0.1, 0.01, 0.001], 
                  "kernel": ["rbf", "poly", "sigmoid"]}

cv = GridSearchCV(svc, parameters_svc, cv = 5)
cv.fit(X_train, y_train.values.ravel())

In [9]:
print_results(cv)

Best parameters: {'C': 10, 'gamma': 0.001, 'kernel': 'poly'}

0.87, (+/-0.022 for {'C': 0.1, 'gamma': 1, 'kernel': 'rbf'})
0.881, (+/-0.081 for {'C': 0.1, 'gamma': 1, 'kernel': 'poly'})
0.87, (+/-0.022 for {'C': 0.1, 'gamma': 1, 'kernel': 'sigmoid'})
0.87, (+/-0.022 for {'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'})
0.908, (+/-0.1 for {'C': 0.1, 'gamma': 0.1, 'kernel': 'poly'})
0.87, (+/-0.022 for {'C': 0.1, 'gamma': 0.1, 'kernel': 'sigmoid'})
0.87, (+/-0.022 for {'C': 0.1, 'gamma': 0.01, 'kernel': 'rbf'})
0.908, (+/-0.055 for {'C': 0.1, 'gamma': 0.01, 'kernel': 'poly'})
0.87, (+/-0.022 for {'C': 0.1, 'gamma': 0.01, 'kernel': 'sigmoid'})
0.87, (+/-0.022 for {'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'})
0.87, (+/-0.022 for {'C': 0.1, 'gamma': 0.001, 'kernel': 'poly'})
0.87, (+/-0.022 for {'C': 0.1, 'gamma': 0.001, 'kernel': 'sigmoid'})
0.881, (+/-0.043 for {'C': 1, 'gamma': 1, 'kernel': 'rbf'})
0.881, (+/-0.081 for {'C': 1, 'gamma': 1, 'kernel': 'poly'})
0.87, (+/-0.022 for {'C': 1, 'gamma':

In [10]:
save_results(cv, "SVM_model")

## Multilayer Perceptron

In [11]:
myLittlePony = MLPClassifier()
parameters_mlp = {
    'hidden_layer_sizes': [(10,), (50,), (100,)],
    'activation': ['relu', 'tanh', 'logistic'],
    'learning_rate': ['constant', 'invscaling', 'adaptive']
}

cv = GridSearchCV(myLittlePony, parameters_mlp, cv = 5)
cv.fit(X_train, y_train.values.ravel())





In [12]:
print_results(cv)

Best parameters: {'activation': 'relu', 'hidden_layer_sizes': (100,), 'learning_rate': 'invscaling'}

0.589, (+/-0.698 for {'activation': 'relu', 'hidden_layer_sizes': (10,), 'learning_rate': 'constant'})
0.87, (+/-0.022 for {'activation': 'relu', 'hidden_layer_sizes': (10,), 'learning_rate': 'invscaling'})
0.876, (+/-0.026 for {'activation': 'relu', 'hidden_layer_sizes': (10,), 'learning_rate': 'adaptive'})
0.876, (+/-0.026 for {'activation': 'relu', 'hidden_layer_sizes': (50,), 'learning_rate': 'constant'})
0.876, (+/-0.026 for {'activation': 'relu', 'hidden_layer_sizes': (50,), 'learning_rate': 'invscaling'})
0.87, (+/-0.022 for {'activation': 'relu', 'hidden_layer_sizes': (50,), 'learning_rate': 'adaptive'})
0.87, (+/-0.022 for {'activation': 'relu', 'hidden_layer_sizes': (100,), 'learning_rate': 'constant'})
0.881, (+/-0.026 for {'activation': 'relu', 'hidden_layer_sizes': (100,), 'learning_rate': 'invscaling'})
0.87, (+/-0.022 for {'activation': 'relu', 'hidden_layer_sizes': (100

In [13]:
save_results(cv, "MLP_model")

## Random Forest

In [14]:
rf = RandomForestClassifier()
parameters_rf = {
    "n_estimators": [5, 50, 250],
    "max_depth": [2, 4, 8, 16, 32, None]
}
cv = GridSearchCV(rf, parameters_rf, cv = 5)
cv.fit(X_train, y_train.values.ravel())

In [15]:
print_results(cv)

Best parameters: {'max_depth': 16, 'n_estimators': 50}

0.87, (+/-0.022 for {'max_depth': 2, 'n_estimators': 5})
0.87, (+/-0.022 for {'max_depth': 2, 'n_estimators': 50})
0.87, (+/-0.022 for {'max_depth': 2, 'n_estimators': 250})
0.908, (+/-0.055 for {'max_depth': 4, 'n_estimators': 5})
0.897, (+/-0.053 for {'max_depth': 4, 'n_estimators': 50})
0.903, (+/-0.043 for {'max_depth': 4, 'n_estimators': 250})
0.903, (+/-0.121 for {'max_depth': 8, 'n_estimators': 5})
0.919, (+/-0.059 for {'max_depth': 8, 'n_estimators': 50})
0.919, (+/-0.059 for {'max_depth': 8, 'n_estimators': 250})
0.908, (+/-0.131 for {'max_depth': 16, 'n_estimators': 5})
0.924, (+/-0.04 for {'max_depth': 16, 'n_estimators': 50})
0.919, (+/-0.059 for {'max_depth': 16, 'n_estimators': 250})
0.908, (+/-0.055 for {'max_depth': 32, 'n_estimators': 5})
0.924, (+/-0.072 for {'max_depth': 32, 'n_estimators': 50})
0.919, (+/-0.059 for {'max_depth': 32, 'n_estimators': 250})
0.914, (+/-0.053 for {'max_depth': None, 'n_estimators': 

In [16]:
save_results(cv, "RF_model")

## Gradient Boosting

In [17]:
gb = GradientBoostingClassifier()
parameters_gb = {
    "n_estimators": [5, 50, 250, 500],
    "max_depth": [1, 3, 5, 7, 9],
    "learning_rate": [0.01, 0.1, 1, 10, 100]
}

cv = GridSearchCV(gb, parameters_gb, cv = 5)
cv.fit(X_train, y_train.values.ravel())

In [18]:
print_results(cv)

Best parameters: {'learning_rate': 0.01, 'max_depth': 1, 'n_estimators': 500}

0.87, (+/-0.022 for {'learning_rate': 0.01, 'max_depth': 1, 'n_estimators': 5})
0.87, (+/-0.022 for {'learning_rate': 0.01, 'max_depth': 1, 'n_estimators': 50})
0.87, (+/-0.022 for {'learning_rate': 0.01, 'max_depth': 1, 'n_estimators': 250})
0.908, (+/-0.043 for {'learning_rate': 0.01, 'max_depth': 1, 'n_estimators': 500})
0.87, (+/-0.022 for {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 5})
0.87, (+/-0.022 for {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50})
0.903, (+/-0.065 for {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 250})
0.881, (+/-0.163 for {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 500})
0.87, (+/-0.022 for {'learning_rate': 0.01, 'max_depth': 5, 'n_estimators': 5})
0.87, (+/-0.022 for {'learning_rate': 0.01, 'max_depth': 5, 'n_estimators': 50})
0.881, (+/-0.131 for {'learning_rate': 0.01, 'max_depth': 5, 'n_estimators': 250})
0.87, (+/-0.12 for {'lea

In [19]:
save_results(cv, "GB_model")