In [1]:
import os
import numpy as np
import pandas as pd
import pickle

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import ShuffleSplit, GridSearchCV

pd.set_option("display.max_rows", None, "display.max_columns", None)

## Getting the data ready

In [2]:
def get_datasets(span = 5):
    path = os.path.abspath(f'../../data/dataset/women/{span}span_training_set.csv')
    training_df = pd.read_csv(path)

    path = os.path.abspath(f'../../data/dataset/women/{span}span_testing_set.csv')
    testing_df = pd.read_csv(path)

    train_true, test_true = training_df.pop('Win'), testing_df.pop('Win')

    print(f'{len(training_df)} train examples')
    print(f'{len(testing_df)} test examples')

    return training_df, testing_df, train_true, test_true

## Training the model

In [3]:
def train_model(estimator, param_grid, training_df, train_true, n_splits = 5):
    # Create the grid search object
    grid_search = GridSearchCV(estimator, param_grid, cv=ShuffleSplit(n_splits), verbose=5)

    # Fit the grid search to the data
    grid_search.fit(training_df, train_true)

    # Get the best hyperparameters
    best_params = grid_search.best_params_
    print("Best hyperparameters:", best_params)

    return grid_search.best_estimator_

## Testing the model

In [4]:
def test_model(clf, testing_df, test_true):
    y_pred = clf.predict(testing_df)

    accuracy = accuracy_score(test_true, y_pred)
    print(f"Accuracy: {(accuracy*100):.2f}")

    print("\nClassification Report:")
    print(classification_report(test_true, y_pred))

    print("\nConfusion Matrix:")
    print(confusion_matrix(test_true, y_pred))

## Saving the models

In [5]:
def save_model(model, filename):
    path = os.path.abspath(f'../model/womens/{filename}')
    pickle.dump(model, open(path, 'wb'))

## Constant Variables

In [6]:
SPANS = [3, 5, 7]

In [7]:
def train_test_save(estimator, param_grid, filename, spans = [3, 5, 7], n_splits = 5):
    for span in spans:
        print(f'----- Span: {span} -----')
        
        training_df, testing_df, train_true, test_true = get_datasets(span)
        clf = train_model(estimator, param_grid, training_df, train_true, n_splits)
        test_model(clf, testing_df, test_true)
        save_model(clf, f'{span}span_{filename}')

# Logistic Regression

In [8]:
# Define the Logistic Regression model
logreg_model = LogisticRegression()

# Define the hyperparameter grid
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    'penalty': ['l2'],  # Vary the regularization type
    'solver': ['lbfgs', 'newton-cholesky'],
    'max_iter': [int(10e10)]
}

filename = 'logistic_regression_model.pkl'

train_test_save(logreg_model, param_grid, filename)

----- Span: 3 -----
12390 train examples
5310 test examples
Fitting 5 folds for each of 14 candidates, totalling 70 fits
[CV 1/5] END C=0.001, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.706 total time=   0.8s
[CV 2/5] END C=0.001, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.704 total time=   0.7s
[CV 3/5] END C=0.001, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.719 total time=   1.0s
[CV 4/5] END C=0.001, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.726 total time=   0.8s
[CV 5/5] END C=0.001, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.730 total time=   0.9s
[CV 1/5] END C=0.001, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.705 total time=   0.0s
[CV 2/5] END C=0.001, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.704 total time=   0.0s
[CV 3/5] END C=0.001, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.718 total time=   0.0s
[CV 4/5] END C=0.001, max

STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.710 total time=  16.9s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.706 total time=  18.0s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.716 total time=  17.8s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.716 total time=  17.0s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.731 total time=  16.9s
[CV 1/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.711 total time=   0.0s
[CV 2/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.705 total time=   0.0s
[CV 3/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.716 total time=   0.0s
[CV 4/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.713 total time=   0.0s
[CV 5/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.728 total time=   0.0s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.709 total time=  17.5s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.706 total time=  18.1s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.716 total time=  17.9s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.713 total time=  17.0s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.731 total time=  18.4s
[CV 1/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.712 total time=   0.0s
[CV 2/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.702 total time=   0.0s
[CV 3/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.710 total time=   0.0s
[CV 4/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.716 total time=   0.0s
[CV 5/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.730 total time=   0.0s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.710 total time=  17.2s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.707 total time=  17.3s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.718 total time=  17.1s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.712 total time=  17.1s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.731 total time=  17.4s
[CV 1/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.711 total time=   0.0s
[CV 2/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.702 total time=   0.0s
[CV 3/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.706 total time=   0.0s
[CV 4/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.714 total time=   0.0s
[CV 5/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.730 total time=   0.0s
Best hyperparameters: {'C': 0.01, 'max_iter': 100000000000, 'penalty': 'l2', 'solver': 'newton-cholesky'}
Accuracy: 72.86

Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.62      0.66      2199
           1       0.75      0.80      0.78      3111

    accuracy                

STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.701 total time=  14.6s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.713 total time=  14.6s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.725 total time=  13.0s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.702 total time=  13.5s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.749 total time=  13.6s
[CV 1/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.700 total time=   0.0s
[CV 2/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.713 total time=   0.0s
[CV 3/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.720 total time=   0.0s
[CV 4/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.702 total time=   0.0s
[CV 5/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.749 total time=   0.0s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.697 total time=  13.3s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.713 total time=  14.2s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.722 total time=  14.7s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.701 total time=  13.3s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.748 total time=  13.4s
[CV 1/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.706 total time=   0.0s
[CV 2/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.708 total time=   0.0s
[CV 3/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.726 total time=   0.0s
[CV 4/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.701 total time=   0.0s
[CV 5/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.747 total time=   0.0s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.702 total time=  14.9s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.712 total time=  15.9s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.722 total time=  15.8s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.704 total time=  14.4s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.749 total time=  14.1s
[CV 1/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.709 total time=   0.0s
[CV 2/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.704 total time=   0.0s
[CV 3/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.721 total time=   0.0s
[CV 4/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.699 total time=   0.0s
[CV 5/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.752 total time=   0.0s
Best hyperparameters: {'C': 0.1, 'max_iter': 100000000000, 'penalty': 'l2', 'solver': 'lbfgs'}
Accuracy: 72.91

Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.62      0.65      1992
           1       0.75      0.81      0.78      2844

    accuracy                           

STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.723 total time=  11.2s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.721 total time=  11.0s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.728 total time=  11.6s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.751 total time=  10.4s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END C=10, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.700 total time=  10.2s
[CV 1/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.726 total time=   0.0s
[CV 2/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.720 total time=   0.0s
[CV 3/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.731 total time=   0.0s
[CV 4/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.746 total time=   0.0s
[CV 5/5] END C=10, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.703 total time=   0.0s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.725 total time=  11.9s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.718 total time=  11.6s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.732 total time=  11.9s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.752 total time=  11.9s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END C=100, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.703 total time=  10.5s
[CV 1/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.724 total time=   0.0s
[CV 2/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.718 total time=   0.0s
[CV 3/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.735 total time=   0.0s
[CV 4/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.747 total time=   0.0s
[CV 5/5] END C=100, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.704 total time=   0.0s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.725 total time=  10.1s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.720 total time=   9.8s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.728 total time=  10.8s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.751 total time=  11.7s


STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END C=1000, max_iter=100000000000, penalty=l2, solver=lbfgs;, score=0.709 total time=  12.0s
[CV 1/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.729 total time=   0.0s
[CV 2/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.720 total time=   0.0s
[CV 3/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.732 total time=   0.0s
[CV 4/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.748 total time=   0.0s
[CV 5/5] END C=1000, max_iter=100000000000, penalty=l2, solver=newton-cholesky;, score=0.708 total time=   0.0s
Best hyperparameters: {'C': 1000, 'max_iter': 100000000000, 'penalty': 'l2', 'solver': 'newton-cholesky'}
Accuracy: 73.10

Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.62      0.66      1829
           1       0.75      0.81      0.78      2543

    accuracy                

# Support Vector Machine

In [9]:
# Define the SVM model
svm_model = SVC()

# Define the hyperparameter grid
# param_grid = {
#     'C': [0.01, 0.1, 1, 10],
#     'kernel': ['linear', 'rbf', 'sigmoid'],
#     'gamma': [0.1, 0.01, 0.001],
#     # 'probability': [True]
# }

param_grid = {
    'C': [0.01],
    'kernel': ['linear'],
    'gamma': [0.1],
    'probability': [True]
}

filename = 'support_vector_machine_model.pkl'

train_test_save(svm_model, param_grid, filename, n_splits=1)

----- Span: 3 -----
12390 train examples
5310 test examples
Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV 1/1] END C=0.01, gamma=0.1, kernel=linear, probability=True;, score=0.724 total time= 1.5min
Best hyperparameters: {'C': 0.01, 'gamma': 0.1, 'kernel': 'linear', 'probability': True}
Accuracy: 73.03

Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.62      0.66      2199
           1       0.75      0.81      0.78      3111

    accuracy                           0.73      5310
   macro avg       0.72      0.71      0.72      5310
weighted avg       0.73      0.73      0.73      5310


Confusion Matrix:
[[1370  829]
 [ 603 2508]]
----- Span: 5 -----
11282 train examples
4836 test examples
Fitting 1 folds for each of 1 candidates, totalling 1 fits
[CV 1/1] END C=0.01, gamma=0.1, kernel=linear, probability=True;, score=0.714 total time= 1.1min
Best hyperparameters: {'C': 0.01, 'gamma': 0.1, 'kernel': 'linear'

# K-Nearest Neighbors (KNN)

In [10]:
# Define the KNN model
knn = KNeighborsClassifier()

# Define the hyperparameter grid
param_grid = {
    'n_neighbors': [1, 3, 5, 7, 10],  # Number of neighbors
    'weights': ['uniform', 'distance'],  # Weighting scheme
    'p': [1, 2],  # Distance metric (1 for Manhattan, 2 for Euclidean)
    'leaf_size': [15, 30, 45],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}

filename = 'knn_model.pkl'

train_test_save(knn, param_grid, filename, n_splits=1)

----- Span: 3 -----
12390 train examples
5310 test examples
Fitting 1 folds for each of 240 candidates, totalling 240 fits
[CV 1/1] END algorithm=auto, leaf_size=15, n_neighbors=1, p=1, weights=uniform;, score=0.628 total time=   0.5s
[CV 1/1] END algorithm=auto, leaf_size=15, n_neighbors=1, p=1, weights=distance;, score=0.628 total time=   0.3s
[CV 1/1] END algorithm=auto, leaf_size=15, n_neighbors=1, p=2, weights=uniform;, score=0.621 total time=   0.1s
[CV 1/1] END algorithm=auto, leaf_size=15, n_neighbors=1, p=2, weights=distance;, score=0.621 total time=   0.0s
[CV 1/1] END algorithm=auto, leaf_size=15, n_neighbors=3, p=1, weights=uniform;, score=0.662 total time=   0.3s
[CV 1/1] END algorithm=auto, leaf_size=15, n_neighbors=3, p=1, weights=distance;, score=0.661 total time=   0.3s
[CV 1/1] END algorithm=auto, leaf_size=15, n_neighbors=3, p=2, weights=uniform;, score=0.662 total time=   0.1s
[CV 1/1] END algorithm=auto, leaf_size=15, n_neighbors=3, p=2, weights=distance;, score=0.

# Random Forests

In [11]:
# Define the Random Forest model
rfc = RandomForestClassifier()

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [100, 200, 500],  # Number of trees in the forest
    'max_features': ['auto', 'sqrt', 'log2'],  # Maximum number of features considered for splitting
    'max_depth': [4, 5, 6, 7, 8, None],  # Maximum depth of each tree
    'criterion': ['gini', 'entropy', 'log_loss']  # Split quality criterion
}

filename = 'random_forest.pkl'

train_test_save(rfc, param_grid, filename, n_splits=1)

----- Span: 3 -----
12390 train examples
5310 test examples
Fitting 1 folds for each of 162 candidates, totalling 162 fits
[CV 1/1] END criterion=gini, max_depth=4, max_features=auto, n_estimators=100;, score=nan total time=   0.0s
[CV 1/1] END criterion=gini, max_depth=4, max_features=auto, n_estimators=200;, score=nan total time=   0.0s
[CV 1/1] END criterion=gini, max_depth=4, max_features=auto, n_estimators=500;, score=nan total time=   0.0s
[CV 1/1] END criterion=gini, max_depth=4, max_features=sqrt, n_estimators=100;, score=0.716 total time=   3.0s
[CV 1/1] END criterion=gini, max_depth=4, max_features=sqrt, n_estimators=200;, score=0.719 total time=   6.2s
[CV 1/1] END criterion=gini, max_depth=4, max_features=sqrt, n_estimators=500;, score=0.705 total time=  15.7s
[CV 1/1] END criterion=gini, max_depth=4, max_features=log2, n_estimators=100;, score=0.707 total time=   1.7s
[CV 1/1] END criterion=gini, max_depth=4, max_features=log2, n_estimators=200;, score=0.710 total time=   

54 fits failed out of a total of 162.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
54 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\utils\_param_vali

Best hyperparameters: {'criterion': 'entropy', 'max_depth': 8, 'max_features': 'sqrt', 'n_estimators': 100}
Accuracy: 71.15

Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.55      0.61      2199
           1       0.72      0.82      0.77      3111

    accuracy                           0.71      5310
   macro avg       0.71      0.69      0.69      5310
weighted avg       0.71      0.71      0.70      5310


Confusion Matrix:
[[1213  986]
 [ 546 2565]]
----- Span: 5 -----
11282 train examples
4836 test examples
Fitting 1 folds for each of 162 candidates, totalling 162 fits
[CV 1/1] END criterion=gini, max_depth=4, max_features=auto, n_estimators=100;, score=nan total time=   0.0s
[CV 1/1] END criterion=gini, max_depth=4, max_features=auto, n_estimators=200;, score=nan total time=   0.0s
[CV 1/1] END criterion=gini, max_depth=4, max_features=auto, n_estimators=500;, score=nan total time=   0.0s
[CV 1/1] END criterion=gini, 

54 fits failed out of a total of 162.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
54 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\utils\_param_vali

Best hyperparameters: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'n_estimators': 100}
Accuracy: 71.79

Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.60      0.64      1992
           1       0.74      0.80      0.77      2844

    accuracy                           0.72      4836
   macro avg       0.71      0.70      0.70      4836
weighted avg       0.72      0.72      0.71      4836


Confusion Matrix:
[[1200  792]
 [ 572 2272]]
----- Span: 7 -----
10200 train examples
4372 test examples
Fitting 1 folds for each of 162 candidates, totalling 162 fits
[CV 1/1] END criterion=gini, max_depth=4, max_features=auto, n_estimators=100;, score=nan total time=   0.0s
[CV 1/1] END criterion=gini, max_depth=4, max_features=auto, n_estimators=200;, score=nan total time=   0.0s
[CV 1/1] END criterion=gini, max_depth=4, max_features=auto, n_estimators=500;, score=nan total time=   0.0s
[CV 1/1] END criterion=gin

54 fits failed out of a total of 162.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
54 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\utils\_param_vali

Best hyperparameters: {'criterion': 'entropy', 'max_depth': None, 'max_features': 'sqrt', 'n_estimators': 500}
Accuracy: 72.60

Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.59      0.64      1829
           1       0.74      0.82      0.78      2543

    accuracy                           0.73      4372
   macro avg       0.72      0.71      0.71      4372
weighted avg       0.72      0.73      0.72      4372


Confusion Matrix:
[[1082  747]
 [ 451 2092]]


# Gradient Boosting

In [12]:
# Define the Random Forest model
gbc = GradientBoostingClassifier()

# Define the hyperparameter grid
param_grid = {
    'loss': ['log_loss', 'deviance', 'exponential'],  # Loss function
    'learning_rate': [0.01, 0.05, 0.1],  # Learning rate (eta)
    'max_depth': [3, 5, 8],  # Maximum depth of each tree
    'max_features': ['log2', 'sqrt'],  # Maximum number of features considered for splitting
    'n_estimators': [100, 200, 300]  # Number of trees
}

filename = 'gradient_boosting.pkl'

train_test_save(gbc, param_grid, filename, n_splits=1)

----- Span: 3 -----
12390 train examples
5310 test examples
Fitting 1 folds for each of 162 candidates, totalling 162 fits
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log2, n_estimators=100;, score=0.672 total time=   1.9s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log2, n_estimators=200;, score=0.712 total time=   3.9s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log2, n_estimators=300;, score=0.720 total time=   5.8s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=sqrt, n_estimators=100;, score=0.681 total time=   3.4s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=sqrt, n_estimators=200;, score=0.718 total time=   6.9s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=sqrt, n_estimators=300;, score=0.718 total time=  10.3s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=5, max_features=log2, n_estimators=100;

54 fits failed out of a total of 162.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
54 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\utils\_param_vali

Best hyperparameters: {'learning_rate': 0.1, 'loss': 'exponential', 'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 200}
Accuracy: 71.98

Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.62      0.65      2199
           1       0.75      0.79      0.77      3111

    accuracy                           0.72      5310
   macro avg       0.71      0.71      0.71      5310
weighted avg       0.72      0.72      0.72      5310


Confusion Matrix:
[[1364  835]
 [ 653 2458]]
----- Span: 5 -----
11282 train examples
4836 test examples
Fitting 1 folds for each of 162 candidates, totalling 162 fits
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log2, n_estimators=100;, score=0.686 total time=   1.8s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log2, n_estimators=200;, score=0.720 total time=   3.6s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=l

54 fits failed out of a total of 162.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
54 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\utils\_param_vali

Best hyperparameters: {'learning_rate': 0.05, 'loss': 'log_loss', 'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 300}
Accuracy: 71.79

Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.61      0.64      1992
           1       0.75      0.79      0.77      2844

    accuracy                           0.72      4836
   macro avg       0.71      0.70      0.70      4836
weighted avg       0.72      0.72      0.72      4836


Confusion Matrix:
[[1222  770]
 [ 594 2250]]
----- Span: 7 -----
10200 train examples
4372 test examples
Fitting 1 folds for each of 162 candidates, totalling 162 fits
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log2, n_estimators=100;, score=0.694 total time=   1.7s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log2, n_estimators=200;, score=0.721 total time=   3.4s
[CV 1/1] END learning_rate=0.01, loss=log_loss, max_depth=3, max_features=log

54 fits failed out of a total of 162.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
54 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\mtayl\anaconda3\envs\mlmb-model-env\Lib\site-packages\sklearn\utils\_param_vali

Best hyperparameters: {'learning_rate': 0.01, 'loss': 'exponential', 'max_depth': 8, 'max_features': 'log2', 'n_estimators': 100}
Accuracy: 71.84

Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.49      0.59      1829
           1       0.71      0.88      0.79      2543

    accuracy                           0.72      4372
   macro avg       0.73      0.69      0.69      4372
weighted avg       0.73      0.72      0.70      4372


Confusion Matrix:
[[ 891  938]
 [ 293 2250]]


# Multilayer Perceptron

In [13]:
# Define the MLP model
mlp_model = MLPClassifier()

# Define the hyperparameter grid
param_grid = {
    'hidden_layer_sizes': [(12,), (23,), (45,), (91,), (181,)],  # Vary the number of neurons in the hidden layer
    'activation': ['relu', 'identity', 'logistic', 'tanh'],  # Vary the activation function
    'solver': ['adam', 'sgd', 'adam'],  # Vary the solver
    'alpha': [0.0001, 0.001],  # Vary the L2 regularization strength
}

filename = 'multilayer_perceptron.pkl'

train_test_save(mlp_model, param_grid, filename, n_splits=1)

----- Span: 3 -----
12390 train examples
5310 test examples
Fitting 1 folds for each of 120 candidates, totalling 120 fits
[CV 1/1] END activation=relu, alpha=0.0001, hidden_layer_sizes=(12,), solver=adam;, score=0.730 total time=   3.5s
[CV 1/1] END activation=relu, alpha=0.0001, hidden_layer_sizes=(12,), solver=sgd;, score=0.737 total time=   3.0s
[CV 1/1] END activation=relu, alpha=0.0001, hidden_layer_sizes=(12,), solver=adam;, score=0.729 total time=   3.3s
[CV 1/1] END activation=relu, alpha=0.0001, hidden_layer_sizes=(23,), solver=adam;, score=0.716 total time=   1.7s
[CV 1/1] END activation=relu, alpha=0.0001, hidden_layer_sizes=(23,), solver=sgd;, score=0.735 total time=   2.9s
[CV 1/1] END activation=relu, alpha=0.0001, hidden_layer_sizes=(23,), solver=adam;, score=0.680 total time=   1.8s
[CV 1/1] END activation=relu, alpha=0.0001, hidden_layer_sizes=(45,), solver=adam;, score=0.703 total time=   2.3s
[CV 1/1] END activation=relu, alpha=0.0001, hidden_layer_sizes=(45,), solv