## Imports

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import joblib
import pickle

from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV

## Export data

In [2]:
with open('Saved data/X_train_scaled.pickle', 'rb') as data:
    X_train_scaled = pickle.load(data)
    
with open('Saved data/X_test_scaled.pickle', 'rb') as data:
    X_test_scaled = pickle.load(data)

with open('Saved data/y_train.pickle', 'rb') as data:
    y_train = pickle.load(data)
    
with open('Saved data/y_test.pickle', 'rb') as data:
    y_test = pickle.load(data)
    
with open('Saved data/labels.pickle', 'rb') as data:
    labels = pickle.load(data)

## LR grid search

In [4]:
model = LogisticRegression()
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
space = dict()
space['solver'] = ['newton-cg', 'lbfgs', 'saga']
space['penalty'] = ['l2', 'elasticnet', 'l1']
space['C'] = np.logspace(-1.5, 1.5, 5)
space['class_weight'] = ['balanced']

In [None]:
search = GridSearchCV(model, space, scoring='recall_weighted', n_jobs=-1, cv=cv, verbose=2, return_train_score=True)
search.fit(X_train_scaled, y_train.to_numpy().flatten())
searchPredictions = search.predict(X_test_scaled)
print(classification_report(y_test, searchPredictions, target_names=labels))

Fitting 30 folds for each of 180 candidates, totalling 5400 fits


In [None]:
joblib.dump(search, 'Saved models & utils/LR_GridSearch2.pkl')