# Nested cross-validation (logistic regression as an example)

In [None]:
import sklearn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score

In [None]:
# Nested cross-validation for a logistic-regression classifier.
#   * inner loop (GridSearchCV)    -> picks the hyperparameters
#   * outer loop (cross_val_score) -> scores the whole "tune, then fit" procedure
# NOTE(review): X_train, y_train, X_test and y_test are not defined in this
# cell -- presumably they come from an earlier train_test_split; confirm.

# Base estimator. liblinear supports both the 'l1' and 'l2' penalties that
# are searched below.
# NOTE(review): multi_class is deprecated in recent scikit-learn releases --
# check against the installed version before upgrading.
log_reg = LogisticRegression(
    solver='liblinear',
    multi_class='ovr',
)

# Search space: 6 values of C x 2 penalties x 5 iteration caps = 60 candidates.
# NOTE(review): max_iter is a convergence budget rather than a regularisation
# knob; searching over it multiplies the number of fits by five.
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2'],
    'max_iter': list(range(100, 501, 100)),
}

# Both loops use the same splitter configuration: 5 shuffled folds with a
# fixed seed so the splits are reproducible.
_kfold_kwargs = dict(n_splits=5, shuffle=True, random_state=12345)
inner_cv = KFold(**_kfold_kwargs)
outer_cv = KFold(**_kfold_kwargs)

# Inner loop: exhaustive grid search. No scoring= is passed, so scikit-learn
# falls back to the estimator's own default scorer.
grid_search = GridSearchCV(estimator=log_reg, param_grid=param_grid, cv=inner_cv)

# Outer loop: the search object is itself the estimator being cross-validated,
# so every outer fold runs a full grid search on its own training part.
# cross_val_score works on clones, which is why grid_search is still unfitted
# afterwards.
cross_val_scores = cross_val_score(
    estimator=grid_search,
    X=X_train,
    y=y_train,
    cv=outer_cv,
)

# Final search on the complete training set; this is what makes
# best_params_ / best_estimator_ available.
grid_search.fit(X_train, y_train)

print("Mean cross-validation score:", cross_val_scores.mean())
print("Best hyperparameters:", grid_search.best_params_)

# Held-out evaluation of the refitted winner.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)

# Per-class precision / recall / F1 on the same held-out predictions.
print(classification_report(y_test, y_pred))
