# 1.4 Code Brief: Tune Regularization Hyperparameters

Quick reference for hyperparameter tuning with GridSearchCV.

## Setup

In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, classification_report, confusion_matrix, make_scorer

## Load Data and Models

In [None]:
# Project locations on the mounted Google Drive.
root_filepath = '/content/drive/MyDrive/projects/Applied-Data-Analytics-For-Higher-Education-Course-2/'
data_filepath = f'{root_filepath}data/'
course3_models = f'{root_filepath}course_3/models/'

# Column holding the label the models predict.
target_column = 'SEM_3_STATUS'

df_training = pd.read_csv(f'{data_filepath}training.csv')
df_testing = pd.read_csv(f'{data_filepath}testing.csv')

# Keep the label out of the feature frames so it can never leak into the
# model inputs, whatever column handling the loaded pipelines perform.
X_train, y_train = df_training.drop(columns=[target_column]), df_training[target_column]
X_test, y_test = df_testing.drop(columns=[target_column]), df_testing[target_column]


def load_pickled_model(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon
    as loading finishes, even if unpickling raises.

    NOTE: unpickling can execute arbitrary code -- only load files that
    were produced by this project.
    """
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


l2_model = load_pickled_model(f'{course3_models}l2_ridge_logistic_model.pkl')
l1_model = load_pickled_model(f'{course3_models}l1_lasso_logistic_model.pkl')
elasticnet_model = load_pickled_model(f'{course3_models}elasticnet_logistic_model.pkl')

## Set Up Cross-Validation

In [None]:
# Scorer used by every grid search below: F1 computed with 'N' as the positive label.
f1_scorer = make_scorer(f1_score, pos_label='N')

# Shared splitter: 5 stratified, shuffled folds with a fixed seed so that
# all three searches are evaluated on the same splits.
cv = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

## Grid Search for L2 Model

In [None]:
# Candidate values for the classifier step's C parameter, spanning 0.001 to 100
# in powers of ten. The L1 and ElasticNet searches reuse this list.
C_values = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]
l2_param_grid = dict(classifier__C=C_values)

# Exhaustive search over C for the L2 pipeline, scored by F1 on the shared
# folds; training-fold scores are kept alongside the validation scores.
l2_grid_search = GridSearchCV(
    estimator=l2_model,
    param_grid=l2_param_grid,
    scoring=f1_scorer,
    cv=cv,
    n_jobs=-1,
    return_train_score=True,
)
l2_grid_search.fit(X_train, y_train)

print(f"L2 Best C: {l2_grid_search.best_params_['classifier__C']}")
print(f"L2 Best CV F1: {l2_grid_search.best_score_:.4f}")

## Grid Search for L1 Model

In [None]:
# Same C candidates as the L2 search, applied to the L1 pipeline.
l1_param_grid = dict(classifier__C=C_values)

# Exhaustive search over C for the L1 pipeline, scored by F1 on the shared
# folds; training-fold scores are kept alongside the validation scores.
l1_grid_search = GridSearchCV(
    estimator=l1_model,
    param_grid=l1_param_grid,
    scoring=f1_scorer,
    cv=cv,
    n_jobs=-1,
    return_train_score=True,
)
l1_grid_search.fit(X_train, y_train)

print(f"L1 Best C: {l1_grid_search.best_params_['classifier__C']}")
print(f"L1 Best CV F1: {l1_grid_search.best_score_:.4f}")

## Grid Search for ElasticNet Model

In [None]:
# ElasticNet tunes two parameters jointly: C (same candidates as the other
# searches) and l1_ratio, giving a 6 x 5 grid of combinations.
l1_ratio_values = [0.1, 0.3, 0.5, 0.7, 0.9]
elasticnet_param_grid = dict(
    classifier__C=C_values,
    classifier__l1_ratio=l1_ratio_values,
)

# Exhaustive search over the grid for the ElasticNet pipeline, scored by F1
# on the shared folds; training-fold scores are kept as well.
elasticnet_grid_search = GridSearchCV(
    estimator=elasticnet_model,
    param_grid=elasticnet_param_grid,
    scoring=f1_scorer,
    cv=cv,
    n_jobs=-1,
    return_train_score=True,
)
elasticnet_grid_search.fit(X_train, y_train)

print(f"ElasticNet Best C: {elasticnet_grid_search.best_params_['classifier__C']}")
print(f"ElasticNet Best l1_ratio: {elasticnet_grid_search.best_params_['classifier__l1_ratio']}")
print(f"ElasticNet Best CV F1: {elasticnet_grid_search.best_score_:.4f}")

## Select Best Model

In [None]:
# Map each display name to its fitted search so the winner can be looked up
# directly by name. Insertion order fixes the row order of the table below.
grid_searches = {
    'L2 (Ridge)': l2_grid_search,
    'L1 (Lasso)': l1_grid_search,
    'ElasticNet': elasticnet_grid_search,
}

# One row per model with its best mean cross-validated F1.
comparison_results = [
    {'Model': model_label, 'Best Score': search.best_score_}
    for model_label, search in grid_searches.items()
]
comparison_df = pd.DataFrame(comparison_results)
display(comparison_df)

# idxmax returns the first row holding the maximum, so ties go to the
# model listed earliest.
best_idx = comparison_df['Best Score'].idxmax()
best_model_name = comparison_df.loc[best_idx, 'Model']

# best_estimator_ is the winning configuration refit by GridSearchCV.
best_model = grid_searches[best_model_name].best_estimator_

print(f"\nBest Model: {best_model_name}")

## Evaluate on Test Set

In [None]:
# Predict on the held-out test set with the selected, tuned model.
y_pred = best_model.predict(X_test)

# F1, precision and recall are all reported with 'N' as the positive label,
# matching the scorer used during the grid searches.
positive_class = {'pos_label': 'N'}

print(f"Test F1 Score: {f1_score(y_test, y_pred, **positive_class):.4f}")
print(f"Test Precision: {precision_score(y_test, y_pred, **positive_class):.4f}")
print(f"Test Recall: {recall_score(y_test, y_pred, **positive_class):.4f}")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred):.4f}")

# Per-class breakdown of the same predictions.
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

## Save Best Model

In [None]:
# Turn the display name into a file-friendly stem,
# e.g. 'L2 (Ridge)' -> 'l2_ridge', 'ElasticNet' -> 'elasticnet'.
best_model_filename = best_model_name.lower().replace(' ', '_').replace('(', '').replace(')', '')
save_path = f'{course3_models}{best_model_filename}_tuned.pkl'

# Write inside a context manager so the file is flushed and closed before
# the path is reported as saved, rather than whenever the handle happens
# to be garbage-collected.
with open(save_path, 'wb') as model_file:
    pickle.dump(best_model, model_file)
print(f"Saved: {save_path}")