# 1.3 Code Brief: Train and Compare Regularized Models

Quick reference for training and comparing regularized logistic regression models.

## Setup

In [None]:
import pickle

import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.metrics import make_scorer, f1_score, precision_score, recall_score
from sklearn.model_selection import StratifiedKFold, cross_val_score, cross_validate

## Load Data and Models

In [None]:
# Project paths: the project root, the CSV data folder, and the folder that
# holds the pickled course-3 models.
root_filepath = '/content/drive/MyDrive/projects/Applied-Data-Analytics-For-Higher-Education-Course-2/'
data_filepath = f'{root_filepath}data/'
course3_models = f'{root_filepath}course_3/models/'

# Training data; the label is the SEM_3_STATUS column.
df_training = pd.read_csv(f'{data_filepath}training.csv')
# NOTE(review): X_train is the full frame, so it still contains the target
# column. Presumably each pipeline's preprocessing step selects its feature
# columns by name and ignores the rest -- confirm, otherwise this leaks the label.
X_train = df_training
y_train = df_training['SEM_3_STATUS']


def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


l2_model = _load_pickle(f'{course3_models}l2_ridge_logistic_model.pkl')
l1_model = _load_pickle(f'{course3_models}l1_lasso_logistic_model.pkl')
elasticnet_model = _load_pickle(f'{course3_models}elasticnet_logistic_model.pkl')

# Display label -> model; the labels are reused as keys and plot legends
# in the cells below.
models = {
    'L2 (Ridge)': l2_model,
    'L1 (Lasso)': l1_model,
    'ElasticNet': elasticnet_model,
}

## Train All Models

In [None]:
# Fit every model on the training data and collect them under the same labels.
# fit() is called on the objects held in `models`, so `trained_models` refers
# to those same instances rather than to copies.
trained_models = {}
for label in models:
    estimator = models[label]
    print(f"Training {label}...", end=" ")
    estimator.fit(X_train, y_train)
    trained_models[label] = estimator
    print("Done!")

## Compare Coefficients

In [None]:
# Feature names are read from the Ridge pipeline's preprocessing step and
# reused for every model.
# NOTE(review): this assumes all three pipelines share the same preprocessing
# (same output columns in the same order) -- confirm.
preprocessor = trained_models['L2 (Ridge)'].named_steps['preprocessing']
feature_names = preprocessor.get_feature_names_out()
# Keep only the text after the last '__' separator in each generated name.
feature_names_clean = [raw_name.split('__')[-1] for raw_name in feature_names]

# One long-format row per (model, feature) pair.
coef_data = []
for label, pipeline in trained_models.items():
    weights = pipeline.named_steps['classifier'].coef_[0]
    coef_data.extend(
        {'Model': label, 'Feature': feature, 'Coefficient': weight}
        for feature, weight in zip(feature_names_clean, weights)
    )

coef_df = pd.DataFrame(coef_data)

# Grouped bars: one cluster per feature, one bar per model.
fig = px.bar(
    coef_df,
    x='Feature',
    y='Coefficient',
    color='Model',
    barmode='group',
    title='Coefficient Comparison Across Regularization Types',
    height=500,
)
fig.update_xaxes(tickangle=45)
fig.show()

## L1 Feature Selection

In [None]:
# Coefficients of the fitted L1 (Lasso) classifier; a coefficient of exactly
# zero is reported as an eliminated feature.
l1_classifier = trained_models['L1 (Lasso)'].named_steps['classifier']
l1_coefficients = l1_classifier.coef_[0]

l1_selection = pd.DataFrame({
    'Feature': feature_names_clean,
    'Coefficient': l1_coefficients,
    'Selected': l1_coefficients != 0,
})
# Order rows by coefficient magnitude, largest first.
l1_selection = l1_selection.sort_values(
    'Coefficient', key=lambda column: column.abs(), ascending=False
)

selected_mask = l1_selection['Selected']
print(f"Total features: {len(l1_selection)}")
print(f"Selected (non-zero): {selected_mask.sum()}")
print(f"Eliminated (zero): {(~selected_mask).sum()}")
display(l1_selection)

## Cross-Validation Comparison

In [None]:
# 5-fold stratified cross-validation with a fixed seed, so every model is
# scored on the same splits.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# All three metrics treat the label 'N' as the positive class.
f1_scorer = make_scorer(f1_score, pos_label='N')
precision_scorer = make_scorer(precision_score, pos_label='N')
recall_scorer = make_scorer(recall_score, pos_label='N')

# Passing all scorers to a single cross_validate call fits each fold once and
# scores it three ways, instead of refitting the model once per metric.
scoring = {
    'f1': f1_scorer,
    'precision': precision_scorer,
    'recall': recall_scorer,
}

cv_results = []
for name, model in models.items():
    print(f"Cross-validating {name}...")
    fold_scores = cross_validate(model, X_train, y_train, cv=cv, scoring=scoring)
    # cross_validate reports each metric's per-fold scores under 'test_<key>'.
    f1_scores = fold_scores['test_f1']
    precision_scores = fold_scores['test_precision']
    recall_scores = fold_scores['test_recall']
    cv_results.append({
        'Model': name,
        'F1 Mean': f1_scores.mean(),
        'Precision Mean': precision_scores.mean(),
        'Recall Mean': recall_scores.mean(),
    })

# One row per model with the mean of each metric across folds.
cv_df = pd.DataFrame(cv_results)
display(cv_df)

## Save Trained Models

In [None]:
# Persist each fitted model next to the originals, e.g. 'L2 (Ridge)' is
# written to '<course3_models>l2_ridge_trained.pkl'.
for name, model in trained_models.items():
    # Lower-case the label, turn spaces into underscores, drop parentheses.
    filename = name.lower().replace(' ', '_').replace('(', '').replace(')', '')
    filepath = f'{course3_models}{filename}_trained.pkl'
    # The context manager flushes and closes the file before "Saved" is
    # reported, so the pickle on disk is complete.
    with open(filepath, 'wb') as handle:
        pickle.dump(model, handle)
    print(f"Saved: {filepath}")