# 1.2 Code Brief: Build Regularized Logistic Regression Models

Quick reference for building L1, L2, and ElasticNet regularized models.

## Setup

In [None]:
import pandas as pd
import pickle
import os
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder

## Load Data

In [None]:
# Set up file paths
root_filepath = '/content/drive/MyDrive/projects/Applied-Data-Analytics-For-Higher-Education-Course-2/'
data_filepath = f'{root_filepath}data/'
course3_filepath = f'{root_filepath}course_3/'

# Outcome column the classifiers are trained to predict.
target_column = 'SEM_3_STATUS'

df_training = pd.read_csv(f'{data_filepath}training.csv')

# Keep the target out of the feature matrix. The preprocessor only selects
# named feature columns, but dropping the target here means a later switch to
# remainder='passthrough' (or any direct use of X_train) cannot leak the label.
X_train = df_training.drop(columns=[target_column])
y_train = df_training[target_column]

## Define Feature Groups

In [None]:
# Feature columns grouped by the kind of preprocessing each group receives.

# Numeric features intended for min-max scaling.
minmax_columns = [
    'HS_GPA',
    'GPA_1',
    'GPA_2',
    'DFW_RATE_1',
    'DFW_RATE_2',
]

# Numeric features intended for standardization.
standard_columns = [
    'UNITS_ATTEMPTED_1',
    'UNITS_ATTEMPTED_2',
]

# Categorical features intended for one-hot encoding. The order matters to
# any encoder option that is specified per column (e.g. a per-feature `drop`).
categorical_columns = [
    'GENDER',
    'RACE_ETHNICITY',
    'FIRST_GEN_STATUS',
]

## Create Preprocessor

In [None]:
# Route each feature group to its own transformer. Any column that is not
# listed in one of the groups is discarded (remainder='drop').
preprocessor = ColumnTransformer(
    remainder='drop',
    transformers=[
        ('minmax', MinMaxScaler(), minmax_columns),
        ('standard', StandardScaler(), standard_columns),
        (
            'onehot',
            OneHotEncoder(
                # One reference level per categorical column, given in the
                # same order as categorical_columns; that level gets no
                # indicator column of its own.
                drop=['Female', 'Other', 'Unknown'],
                # Categories not seen during fit do not raise at transform time.
                handle_unknown='ignore',
                # Return a dense array rather than a sparse matrix.
                sparse_output=False,
            ),
            categorical_columns,
        ),
    ],
)

## Build L2 (Ridge) Model

In [None]:
# L2-penalized (ridge) logistic regression: preprocessing step followed by
# the classifier, bundled as a single pipeline.
l2_logistic_model = Pipeline(steps=[
    ('preprocessing', preprocessor),
    (
        'classifier',
        LogisticRegression(
            solver='lbfgs',
            penalty='l2',
            C=1.0,                    # inverse regularization strength
            class_weight='balanced',  # reweight classes by inverse frequency
            max_iter=1000,
            random_state=42,
        ),
    ),
])

## Build L1 (Lasso) Model

In [None]:
# L1-penalized (lasso) logistic regression: preprocessing step followed by
# the classifier, bundled as a single pipeline.
l1_logistic_model = Pipeline(steps=[
    ('preprocessing', preprocessor),
    (
        'classifier',
        LogisticRegression(
            solver='saga',            # an L1-capable solver (lbfgs is not)
            penalty='l1',
            C=1.0,                    # inverse regularization strength
            class_weight='balanced',  # reweight classes by inverse frequency
            max_iter=1000,
            random_state=42,
        ),
    ),
])

## Build ElasticNet Model

In [None]:
# ElasticNet-penalized logistic regression (a blend of L1 and L2):
# preprocessing step followed by the classifier, bundled as a single pipeline.
elasticnet_logistic_model = Pipeline(steps=[
    ('preprocessing', preprocessor),
    (
        'classifier',
        LogisticRegression(
            solver='saga',            # needed for the elasticnet penalty
            penalty='elasticnet',
            l1_ratio=0.5,             # equal weight on the L1 and L2 terms
            C=1.0,                    # inverse regularization strength
            class_weight='balanced',  # reweight classes by inverse frequency
            max_iter=1000,
            random_state=42,
        ),
    ),
])

## Save Models

In [None]:
# Display name -> pipeline. The display name is also the basis for the
# pickle file name written below.
models = {
    'L2 (Ridge)': l2_logistic_model,
    'L1 (Lasso)': l1_logistic_model,
    'ElasticNet': elasticnet_logistic_model
}

models_path = f'{course3_filepath}models/'
os.makedirs(models_path, exist_ok=True)

# NOTE(review): no .fit() call is visible in this section, so the pipelines
# appear to be pickled unfitted -- confirm that fitting happens after loading.
for name, model in models.items():
    # Turn the display name into a file-system friendly stem,
    # e.g. 'L2 (Ridge)' -> 'l2_ridge'.
    filename = name.lower().replace(' ', '_').replace('(', '').replace(')', '')
    filepath = f'{models_path}{filename}_logistic_model.pkl'
    # Use a context manager so the handle is flushed and closed even if
    # pickling raises; the bare open() call previously leaked the handle.
    with open(filepath, 'wb') as model_file:
        pickle.dump(model, model_file)
    print(f"Saved: {filepath}")

## Key Parameters

| Parameter | Description | Values |
|:----------|:------------|:-------|
| `penalty` | Regularization type | 'l1', 'l2', 'elasticnet' |
| `C` | Inverse regularization strength (smaller = stronger regularization) | float > 0 |
| `solver` | Optimization algorithm | 'lbfgs' for L2; 'saga' for L1/ElasticNet |
| `l1_ratio` | ElasticNet mixing | 0-1 (1=L1, 0=L2) |