## 3. L1 and L2 regularization
Comparing:
- Linear regression with Lasso and Ridge
- Linear regression using closed form solution with and without regularization
- Linear regression using gradient descent with and without regularization

In [1]:
import pandas as pd

data = pd.read_csv('data/StudentPerformanceFactors.csv')

X = data.drop("Exam_Score", axis=1)
Y = data['Exam_Score']

from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.75)

num_cols = X.select_dtypes(include=['int64', 'float64']).columns
cat_cols = X.select_dtypes(include=['object', 'category']).columns

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

from sklearn.preprocessing import OneHotEncoder

cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

from sklearn.compose import ColumnTransformer

preprocessor = ColumnTransformer([
    ('num', num_pipeline, num_cols),
    ('cat', cat_pipeline, cat_cols)
])

#### Lasso and Ridge

In [70]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso, Ridge

L1 = Lasso()
L2 = Ridge()
sklearn_LR = LinearRegression()

sklearn_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', sklearn_LR)
])

L1_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', L1)
])

L2_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', L2)
])

In [71]:
from sklearn.model_selection import GridSearchCV
import numpy as np

L1_param_grid = {
    'regressor__alpha': np.logspace(-5, -1, 9)
}

L2_param_grid = {
    'regressor__alpha': np.linspace(1, 10, 10)
}

l1_grid = GridSearchCV(L1_pipeline, L1_param_grid, cv=5, scoring='r2', n_jobs=-1)
l1_grid.fit(X_train, Y_train)

l2_grid = GridSearchCV(L2_pipeline, L2_param_grid, cv=5, scoring='r2', n_jobs=-1)
l2_grid.fit(X_train, Y_train)

sklearn_pipeline.fit(X_train, Y_train)

print("Lasso Best Params:", l1_grid.best_params_)
print("Ridge Best Params:", l2_grid.best_params_)

y_pred_l1 = l1_grid.predict(X_test)
y_pred_l2 = l2_grid.predict(X_test)
y_pred_skl = sklearn_pipeline.predict(X_test)

Lasso Best Params: {'regressor__alpha': np.float64(0.001)}
Ridge Best Params: {'regressor__alpha': np.float64(9.0)}


#### Evaluating the models

In [None]:
model_names = ['Lasso (L1)', 'Ridge (L2)', 'Linear Regression']
models = [l1_grid, l2_grid, sklearn_pipeline]
metrics = ['R² (train)', 'R² (test)']

results = {
    name: [
        model.score(X_train, Y_train),
        model.score(X_test, Y_test)
    ]
    for (name, model) in zip(model_names, models) 
}

df_compare = pd.DataFrame(results, index=metrics).T.round(6)

display(df_compare)

Unnamed: 0,R² (train),R² (test)
Lasso (L1),0.736555,0.698352
Ridge (L2),0.736553,0.698365
Linear Regression,0.736563,0.698338


In [73]:
from src.linear_regression.model_evaluation import metrics_table

df_metrics = metrics_table(Y_test,
                           [y_pred_l1, y_pred_l2, y_pred_skl],
                           ['Lasso (L1)', 'Ridge (L2)', 'Linear Regression'])

display(df_metrics)

Unnamed: 0,R²,MSE,RMSE,MAE
Lasso (L1),0.698352,4.665759,2.160037,0.510651
Ridge (L2),0.698365,4.665566,2.159992,0.510828
Linear Regression,0.698338,4.665985,2.160089,0.510609


#### Comparing weights

In [74]:
from src.linear_regression.model_evaluation import weights_table

weights_df = weights_table(data, X,
                           [l1_grid.best_estimator_.named_steps['regressor'],
                            l2_grid.best_estimator_.named_steps['regressor'],
                            sklearn_LR],
                           ['Lasso',
                            'Ridge',
                            'Linear Regression'])

display(weights_df)

Unnamed: 0,Feature,Lasso,Ridge,Linear Regression
0,Hours_Studied,1.77208,1.769784,1.7732
1,Attendance,2.278392,2.275209,2.279449
2,Sleep_Hours,-0.001198,-0.002314,-0.002066
3,Previous_Scores,0.686444,0.686243,0.687438
4,Tutoring_Sessions,0.628561,0.628423,0.629571
5,Physical_Activity,0.181699,0.182305,0.183075
6,Parental_Involvement - Low,1.074642,1.015001,1.022632
7,Parental_Involvement - Medium,-0.905978,-0.958564,-0.966809
8,Parental_Involvement - High,0.0,-0.056436,-0.055823
9,Access_to_Resources - High,1.038871,1.04406,1.051774


### Adding regularization to own models

In [2]:
from src.linear_regression.models import LinearRegressionClosedForm, LinearRegressionGradientDescent

closed_form_LR = LinearRegressionClosedForm()

closed_form_LR_L2 = LinearRegressionClosedForm(regularization=True, alpha=1)

gradient_descent_LR = LinearRegressionGradientDescent()

gradient_descent_LR_L1 = LinearRegressionGradientDescent(regularization='l1', alpha=0.005)
gradient_descent_LR_L2 = LinearRegressionGradientDescent(regularization='l2', alpha=0.001)
gradient_descent_LR_en = LinearRegressionGradientDescent(regularization='elasticnet', alpha=0.001)

closed_form_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', closed_form_LR)
])

cf_LR_L2_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', closed_form_LR_L2)
])

gradient_descent_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', gradient_descent_LR)
])

gd_LR_L1_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', gradient_descent_LR_L1)
])

gd_LR_L2_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', gradient_descent_LR_L2)
])

gd_LR_en_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', gradient_descent_LR_en)
])

In [3]:
closed_form_pipeline.fit(X_train, Y_train)
cf_LR_L2_pipeline.fit(X_train, Y_train)

gradient_descent_pipeline.fit(X_train, Y_train)
gd_LR_L1_pipeline.fit(X_train, Y_train)
gd_LR_L2_pipeline.fit(X_train, Y_train)
gd_LR_en_pipeline.fit(X_train, Y_train)

y_pred_cf = closed_form_pipeline.predict(X_test)
y_pred_cf_L2 = cf_LR_L2_pipeline.predict(X_test)

y_pred_gd = gradient_descent_pipeline.predict(X_test)
y_pred_gd_l1 = gd_LR_L1_pipeline.predict(X_test)
y_pred_gd_l2 = gd_LR_L2_pipeline.predict(X_test)
y_pred_gd_en = gd_LR_en_pipeline.predict(X_test)

#### Evaluation

In [7]:
from src.linear_regression.model_evaluation import metrics_table, weights_table

df_metrics_reg = metrics_table(Y_test,
                           [y_pred_cf_L2, y_pred_cf, y_pred_gd_l1, y_pred_gd_l2, y_pred_gd_en, y_pred_gd],
                           ['Closed Form with L2',
                            'Closed for without regularization',
                            'Gradient Descent with L1',
                            'Gradient Descent with L2',
                            'Gradient Descent with both (0.5 ratio)',
                            'Gradient Descent without regularization'])

display(df_metrics_reg)

Unnamed: 0,R²,MSE,RMSE,MAE
Closed Form with L2,0.712961,4.382613,2.093469,0.494107
Closed for without regularization,0.713051,4.381247,2.093143,0.494949
Gradient Descent with L1,0.712641,4.387498,2.094636,0.479899
Gradient Descent with L2,0.711775,4.400722,2.09779,0.533559
Gradient Descent with both (0.5 ratio),0.712303,4.392664,2.095868,0.480956
Gradient Descent without regularization,0.712594,4.388217,2.094807,0.519343


#### Weights

In [8]:
models = [gradient_descent_LR_L1, gradient_descent_LR_L2, gradient_descent_LR_en, gradient_descent_LR]
model_names = ['GD with L1', 'GD with L2', 'GD with both', 'GD without regularization']

df_weights_reg = weights_table(data, X, models, model_names)

display(df_weights_reg)

Unnamed: 0,Feature,GD with L1,GD with L2,GD with both,GD without regularization
0,Hours_Studied,1.734659,1.735537,1.744164,1.727822
1,Attendance,2.29945,2.320974,2.304786,2.304989
2,Sleep_Hours,0.003831,-0.028801,0.022623,-0.002028
3,Previous_Scores,0.70377,0.699343,0.715277,0.706033
4,Tutoring_Sessions,0.621422,0.628884,0.600873,0.622672
5,Physical_Activity,0.193402,0.173456,0.172764,0.167928
6,Parental_Involvement - Low,4.47552,4.511502,4.536194,4.593968
7,Parental_Involvement - Medium,2.484916,2.518638,2.544783,2.607913
8,Parental_Involvement - High,3.395059,3.423815,3.453327,3.522345
9,Access_to_Resources - High,4.504629,4.537298,4.565418,4.628701
