# REGULARIZED REGRESSION

In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

def regression(data_train, data_test):
    """Fit cross-validated Ridge, Lasso and ElasticNet models and score them.

    Each model is wrapped in a pipeline that standardizes the features and
    then selects its regularization strength (and, for ElasticNet, the L1
    ratio) by 5-fold cross-validation on the training data.  The fitted
    pipelines are then evaluated on the test data.

    Args:
        data_train: DataFrame holding a ``"price"`` target column; every
            other column is used as a feature.
            NOTE(review): features are presumably all numeric, since they
            are passed straight to ``StandardScaler`` -- confirm upstream.
        data_test: DataFrame with the same layout as ``data_train``.

    Returns:
        A dict keyed by ``"ridge"``, ``"lasso"`` and ``"elastic_net"``.
        Each value is a dict with:

        * ``"alpha"``: the regularization strength chosen by CV.
        * ``"l1_ratio"``: (``"elastic_net"`` only) the chosen L1 ratio.
        * ``"pred"``: test-set predictions rounded to 2 decimals.
        * ``"coefficients"``: DataFrame (``variable``, ``coef``) of the
          retained coefficients.  Ridge and ElasticNet keep coefficients
          with ``|coef| > 1.0``; Lasso keeps every non-zero coefficient.
          Coefficients refer to the standardized features.
        * ``"MSE and MAPE"``: ``[MSE rounded to 2, MAPE rounded to 4]``.
          MAPE is the fraction returned by scikit-learn, not a percentage.
    """
    X_train = data_train.drop(columns=["price"])
    y_train = data_train["price"]
    X_test = data_test.drop(columns=["price"])
    y_test = data_test["price"]

    # Candidate hyperparameters: alphas 1e-4 .. 1e-1 (one per decade) and
    # L1 ratios of roughly 0.6, 0.7, 0.8, 0.9 (the stop value 1 is excluded).
    alphas = np.logspace(-4, -1, 4)
    l1_grid = np.arange(0.6, 1, 0.1)

    # Scaling lives inside each pipeline, so fit() and predict() apply it
    # themselves; no separate scaler is needed for the test data.
    ridge_pipe = make_pipeline(
        StandardScaler(),
        linear_model.RidgeCV(alphas=alphas, cv=5),
    )
    lasso_pipe = make_pipeline(
        StandardScaler(),
        linear_model.LassoCV(alphas=alphas, cv=5, random_state=0),
    )
    elastic_pipe = make_pipeline(
        StandardScaler(),
        linear_model.ElasticNetCV(
            alphas=alphas, l1_ratio=l1_grid, cv=5, random_state=0
        ),
    )

    for pipe in (ridge_pipe, lasso_pipe, elastic_pipe):
        pipe.fit(X_train, y_train)

    def coefficient_table(pipe):
        # pipe[-1] is the final (regression) step of the pipeline.
        return pd.DataFrame(
            {"variable": X_train.columns, "coef": pipe[-1].coef_}
        )

    def evaluate(pipe):
        # Predict once and reuse the result for both the reported
        # predictions and the error metrics.
        pred = pipe.predict(X_test)
        mse = mean_squared_error(y_test, pred)
        mape = mean_absolute_percentage_error(y_test, pred)
        return np.round(pred, 2), [np.round(mse, 2), np.round(mape, 4)]

    ridge_coef = coefficient_table(ridge_pipe)
    lasso_coef = coefficient_table(lasso_pipe)
    elastic_coef = coefficient_table(elastic_pipe)

    # Ridge (and ElasticNet here) are filtered by magnitude; Lasso is
    # filtered by exact zeros, i.e. the features it dropped.
    ridge_coef = ridge_coef[ridge_coef["coef"].abs() > 1.0]
    lasso_coef = lasso_coef[lasso_coef["coef"] != 0]
    elastic_coef = elastic_coef[elastic_coef["coef"].abs() > 1.0]

    ridge_pred, ridge_scores = evaluate(ridge_pipe)
    lasso_pred, lasso_scores = evaluate(lasso_pipe)
    elastic_pred, elastic_scores = evaluate(elastic_pipe)

    return {
        "ridge": {
            "alpha": ridge_pipe[-1].alpha_,
            "pred": ridge_pred,
            "coefficients": ridge_coef,
            "MSE and MAPE": ridge_scores,
        },
        "lasso": {
            "alpha": lasso_pipe[-1].alpha_,
            "pred": lasso_pred,
            "coefficients": lasso_coef,
            "MSE and MAPE": lasso_scores,
        },
        "elastic_net": {
            "alpha": elastic_pipe[-1].alpha_,
            "l1_ratio": elastic_pipe[-1].l1_ratio_,
            "pred": elastic_pred,
            "coefficients": elastic_coef,
            "MSE and MAPE": elastic_scores,
        },
    }

In [2]:
# Folder containing the train/test CSV files (machine-specific absolute path).
data_directory = r"C:\Users\sb013698\Desktop\ML Test\Datasets\Regularized Regression"

# Load the training set first, then the test set, from that folder.
data_train, data_test = (
    pd.read_csv(os.path.join(data_directory, file_name))
    for file_name in ("data_train.csv", "data_test.csv")
)

# Fit the three regularized models and report their test-set errors,
# one line per model.
regression_results = regression(data_train, data_test)
print(
    *(
        f'{label} >>>> MSE and MAPE: {regression_results[model_key]["MSE and MAPE"]}'
        for label, model_key in (
            ("Ridge", "ridge"),
            ("Lasso", "lasso"),
            ("ElasticNet", "elastic_net"),
        )
    ),
    sep="\n",
)

Ridge >>>> MSE and MAPE: [627.21, 0.1505]
Lasso >>>> MSE and MAPE: [812.8, 0.1527]
ElasticNet >>>> MSE and MAPE: [1028.56, 0.1513]


# END