<a href="https://colab.research.google.com/github/pspvv/ML_Workshop/blob/main/50SU_Linear-L1-L2_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
# Load the 50-startups dataset and one-hot encode the categorical "State" column.
DATA_PATH = '/content/50_Startups.csv'  # Colab upload location; adjust when running elsewhere
startups_raw = pd.read_csv(DATA_PATH)

# drop_first=True drops one dummy level per encoded column.
# dtype=float keeps the whole frame numeric: pandas >= 2.0 creates bool dummies
# by default, which would make `df.values` an object array when mixed with floats.
df = pd.get_dummies(startups_raw, columns=["State"], drop_first=True, dtype=float)

# The preview goes last because only a cell's final expression is rendered.
df.head()

In [None]:
# Features are every column except the target; the target is the "Profit" column.
target_col = "Profit"
X = df.drop(columns=[target_col]).to_numpy()
y = df[target_col].to_numpy()

In [None]:
# Sanity check: shapes of the feature matrix and the target vector.
tuple(arr.shape for arr in (X, y))

((50, 5), (50,))

In [None]:
# Turn the target into a single-column 2-D array.
y = np.reshape(y, (-1, 1))

In [None]:
# 1. Multiple Linear Regression (No Regularization)
# Baseline model, fitted on the complete dataset (the train/test split happens later).
lr = LinearRegression()
lr.fit(X=X, y=y)

In [None]:
# In-sample predictions: these are the same rows the model was fitted on.
y_pred_lr = lr.predict(X=X)

In [None]:
# Error metrics of the linear model on the data it was fitted on.
mse_lr = mean_squared_error(y, y_pred_lr)  # computed once, reused for RMSE
lr_metrics = {
    "MAE": mean_absolute_error(y, y_pred_lr),
    "MSE": mse_lr,
    "RMSE": np.sqrt(mse_lr),
    "R2 Score": r2_score(y, y_pred_lr),
}
for label, value in lr_metrics.items():
    print(label, value)

MAE 6475.500708609342
MSE 78406792.88803768
RMSE 8854.761029414496
R2 Score 0.9507524843355148


In [None]:
# score() reports R^2 for the linear model on the full dataset.
lr.score(X=X, y=y)

0.9507524843355148

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Sanity check: shapes of the four arrays produced by the split.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((40, 5), (10, 5), (40, 1), (10, 1))

In [None]:
# 2. L1 Regularization (Lasso)
# alpha sets the strength of the L1 penalty; the model is fitted on the training split only.
lasso = Lasso(alpha=0.1)
lasso.fit(X=X_train, y=y_train)

In [None]:
# Lasso predictions for the training rows.
y_pred_lasso = lasso.predict(X=X_train)

In [None]:
# Lasso error metrics on the training split.
mse_lasso_train = mean_squared_error(y_train, y_pred_lasso)  # computed once, reused for RMSE
lasso_train_metrics = {
    "MAE": mean_absolute_error(y_train, y_pred_lasso),
    "MSE": mse_lasso_train,
    "RMSE": np.sqrt(mse_lasso_train),
    "R2 Score": r2_score(y_train, y_pred_lasso),
}
for label, value in lasso_train_metrics.items():
    print(label, value)

MAE 6662.622932690499
MSE 79700060.27993599
RMSE 8927.489024352592
R2 Score 0.9537019994102155


In [None]:
# Lasso predictions for the held-out test rows.
y_pred_lasso2 = lasso.predict(X=X_test)

In [None]:
# Lasso error metrics on the held-out test split.
mse_lasso_test = mean_squared_error(y_test, y_pred_lasso2)  # computed once, reused for RMSE
lasso_test_metrics = {
    "MAE": mean_absolute_error(y_test, y_pred_lasso2),
    "MSE": mse_lasso_test,
    "RMSE": np.sqrt(mse_lasso_test),
    "R2 Score": r2_score(y_test, y_pred_lasso2),
}
for label, value in lasso_test_metrics.items():
    print(label, value)

MAE 6961.4878705766805
MSE 82009745.37455888
RMSE 9055.923220443008
R2 Score 0.8987274041838984


In [None]:
# Score the fitted Lasso model (not the unregularized `lr`) on the held-out split.
# score() returns R^2, i.e. the same quantity as r2_score(y_test, lasso.predict(X_test)).
lasso.score(X_test, y_test)

0.9507524843355148

In [None]:
# 3. L2 Regularization (Ridge)
# alpha sets the strength of the L2 penalty; the model is fitted on the training split only.
ridge = Ridge(alpha=0.1)
ridge.fit(X=X_train, y=y_train)

In [None]:
# Ridge predictions for the training rows.
y_pred_ridge = ridge.predict(X=X_train)

In [None]:
# Ridge error metrics on the training split.
mse_ridge_train = mean_squared_error(y_train, y_pred_ridge)  # computed once, reused for RMSE
ridge_train_metrics = {
    "MAE": mean_absolute_error(y_train, y_pred_ridge),
    "MSE": mse_ridge_train,
    "RMSE": np.sqrt(mse_ridge_train),
    "R2 Score": r2_score(y_train, y_pred_ridge),
}
for label, value in ridge_train_metrics.items():
    print(label, value)

MAE 6661.840583104
MSE 79700094.54582748
RMSE 8927.490943474962
R2 Score 0.9537019795050579


In [None]:
# Ridge predictions for the held-out test rows.
y_pred_ridge2 = ridge.predict(X=X_test)

In [None]:
# Ridge error metrics on the held-out test split.
mse_ridge_test = mean_squared_error(y_test, y_pred_ridge2)  # computed once, reused for RMSE
ridge_test_metrics = {
    "MAE": mean_absolute_error(y_test, y_pred_ridge2),
    "MSE": mse_ridge_test,
    "RMSE": np.sqrt(mse_ridge_test),
    "R2 Score": r2_score(y_test, y_pred_ridge2),
}
for label, value in ridge_test_metrics.items():
    print(label, value)

MAE 6961.690918661945
MSE 81996139.21167986
RMSE 9055.171959255102
R2 Score 0.8987442062289184


In [None]:
# By default, the Ridge estimator's score() method returns the R^2 score.
# Score the Ridge model itself: `lr` was fitted on the full dataset (test rows
# included), so scoring it on X_test would report a leaked, optimistic value.
ridge.score(X_test, y_test)

0.9179169317955789