# Ridge Regression
Ridge regression is a type of regression analysis that performs L2 regularization, which adds a penalty term to the loss function to prevent overfitting. The penalty term is proportional to the magnitude of the model coefficients.

In [1]:
from sklearn.linear_model import Ridge
import numpy as np

# Example data
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
# Target values
y = np.dot(X, np.array([1, 2])) + 3

# Ridge Regression Model
ridge_reg = Ridge(alpha=1.0)  # alpha is the equivalent of lambda in the formula
ridge_reg.fit(X, y)

# Coefficients
print("Coefficients:", ridge_reg.coef_)
# Intercept
print("Intercept:", ridge_reg.intercept_)

Coefficients: [0.8 1.4]
Intercept: 4.5


## Linear Regression vs Ridge Regression

In [3]:
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import numpy as np

# Load Titanic dataset
df = sns.load_dataset('titanic')

### Data Pre-processing

In [4]:
# Selecting a subset of columns for simplicity
columns_to_use = ['survived', 'pclass', 'sex', 'age', 'fare']
df = df[columns_to_use]

# Handling missing values
df['age'].fillna(df['age'].median(), inplace=True)

# Define feature and target variable
X = df.drop('survived', axis=1)
y = df['survived']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['age'].fillna(df['age'].median(), inplace=True)


### Creating a Pipeline

In [5]:
# Define a pipeline for OneHotEncoding and model
categorical_features = ['sex']
numerical_features = ['pclass', 'age', 'fare']

# Preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical_features),
        ('cat', OneHotEncoder(), categorical_features)])

# Linear Regression Pipeline
lr_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                              ('regressor', LinearRegression())])

# Ridge Regression Pipeline
ridge_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                                 ('regressor', Ridge(alpha=1.0))])

### train and Evaluate the Models

In [6]:
# Train and evaluate Linear Regression
lr_pipeline.fit(X_train, y_train)
lr_pred = lr_pipeline.predict(X_test)
lr_mse = mean_squared_error(y_test, lr_pred)
lr_r2 = r2_score(y_test, lr_pred)
lr_mae = mean_absolute_error(y_test, lr_pred)
lr_mape = mean_absolute_percentage_error(y_test, lr_pred)
lr_rmse = np.sqrt(lr_mse)

# Train and evaluate Ridge Regression
ridge_pipeline.fit(X_train, y_train)
ridge_pred = ridge_pipeline.predict(X_test)
ridge_mse = mean_squared_error(y_test, ridge_pred)
ridhe_r2 = r2_score(y_test, ridge_pred)
ridge_mae = mean_absolute_error(y_test, ridge_pred)
ridge_mape = mean_absolute_percentage_error(y_test, ridge_pred)
ridge_rmse = np.sqrt(ridge_mse)

print("Linear Regression MSE:", lr_mse)
print("Ridge Regression MSE:", ridge_mse)

print("Linear Regression R2:", lr_r2)
print("Ridge Regression R2:", ridhe_r2)

print("Linear Regression MAE:", lr_mae)
print("Ridge Regression MAE:", ridge_mae)

print("Linear Regression MAPE:", lr_mape)
print("Ridge Regression MAPE:", ridge_mape)

print("Linear Regression RMSE:", lr_rmse)
print("Ridge Regression RMSE:", ridge_rmse)

Linear Regression MSE: 0.13684268526287452
Ridge Regression MSE: 0.13686022744784476
Linear Regression R2: 0.4223219395905452
Ridge Regression R2: 0.42224788568426963
Linear Regression MAE: 0.28882295584163387
Ridge Regression MAE: 0.2892312673071361
Linear Regression MAPE: 697272156502681.8
Ridge Regression MAPE: 698032476179648.6
Linear Regression RMSE: 0.3699225395442599
Ridge Regression RMSE: 0.3699462494036732
