# Ridge Regression

In [15]:
from sklearn.linear_model import Ridge
import numpy as np

# Example data
X = np.array([[1,1], [1,2], [2,2], [2,3]])
# Target value
y = np.dot(X, np.array([1,2])) + 3

#Ridge regression model
ridge_reg = Ridge(alpha=1.0) # alpha is the equivalent of lambda in the formula
ridge_reg.fit(X, y)

# Coefficients
print('Coefficients:', ridge_reg.coef_)
# Intercept
print('Intercept:', ridge_reg.intercept_)

Coefficients: [0.8 1.4]
Intercept: 4.5


# Comparing Simple linear regression vs. Ridge regression

## 1. import libraries and load the data

In [16]:
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, root_mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# load the dataset
df = sns.load_dataset('titanic')

## 2. Preprocess the data

In [17]:
# selecting the subset of columns for simplicity
columns_to_use = ['survived', 'pclass', 'sex', 'age', 'fare']
df = df[columns_to_use]

# handling missing values
df['age'].fillna(df['age'].median(), inplace=True)

# define feature and target variables
X = df.drop('survived', axis=1)
y = df['survived']

# train test split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,  random_state=0)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['age'].fillna(df['age'].median(), inplace=True)


## 3. Creating a Pipline

In [18]:
# Define a pipline for OneHotEncoding and model
categorical_features = ['sex']
numarical_features = ['pclass', 'age', 'fare']

# preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numarical_features),
        ('cat', OneHotEncoder(), categorical_features)])

# linear regression pipline
lr_pipline = Pipeline(steps=[('preprocessor', preprocessor),
                             ('regressor', LinearRegression())])

# Ridge regression pipline
ridge_pipline = Pipeline(steps=[('preprocessor', preprocessor),
                                ('regressor', Ridge(alpha=1.0))])

## 4. Train and Evaluate the model

In [19]:
# train and evaluate linear regression
lr_pipline.fit(X_train, y_train)
lr_pred = lr_pipline.predict(X_test)
lr_mse = mean_squared_error(y_test, lr_pred)
lr_rmse = np.sqrt(lr_mse)
lr_mae = mean_absolute_error(y_test, lr_pred)
lr_mape = mean_absolute_percentage_error(y_test, lr_pred)
lr_r2 = r2_score(y_test, lr_pred)


# train and evaluate Ridge regression
ridge_pipline.fit(X_train, y_train)
ridge_pred = ridge_pipline.predict(X_test)
ridge_mse = mean_squared_error(y_test, ridge_pred)
ridge_rmse = np.sqrt(ridge_mse)
ridge_mae = mean_absolute_error(y_test, ridge_pred)
ridge_mape = mean_absolute_percentage_error(y_test, ridge_pred)
ridge_r2 = r2_score(y_test, ridge_pred)

print('Linear Regression MSE:', lr_mse)
print('Rdige Regression MSE:', ridge_mse)
print('Linear Regression RMSE:', lr_rmse)
print('Rdige Regression RMSE:', ridge_rmse)
print('Linear Regression MAE:', lr_mae)
print('Rdige Regression MAE:', ridge_mae)
print('Linear Regression MAPE:', lr_mape)
print('Rdige Regression MAPE:', ridge_mape)
print('Linear Regression R2:', lr_r2)
print('Rdige Regression R2:', ridge_r2)

Linear Regression MSE: 0.13684268526287452
Rdige Regression MSE: 0.1368602274478447
Linear Regression RMSE: 0.3699225395442599
Rdige Regression RMSE: 0.3699462494036731
Linear Regression MAE: 0.28882295584163387
Rdige Regression MAE: 0.28923126730713655
Linear Regression MAPE: 697272156502681.8
Rdige Regression MAPE: 698032476179649.4
Linear Regression R2: 0.4223219395905452
Rdige Regression R2: 0.42224788568426985
