# First, let's import the required libraries and create a 10x5 dataframe with random values. The first 4 columns represent the predictor values (X), and the last column represents the output values (Y).

In [1]:
import numpy as np
import pandas as pd

# Seed for reproducibility
np.random.seed(42)

# Create a 10x5 dataframe with random values
data = np.random.rand(10, 5)
df = pd.DataFrame(data, columns=["X1", "X2", "X3", "X4", "Y"])
df

Unnamed: 0,X1,X2,X3,X4,Y
0,0.37454,0.950714,0.731994,0.598658,0.156019
1,0.155995,0.058084,0.866176,0.601115,0.708073
2,0.020584,0.96991,0.832443,0.212339,0.181825
3,0.183405,0.304242,0.524756,0.431945,0.291229
4,0.611853,0.139494,0.292145,0.366362,0.45607
5,0.785176,0.199674,0.514234,0.592415,0.04645
6,0.607545,0.170524,0.065052,0.948886,0.965632
7,0.808397,0.304614,0.097672,0.684233,0.440152
8,0.122038,0.495177,0.034389,0.90932,0.25878
9,0.662522,0.311711,0.520068,0.54671,0.184854


# Now, let's separate the predictor values (X) and the output values (Y) into two NumPy arrays.

In [9]:
# Separate predictor values (X) and output values (Y)
X = df.iloc[:, :4].values  # First 4 columns (X1, X2, X3, X4)
Y = df.iloc[:, 4].values   # Last column (Y)

# Next, we'll calculate the matrix $X^TX$ and the regularization term  $\lambda I$. We'll use $\lambda = 0.1$ in this example.

In [11]:
# Calculate the matrix X^T * X
XT_X = np.matmul(X.T, X)

# Calculate the regularization term lambda * I
_lambda = 0.1
I = np.identity(4)  # 4x4 identity matrix
lambda_I = _lambda * I
pd.DataFrame(lambda_I)

Unnamed: 0,0,1,2,3
0,0.1,0.0,0.0,0.0
1,0.0,0.1,0.0,0.0
2,0.0,0.0,0.1,0.0
3,0.0,0.0,0.0,0.1


# Now, let's add the matrix $X^TX$ and the regularization term $\lambda I$, and then calculate the inverse of the resulting matrix.

In [13]:
# Add the matrix X^T * X and the regularization term lambda * I
XT_X_lambda_I = XT_X + lambda_I

# Calculate the inverse of the resulting matrix
inv_XT_X_lambda_I = np.linalg.inv(XT_X_lambda_I)
pd.DataFrame(XT_X_lambda_I)

Unnamed: 0,0,1,2,3
0,2.765987,1.299831,1.572405,2.693695
1,1.299831,2.56408,2.076691,2.101754
2,1.572405,2.076691,2.989617,2.218136
3,2.693695,2.101754,2.218136,4.030886


# Next, we'll calculate the matrix $X^TY$.

In [5]:
# Calculate the matrix X^T * Y
XT_Y = np.matmul(X.T, Y)

# Finally, let's calculate the ridge regression coefficients $\hat{\beta}$ by multiplying the inverse matrix from the previous step by the matrix $X^TY$.

In [6]:
# Calculate the ridge regression coefficients (Beta_hat)
Beta_hat = np.matmul(inv_XT_X_lambda_I, XT_Y)
Beta_hat

array([-0.03654932, -0.29677872,  0.19096687,  0.677389  ])

# Now, we have the ridge regression coefficients $\hat{\beta}$ for each predictor in our example dataframe. These coefficients can be used to create a model that can predict the output values $Y$ based on the predictor values $X$.

## Let's try using Scikit-learn to do this

In [15]:
from sklearn.linear_model import Ridge

# Create a Ridge regression model with alpha (lambda) = 0.1
ridge_model = Ridge(alpha=0.1, normalize = False)

# Fit the model to the data (X and Y)
ridge_model.fit(X, Y)

# Obtain the coefficients (Beta_hat)
ridge_model.coef_



array([-0.21255822, -0.41107767, -0.06072781,  0.32377762])