In [7]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split

class RidgeRegression:
    
    def __init__(self, alpha=1.0):
        self.alpha = alpha
        
    def fit(self, X, y):
        X = np.insert(X, 0, 1, axis=1) # add bias term
        n, p = X.shape
        I = np.identity(p)
        self.beta = np.linalg.inv(X.T @ X + self.alpha * I) @ X.T @ y
        
    def predict(self, X):
        X = np.insert(X, 0, 1, axis=1) # add bias term
        # Return predicted values using learned coefficients
        return X @ self.beta
    
class LassoRegression:
    
    def __init__(self, alpha=1.0, max_iter=1000, tol=1e-4):
        self.alpha = alpha
        self.max_iter = max_iter
        self.tol = tol
        
    def soft_thresholding(self, x, gamma):
        # Soft-thresholding operator used in Lasso regression
        return np.sign(x) * np.maximum(np.abs(x) - gamma, 0)
    
    def fit(self, X, y):
        X = np.insert(X, 0, 1, axis=1) # add bias term
        n, p = X.shape
        self.beta = np.zeros(p)

        # Coordinate descent algorithm for Lasso regression
        for i in range(self.max_iter):
            beta_old = self.beta.copy()
            for j in range(p):
                if j == 0:
                    self.beta[j] = np.mean(y - X[:, 1:] @ self.beta[1:])
                else:
                    xj = X[:, j]
                    y_pred = X @ self.beta
                    rho = xj @ (y - y_pred + self.beta[j] * xj)
                    self.beta[j] = self.soft_thresholding(rho, self.alpha) / (xj @ xj)
            if np.linalg.norm(self.beta - beta_old) < self.tol:
                break
                
    def predict(self, X):
        X = np.insert(X, 0, 1, axis=1) # add bias term
        return X @ self.beta

# Generate data
np.random.seed(123)
X = np.random.normal(size=(100, 50))
beta = np.random.normal(size=50)
y = X @ beta + np.random.normal(size=100)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# Train Ridge Regression
ridge = RidgeRegression(alpha=1.0)
ridge.fit(X_train, y_train)
y_pred_ridge = ridge.predict(X_test)
r2_ridge = r2_score(y_test, y_pred_ridge)
rmse_ridge = np.sqrt(mean_squared_error(y_test, y_pred_ridge))
print(f'Ridge Regression R^2: {r2_ridge:.3f}')
print(f'Ridge Regression RMSE: {rmse_ridge:.3f}')

# Sort Ridge coefficients
coefficients_ridge = pd.DataFrame({'Variable': ['Intercept'] + [f'X{i+1}' for i in range(50)], 'Coefficient': ridge.beta})
coefficients_ridge_sorted = coefficients_ridge.iloc[coefficients_ridge['Coefficient'].abs().argsort()[::-1]]
print('Ridge Regression Coefficients:')
print(coefficients_ridge_sorted)

Ridge Regression R^2: 0.953
Ridge Regression RMSE: 1.576
Ridge Regression Coefficients:
     Variable  Coefficient
31        X31    -2.165682
25        X25     1.826146
14        X14    -1.777560
44        X44     1.660277
48        X48    -1.640986
35        X35     1.627803
11        X11    -1.554899
2          X2     1.553672
15        X15     1.521414
13        X13     1.469234
3          X3    -1.464724
28        X28    -1.328707
22        X22    -1.111655
5          X5    -1.093651
12        X12    -1.011048
43        X43     1.000706
49        X49     0.945784
39        X39     0.909925
7          X7    -0.894591
38        X38     0.885944
46        X46    -0.821420
23        X23    -0.820812
33        X33     0.702328
8          X8    -0.664038
9          X9     0.656454
19        X19    -0.626802
30        X30    -0.592613
27        X27    -0.584768
6          X6    -0.583465
50        X50     0.552211
16        X16    -0.538915
32        X32     0.517840
21        X21    -0.4

In [8]:
# Train Lasso Regression
lasso = LassoRegression(alpha=1.0, max_iter=10000, tol=1e-4)
lasso.fit(X_train, y_train)
y_pred_lasso = lasso.predict(X_test)
r2_lasso = r2_score(y_test, y_pred_lasso)
rmse_lasso = np.sqrt(mean_squared_error(y_test, y_pred_lasso))
print(f'Lasso Regression R^2: {r2_lasso:.3f}')
print(f'Lasso Regression RMSE: {rmse_lasso:.3f}')

# Sort Lasso coefficients
coefficients_lasso = pd.DataFrame({'Variable': ['Intercept'] + [f'X{i+1}' for i in range(50)], 'Coefficient': lasso.beta})
coefficients_lasso_sorted = coefficients_lasso.iloc[coefficients_lasso['Coefficient'].abs().argsort()[::-1]]
print('Lasso Regression Coefficients:')
print(coefficients_lasso_sorted)

Lasso Regression R^2: 0.951
Lasso Regression RMSE: 1.608
Lasso Regression Coefficients:
     Variable  Coefficient
31        X31    -2.199419
25        X25     1.889155
14        X14    -1.812348
44        X44     1.709623
48        X48    -1.695314
35        X35     1.608901
11        X11    -1.606593
2          X2     1.586983
15        X15     1.515911
3          X3    -1.493726
13        X13     1.487373
28        X28    -1.352580
22        X22    -1.135287
5          X5    -1.093026
12        X12    -0.994399
43        X43     0.992386
49        X49     0.958890
7          X7    -0.912147
39        X39     0.903474
38        X38     0.899365
46        X46    -0.819633
23        X23    -0.797478
33        X33     0.692733
9          X9     0.650821
19        X19    -0.636272
8          X8    -0.620660
30        X30    -0.593640
6          X6    -0.591293
27        X27    -0.560124
50        X50     0.552044
32        X32     0.525015
16        X16    -0.506968
21        X21    -0.4