## Question 6

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [10]:
## Import linear regression model from q5
class LinearRegressionGD:
    """Least-squares linear regression fitted with batch gradient descent.

    Parameters
    ----------
    alpha : float, default 0.01
        Learning rate applied to every gradient step.
    iterations : int, default 100
        Number of full-batch gradient steps performed by ``fit``.

    Attributes
    ----------
    theta : numpy.ndarray or None
        ``None`` until ``fit`` is called; afterwards a 1-D array whose first
        entry is the intercept and whose remaining entries are the feature
        weights.
    """

    def __init__(self, alpha=0.01, iterations=100):
        self.theta = None
        self.iterations = iterations
        self.alpha = alpha

    @staticmethod
    def _with_intercept(X):
        """Return ``X`` with a column of ones prepended (the intercept feature)."""
        bias_column = np.ones((X.shape[0], 1))
        return np.concatenate((bias_column, X), axis=1)

    def fit(self, X, y):
        """Run ``iterations`` gradient steps on the mean squared error.

        ``X`` is a 2-D array of shape (n_samples, n_features) and ``y`` is
        expected to be 1-D with one target per row of ``X``. The parameters
        always start from zero, so calling ``fit`` again retrains from scratch.
        """
        design = self._with_intercept(X)
        n_samples, n_params = design.shape
        self.theta = np.zeros(n_params)

        for _ in range(self.iterations):
            residuals = design @ self.theta - y
            # Gradient of mean((design @ theta - y) ** 2) with respect to theta.
            mse_gradient = (2 / n_samples) * (residuals.T @ design)
            self.theta -= self.alpha * mse_gradient

    def predict(self, X):
        """Return the fitted model's predictions for each row of ``X``."""
        return self._with_intercept(X) @ self.theta

## Question 6.2

In [4]:
## Define ridge regression using gradient descent
class RidgeRegressionGD:
    """Ridge (L2-penalised) linear regression fitted with batch gradient descent.

    The loss being minimised is::

        (1 / n) * (sum((X_b @ theta - y) ** 2) + lam * sum(theta[1:] ** 2))

    where ``X_b`` is ``X`` with a leading column of ones. The intercept
    ``theta[0]`` is not penalised.

    Parameters
    ----------
    alpha : float, default 0.01
        Learning rate applied to every gradient step.
    iterations : int, default 100
        Number of full-batch gradient steps performed by ``fit``.
    lam : float, default 0.0
        Strength of the L2 penalty; ``0`` reduces to ordinary least squares.

    Attributes
    ----------
    theta : numpy.ndarray or None
        ``None`` until ``fit`` is called; afterwards a 1-D array holding the
        intercept followed by one weight per feature.
    """

    def __init__(self, alpha=0.01, iterations=100, lam=0.0):
        self.alpha = alpha
        self.iterations = iterations
        self.lam = lam
        self.theta = None

    def fit(self, X, y):
        """Run ``iterations`` gradient steps on the ridge loss.

        ``X`` is a 2-D array of shape (n_samples, n_features) and ``y`` is
        expected to be 1-D with one target per row of ``X``. The parameters
        always start from zero, so calling ``fit`` again retrains from scratch.
        """
        n = X.shape[0]

        ones = np.ones((n, 1))
        X_b = np.hstack((ones, X))
        self.theta = np.zeros(X_b.shape[1])

        for _ in range(self.iterations):
            predictions = X_b @ self.theta
            errors = predictions - y

            # Penalty gradient; the product is already a fresh array, so
            # zeroing its first entry leaves self.theta untouched.
            reg_term = self.lam * self.theta
            reg_term[0] = 0  # the intercept is not regularised

            # Data gradient is X_b^T @ errors (one entry per parameter).
            # The previous `errors.T @ errors` was the scalar sum of squared
            # errors, which was broadcast onto every parameter.
            gradient = (2 / n) * (X_b.T @ errors + reg_term)
            self.theta -= self.alpha * gradient

    def predict(self, X):
        """Return the fitted model's predictions for each row of ``X``."""
        n = X.shape[0]
        ones = np.ones((n, 1))
        X_b = np.hstack((ones, X))
        return X_b @ self.theta

## Question 6.2

In [5]:
## Initialize data: N noisy samples of the line y = 1 + 2x
np.random.seed(42)  # fixed seed so every run draws the same sample
N = 1000

# One feature drawn uniformly between -2 and 2, stored as an (N, 1) column.
X = np.random.uniform(low=-2, high=2, size=(N, 1))
# Gaussian noise with mean 0 and standard deviation 2, one value per sample.
e = np.random.normal(loc=0, scale=2, size=N)
# Targets: intercept 1, slope 2, plus the noise above (1-D, length N).
y = 2 * X.ravel() + 1 + e

In [14]:
## Baseline: fit the unregularised gradient-descent model for comparison
alphas = [0.1]
iterations = [50, 100]
results = [('a', 'iterations', 'MSE', 'R^2')]

# One fit per (learning rate, iteration count) pair, scored on the same
# data the model was trained on.
for a in alphas:
    for i in iterations:
        model = LinearRegressionGD(alpha=a, iterations=i)
        model.fit(X, y)
        y_pred = model.predict(X)

        mse, r2 = mean_squared_error(y, y_pred), r2_score(y, y_pred)
        results.append((a, i, mse, r2))

# The first tuple in `results` is the header; everything after it is data.
header, *rows = results
results_df = pd.DataFrame(rows, columns=header)

print(results_df.round(3).to_string())

     a  iterations  MSE    R^2
0  0.1          50  3.9  0.564
1  0.1         100  3.9  0.564


In [16]:
## Train the ridge model over a range of penalty strengths

lam = [1, 10, 100, 1000, 10000]
results = [("lambda", 'slope', 'MSE', 'R^2')]

# NOTE(review): each update multiplies the slope by (1 - 2*alpha*lambda/N)
# through the penalty term alone. With alpha=0.1 and N=1000 that factor is -1
# at lambda=10000, so gradient descent presumably cannot converge there --
# TODO confirm by re-running that row with a smaller alpha.
for lam_value in lam:
    model = RidgeRegressionGD(alpha=0.1, iterations=100, lam=lam_value)
    model.fit(X, y)
    y_pred = model.predict(X)

    # theta[0] is the intercept, theta[1] the slope of the single feature.
    slope = model.theta[1]
    mse, r2 = mean_squared_error(y, y_pred), r2_score(y, y_pred)
    results.append((lam_value, slope, mse, r2))

# The first tuple in `results` is the header; everything after it is data.
header, *rows = results
results_df = pd.DataFrame(rows, columns=header)

print(results_df.round(3).to_string())

   lambda         slope           MSE           R^2
0       1  1.921000e+00  3.900000e+00  5.640000e-01
1      10  1.909000e+00  3.900000e+00  5.640000e-01
2     100  1.791000e+00  3.923000e+00  5.610000e-01
3    1000  1.109000e+00  4.802000e+00  4.630000e-01
4   10000 -6.974259e+09  6.644111e+19 -7.429968e+18


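Compared with plain linear regression, ridge regression gives almost exactly the same results for small lambda: an MSE of 3.9 and an R^2 of 0.564 at both lambda = 1 and lambda = 10. However, the higher lambda goes, the worse the fit on this data gets. At lambda = 100 there is a slight drop-off (MSE 3.923, R^2 0.561), at lambda = 1000 the drop is clear (MSE 4.802, R^2 0.463), and by lambda = 10000 the model has become very bad. This means that, on this data, lower values of lambda work better, and past a certain point increasing lambda makes the model worse. We can also see the slope decrease as lambda increases (1.921, 1.909, 1.791, 1.109), which is the expected shrinking effect of the penalty. At lambda = 10000, however, the slope becomes a very large negative number (about -7e9) instead of shrinking further toward 0. This indicates that gradient descent diverged rather than that ridge regression itself failed: with alpha = 0.1 and N = 1000, the penalty term alone multiplies the slope by 1 - 2 * alpha * lambda / N = -1 at every step, so the learning rate is too large for this lambda and the last row reflects numerical blow-up, not the true ridge solution. A smaller alpha would be needed to evaluate lambda = 10000 fairly.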