In [1]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Lasso
import numpy as np
from my_cross_val import my_cross_val


In [2]:
class MyRidgeRegression():

    def __init__(self, lambda_val):
        self.lambda_val = lambda_val

    def fit(self, X, y):
        self.w = np.linalg.inv(X.transpose()@X + lambda_val*np.identity(X.shape[1]))@X.transpose()@y

    def predict(self, X):
        y_hat = X@self.w
        return y_hat


In [3]:
# load dataset
X, y = fetch_california_housing(return_X_y=True)

num_data, num_features = X.shape

# shuffle dataset
np.random.seed(2023)
perm = np.random.permutation(num_data)

X = X.tolist()
y = y.tolist()

X = [X[i] for i in perm]
y = [y[i] for i in perm]

X = np.array(X)
y = np.array(y)

# append column of 1s to include intercept
X = np.hstack((X, np.ones((num_data, 1))))


In [4]:
# Split dataset into train and test sets
NUM_TRAIN = int(np.ceil(num_data*0.8))
NUM_TEST = num_data - NUM_TRAIN

X_train = X[:NUM_TRAIN]
X_test = X[NUM_TRAIN:]
y_train = y[:NUM_TRAIN]
y_test = y[NUM_TRAIN:]

In [5]:
lambda_vals = [0.01, 0.1, 1, 10, 100]

In [7]:
for lambda_val in lambda_vals:
    print(lambda_val)
    # instantiate ridge regression object
    rr_model = MyRidgeRegression(lambda_val)
    # call to your CV function to compute mse for each fold
    rr_mse_vals = my_cross_val(rr_model, 'mse', X, y)
    # print mse from CV
    print("Ridge Regression MSE by Fold", rr_mse_vals)
    # instantiate lasso object
    ls_model = Lasso(lambda_val)
    # call to your CV function to compute mse for each fold
    ls_mse_vals = my_cross_val(rr_model, 'mse', X, y)
    # print mse from CV
    print("Lasso Regression MSE by Fold", ls_mse_vals)

0.01
Ridge Regression MSE by Fold [0.4863, 0.6298, 0.515, 0.5169, 0.5021, 0.5222, 0.4946, 0.5448, 0.607, 0.5003, 0.5319, 0.0462]
Lasso Regression MSE by Fold [0.5289, 0.5237, 0.5155, 0.5114, 0.5595, 0.5127, 0.5622, 0.5131, 0.4981, 0.5534, 0.5278, 0.0214]
0.1
Ridge Regression MSE by Fold [0.5344, 0.5563, 0.4942, 0.5501, 0.5003, 0.4496, 0.5704, 0.5832, 0.5745, 0.5196, 0.5333, 0.0402]
Lasso Regression MSE by Fold [0.5828, 0.4608, 0.5458, 0.5429, 0.5133, 0.5304, 0.5586, 0.5101, 0.5042, 0.5372, 0.5286, 0.0319]
1
Ridge Regression MSE by Fold [0.5743, 0.5746, 0.501, 0.5479, 0.5551, 0.5903, 0.5197, 0.5802, 0.5246, 0.4926, 0.546, 0.033]
Lasso Regression MSE by Fold [0.5618, 0.5457, 0.5489, 0.5198, 0.5987, 0.6027, 0.5311, 0.5233, 0.561, 0.4974, 0.549, 0.0319]
10
Ridge Regression MSE by Fold [0.6301, 0.5592, 0.6333, 0.4951, 0.592, 0.5956, 0.6191, 0.627, 0.5577, 0.6051, 0.5914, 0.0412]
Lasso Regression MSE by Fold [0.5773, 0.6258, 0.6101, 0.568, 0.5844, 0.5768, 0.5884, 0.5895, 0.5356, 0.6555, 0.59

In [8]:
rr_model_best = MyRidgeRegression(0.01)
# fit models using all training data
rr_model_best.fit(X_train, y_train)
# predict on test data
rr_model_best.predict(X_test)
# compute mse on test data
best_rr_mse_vals = my_cross_val(rr_model_best,'mse', X_test, y_test)
# print mse on test data
print("Ridge Regression MSE by fold, Best Lambda on Test Set", best_rr_mse_vals)

Ridge Regression MSE by fold, Best Lambda on Test Set [0.5135, 0.5808, 0.5518, 0.664, 0.5584, 0.542, 0.5663, 0.4881, 0.544, 0.5902, 0.5599, 0.0449]
