<br><font face="Times New Roman" size=5><div dir=ltr align=center>
<font color=blue size=8>
    Introduction to Machine Learning <br>
<font color=red size=5>
    Sharif University of Technology - Computer Engineering Department <br>
    Fall 2022<br> <br>
<font color=black size=6>
    Homework 2: Practical - Linear Regression
    </div>
<br><br>
<font size=4>
   **Name**: Seiede Solale Mohammadi <br>
   **Student ID**: 98106015<br> <br>

<font face="Times New Roman" size=4><div dir=ltr>
# Problem 1: Linear Regression Model (40 + 30 optional points)
According to <a href="https://github.com/asharifiz/Introduction_to_Machine_Learning/blob/main/Jupyter_Notebooks/Chapter_02_Classical_Models/Linear%20regression.ipynb"><font face="Roboto">Linear Regression Notebook</font></a>, train a linear regression model on an arbitrary dataset. Explain your chosen dataset and split your data into train and test sets, then predict values for the test set using your trained model. Try to find the best hyperparameters for your model. (Using Lasso Regression, Ridge Regression or Elastic Net and comparing them will have extra optional points)
<br> Explain each step of your workflow.

In [61]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [62]:
class Regression:
    def __init__(self, n_iter, alpha):
        self.n_iter = n_iter
        self.alpha = alpha
        self.theta = None
        self.bias = 0

    def update_params(self, theta_grad, b_grad, n):
        self.theta -= self.alpha * theta_grad
        self.bias -= self.alpha * b_grad

    def grads(self, error, X):
        raise NotImplemented

    def fit(self, X, y):
        self.theta = np.zeros((X.shape[1],))
        n = X.shape[0]

        for it in range(self.n_iter):
            error = self.predict(X).flatten() - y
            theta_grad, b_grad = self.grads(error, X)
            self.update_params(theta_grad, b_grad, n)

    def predict(self, x):
        return np.dot(x, self.theta) + self.bias


In [63]:
class LinearRegression(Regression):
    def __init__(self, n_iter, alpha):
        super().__init__(n_iter, alpha)

    def grads(self, error, X):
        theta_grad = np.dot(X.T, error) / X.shape[0]
        b_grad = np.sum(error) / X.shape[0]
        return theta_grad, b_grad

In [64]:
class RidgeRegression(Regression):
    def __init__(self, n_iter, alpha, l2_p):
        super().__init__(n_iter, alpha)
        self.l2_p = l2_p

    def grads(self, error, X):
        theta_grad = 2 * np.dot(X.T, error) / X.shape[0] + 2 * self.l2_p * self.theta
        b_grad = 2 * np.sum(error) / X.shape[0] + 2 * self.l2_p * self.bias
        return theta_grad, b_grad

    def predict(self, x):
        return np.dot(x, self.theta) + self.bias

In [65]:
class LassoRegression(Regression):
    def __init__(self, n_iter, alpha, l1_p):
        super().__init__(n_iter, alpha)
        self.l1_p = l1_p

    def grads(self, error, X):
        theta_grad = np.dot(X.T, error) / X.shape[0] + self.l1_p * np.sign(self.theta)
        b_grad = np.sum(error) / X.shape[0] + self.l1_p * np.sign(self.bias)
        return theta_grad, b_grad

In [66]:
class ElasticNetRegression(Regression):
    def __init__(self, n_iter, alpha, l1_p, ratio):
        super().__init__(n_iter, alpha)
        self.l1_p = l1_p
        self.ratio = ratio

    def grads(self, error, X):
        theta_grad = np.dot(X.T, error) / X.shape[0] + self.ratio * self.l1_p * np.sign(self.theta) + (
                1 - self.ratio) * self.l1_p * self.theta
        b_grad = np.sum(error) / X.shape[0] + self.ratio * self.l1_p * np.sign(self.bias) + (
                1 - self.ratio) * self.l1_p * self.bias
        return theta_grad, b_grad

In [67]:
def rmse(y, y_p):
    return np.sqrt(np.sum((y_p - y) ** 2) / y.shape[0])

In [68]:
df = pd.read_csv('data/weatherHistory.csv')
df.head()
X_d = df[['Apparent Temperature (C)', 'Humidity', 'Wind Speed (km/h)',
          'Wind Bearing (degrees)', 'Visibility (km)', 'Pressure (millibars)']]
y_d = df['Temperature (C)']
X_d.head()

Unnamed: 0,Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars)
0,7.388889,0.89,14.1197,251.0,15.8263,1015.13
1,7.227778,0.86,14.2646,259.0,15.8263,1015.63
2,9.377778,0.89,3.9284,204.0,14.9569,1015.94
3,5.944444,0.83,14.1036,269.0,15.8263,1016.41
4,6.977778,0.83,11.0446,259.0,15.8263,1016.51


In [69]:
min_params = X_d.min()
max_params = X_d.max()
X_d = (X_d - min_params) / (max_params - min_params)
X_d.head()

Unnamed: 0,Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars)
0,0.523486,0.89,0.22113,0.699164,0.983,0.970135
1,0.521084,0.86,0.223399,0.721448,0.983,0.970613
2,0.553144,0.89,0.061523,0.568245,0.929,0.970909
3,0.501947,0.83,0.220877,0.749304,0.983,0.971358
4,0.517356,0.83,0.17297,0.721448,0.983,0.971454


In [70]:
x_train, x_val, y_train, y_val = train_test_split(X_d, y_d, test_size=0.20, random_state=42)
x_train.shape

(77162, 6)

In [71]:
for n_it in [500, 1000, 3000, 5000]:
    for alpha in [0.5, 0.1, 0.01, 0.001]:
        model = LinearRegression(n_iter=n_it, alpha=alpha)
        model.fit(x_train, y_train)
        y_pred = model.predict(x_val)
        if y_pred is None:
            res = 'diverge'
        else:
            res = rmse(y=y_val, y_p=y_pred)
        print(f'alpha: {alpha}, iter: {n_it} -> rmse : {res}')

alpha: 0.5, iter: 500 -> rmse : 1.3082625369709613
alpha: 0.1, iter: 500 -> rmse : 3.116959894897317
alpha: 0.01, iter: 500 -> rmse : 7.545722762130821
alpha: 0.001, iter: 500 -> rmse : 9.388790380639366
alpha: 0.5, iter: 1000 -> rmse : 1.0083553614519478
alpha: 0.1, iter: 1000 -> rmse : 2.1138470180796314
alpha: 0.01, iter: 1000 -> rmse : 6.329975350131406
alpha: 0.001, iter: 1000 -> rmse : 8.959770814228527
alpha: 0.5, iter: 3000 -> rmse : 0.948222890972822
alpha: 0.1, iter: 3000 -> rmse : 1.2024423850227657
alpha: 0.01, iter: 3000 -> rmse : 4.0680485154806885
alpha: 0.001, iter: 3000 -> rmse : 8.19008197699283
alpha: 0.5, iter: 5000 -> rmse : 0.9482218377735062
alpha: 0.1, iter: 5000 -> rmse : 1.0085240530031783
alpha: 0.01, iter: 5000 -> rmse : 3.1184695482297715
alpha: 0.001, iter: 5000 -> rmse : 7.546130046068973


In [72]:
for n_it in [500, 1000, 3000]:
    for alpha in [0.1, 0.01]:
        for l2_param in [0.01, 0.05, 0.1]:
            model = RidgeRegression(n_iter=n_it, alpha=alpha, l2_p=l2_param)
            model.fit(x_train, y_train)
            y_pred = model.predict(x_val)
            if y_pred is None:
                res = 'diverge'
            else:
                res = rmse(y=y_val, y_p=y_pred)
            print(f'alpha: {alpha}, iter: {n_it} l2_param: {l2_param} -> rmse : {res}')

alpha: 0.1, iter: 500 l2_param: 0.01 -> rmse : 3.1711874526250856
alpha: 0.1, iter: 500 l2_param: 0.05 -> rmse : 5.588078534398759
alpha: 0.1, iter: 500 l2_param: 0.1 -> rmse : 6.767254635044586
alpha: 0.01, iter: 500 l2_param: 0.01 -> rmse : 6.447138963175827
alpha: 0.01, iter: 500 l2_param: 0.05 -> rmse : 6.866433799020907
alpha: 0.01, iter: 500 l2_param: 0.1 -> rmse : 7.287946916426394
alpha: 0.1, iter: 1000 l2_param: 0.01 -> rmse : 2.965715448742652
alpha: 0.1, iter: 1000 l2_param: 0.05 -> rmse : 5.586712220232421
alpha: 0.1, iter: 1000 l2_param: 0.1 -> rmse : 6.767250169919528
alpha: 0.01, iter: 1000 l2_param: 0.01 -> rmse : 5.165450650578604
alpha: 0.01, iter: 1000 l2_param: 0.05 -> rmse : 6.090718986969882
alpha: 0.01, iter: 1000 l2_param: 0.1 -> rmse : 6.886489617097173
alpha: 0.1, iter: 3000 l2_param: 0.01 -> rmse : 2.9308093400509043
alpha: 0.1, iter: 3000 l2_param: 0.05 -> rmse : 5.58670961326033
alpha: 0.1, iter: 3000 l2_param: 0.1 -> rmse : 6.767250169871234
alpha: 0.01, i

In [73]:
for n_it in [500, 1000, 3000]:
    for alpha in [0.1, 0.01]:
        for l1_param in [0.5, 0.1, 0.1]:
            model = LassoRegression(n_iter=n_it, alpha=alpha, l1_p=l1_param)
            model.fit(x_train, y_train)
            y_pred = model.predict(x_val)
            if y_pred is None:
                res = 'diverge'
            else:
                res = rmse(y=y_val, y_p=y_pred)
            print(f'alpha: {alpha}, iter: {n_it} l1_param: {l1_param} -> rmse : {res}')

alpha: 0.1, iter: 500 l1_param: 0.5 -> rmse : 4.782573027954316
alpha: 0.1, iter: 500 l1_param: 0.1 -> rmse : 3.284123967456391
alpha: 0.1, iter: 500 l1_param: 0.1 -> rmse : 3.284123967456391
alpha: 0.01, iter: 500 l1_param: 0.5 -> rmse : 7.991148244246787
alpha: 0.01, iter: 500 l1_param: 0.1 -> rmse : 7.6259473361780135
alpha: 0.01, iter: 500 l1_param: 0.1 -> rmse : 7.6259473361780135
alpha: 0.1, iter: 1000 l1_param: 0.5 -> rmse : 4.170518596495713
alpha: 0.1, iter: 1000 l1_param: 0.1 -> rmse : 2.457037606268117
alpha: 0.1, iter: 1000 l1_param: 0.1 -> rmse : 2.457037606268117
alpha: 0.01, iter: 1000 l1_param: 0.5 -> rmse : 7.1832422373715215
alpha: 0.01, iter: 1000 l1_param: 0.1 -> rmse : 6.480903522529443
alpha: 0.01, iter: 1000 l1_param: 0.1 -> rmse : 6.480903522529443
alpha: 0.1, iter: 3000 l1_param: 0.5 -> rmse : 4.110984522337069
alpha: 0.1, iter: 3000 l1_param: 0.1 -> rmse : 1.7842052483007065
alpha: 0.1, iter: 3000 l1_param: 0.1 -> rmse : 1.7842052483007065
alpha: 0.01, iter: 3

In [74]:
for n_it in [500, 1000, 3000]:
    for alpha in [0.1, 0.01]:
        for l1_param in [0.5, 0.1, 0.05]:
            for ratio in [0.2, 0.5, 0.8]:
                model = ElasticNetRegression(n_iter=n_it, alpha=alpha, l1_p=l1_param, ratio=ratio)
                model.fit(x_train, y_train)
                y_pred = model.predict(x_val)
                if y_pred is None:
                    res = 'diverge'
                else:
                    res = rmse(y=y_val, y_p=y_pred)
                print(f'alpha: {alpha}, iter: {n_it} l1_param: {l1_param} ration: {ratio} -> rmse : {res}')

alpha: 0.1, iter: 500 l1_param: 0.5 ration: 0.2 -> rmse : 8.595933527748969
alpha: 0.1, iter: 500 l1_param: 0.5 ration: 0.5 -> rmse : 8.283877296536176
alpha: 0.1, iter: 500 l1_param: 0.5 ration: 0.8 -> rmse : 7.454757149803114
alpha: 0.1, iter: 500 l1_param: 0.1 ration: 0.2 -> rmse : 6.4415713423213
alpha: 0.1, iter: 500 l1_param: 0.1 ration: 0.5 -> rmse : 5.728526587789148
alpha: 0.1, iter: 500 l1_param: 0.1 ration: 0.8 -> rmse : 4.5089523391588315
alpha: 0.1, iter: 500 l1_param: 0.05 ration: 0.2 -> rmse : 5.298438199543474
alpha: 0.1, iter: 500 l1_param: 0.05 ration: 0.5 -> rmse : 4.665991231038023
alpha: 0.1, iter: 500 l1_param: 0.05 ration: 0.8 -> rmse : 3.8319493394067456
alpha: 0.01, iter: 500 l1_param: 0.5 ration: 0.2 -> rmse : 8.656846307293126
alpha: 0.01, iter: 500 l1_param: 0.5 ration: 0.5 -> rmse : 8.469098783828226
alpha: 0.01, iter: 500 l1_param: 0.5 ration: 0.8 -> rmse : 8.21613769160368
alpha: 0.01, iter: 500 l1_param: 0.1 ration: 0.2 -> rmse : 7.857094582232565
alpha: