## Question 5

In [None]:
pip install -r requirements.txt

In [35]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

In [36]:
## Load and clean data
df_train = pd.read_csv("train.csv")
df_train = df_train.drop(columns=["zipcode"])

columns = df_train.drop(columns=["price"]).columns.tolist()
df_train[columns] = (df_train[columns] - df_train[columns].mean()) / df_train[columns].std()
df_train["price"] = df_train["price"] / 1000

df_test = pd.read_csv("test.csv")
df_test = df_test.drop(columns=["zipcode", "id", "date"])

columns = df_test.drop(columns=["price"]).columns.tolist()
df_test[columns] = (df_test[columns] - df_test[columns].mean()) / df_test[columns].std()
df_test["price"] = df_test["price"] / 1000

#print(df_test.describe().round(2))

In [37]:
## Set training and testing data

X_train = df_train.drop(columns=["price"])
y_train = df_train["price"]
X_test = df_test.drop(columns=["price"])
y_test = df_test["price"]

## Question 5.1

In [42]:
## define Lienar Regression with Gradient Descent

class LinearRegressionGD:
    
    def __init__(self, alpha=0.01, iterations=100):
        self.alpha = alpha
        self.iterations = iterations
        self.theta = None
        
    def fit(self, X, y):
        n = X.shape[0]
        
        ones = np.ones((n, 1))
        X_b = np.hstack((ones, X))
        self.theta = np.zeros(X_b.shape[1])
        
        for _ in range(self.iterations):
            predictions = X_b @ self.theta
            errors = predictions - y
            
            gradient = (2/n) * (errors.T @ X_b)
            self.theta -= self.alpha * gradient
            
    def predict(self, X):
        n = X.shape[0]
        ones = np.ones((n, 1))
        X_b = np.hstack((ones, X))
        return X_b @ self.theta

## Question 5.2

In [43]:
## Train model with different alphas and iterations

alphas = [0.01, 0.1, 0.5]
iterations = [10, 50, 100]
results = [('a', 'iterations', 'Train MSE', 'Train R^2', 'Test MSE', 'Test R^2')]

for a in alphas:
    for i in iterations:
        model = LinearRegressionGD(a, i)

        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_test_pred = model.predict(X_test)

        train_mse = mean_squared_error(y_train, y_train_pred)
        train_r2 = r2_score(y_train, y_train_pred)
        test_mse = mean_squared_error(y_test, y_test_pred)
        test_r2 = r2_score(y_test, y_test_pred)

        results.append((a, i, train_mse, train_r2, test_mse, test_r2))

results_df = pd.DataFrame(results[1:], columns=results[0])

print(results_df.round(3).to_string())     

      a  iterations      Train MSE      Train R^2       Test MSE       Test R^2
0  0.01          10   2.357554e+05  -1.048000e+00   2.931676e+05  -7.580000e-01
1  0.01          50   6.970036e+04   3.950000e-01   1.079586e+05   3.520000e-01
2  0.01         100   3.676615e+04   6.810000e-01   6.896546e+04   5.860000e-01
3  0.10          10   3.504898e+04   6.960000e-01   6.661907e+04   6.000000e-01
4  0.10          50   3.142711e+04   7.270000e-01   6.000553e+04   6.400000e-01
5  0.10         100   3.141602e+04   7.270000e-01   5.990571e+04   6.410000e-01
6  0.50          10   1.428612e+17  -1.240791e+12   1.395471e+17  -8.369797e+11
7  0.50          50   1.143174e+67  -9.928799e+61   1.116654e+67  -6.697502e+61
8  0.50         100  2.735960e+129 -2.376262e+124  2.672491e+129 -1.602915e+124


## Question 5.3

Looking at alpha 0.01, we can see that the at 10 iterations the R^2 is terrible for both training and testing, showing that our model is doing a very bad job at predictions and that 10 iterations is not enough. At 100, the R^2 is 0.68 and 0.58 for training and test data respectively, meaning that through 100 iterations the model does improve a significant amount, but lots of iterations are needed for such a small alpha. With alpha 0.1, we can see that between 50 iterations and 100 iterations, the MSE and R^2 for both training and testing is about the same, meaning that the model has converged to an optimal solution. This also means that this solution is reached at at least 50 iterations, meaning that much less iterations are needed for this value of alpha. When observing alpha 0.5, we can see that the MSE and R^2 values reach astronomical values, showing that this step size is much too large and ends up diverging rather than converging. 