# Linear regression with gradient descent

In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torch
import os

  from .autonotebook import tqdm as notebook_tqdm


### Load data

In [2]:
# Location of the CSV holding the regression data; its first column is used
# as the DataFrame index.
DATA_PATH = '/nfs/team292/kt22/misc/nn_course/data/linreg-data.csv'

data = pd.read_csv(DATA_PATH, index_col=0)

# Preview the first three rows.
data.head(3)

Unnamed: 0,x1,x2,y
0,-2.201441,-0.468864,-30.355617
1,-0.291041,0.777277,25.560334
2,-0.327755,0.040071,32.797526


### Split data

In [3]:
# Features (columns x1, x2) and target (column y) as float32 tensors.
X = torch.tensor(data[['x1', 'x2']].values, dtype = torch.float)
y = torch.tensor(data['y'].values, dtype = torch.float)

# -- Shuffle observations
# A dedicated, seeded generator makes the permutation (and therefore the
# train/test split) reproducible across kernel restarts without touching
# the global RNG state.
shuffle_rng = torch.Generator().manual_seed(0)
idx = torch.randperm(y.size(0), generator = shuffle_rng, dtype = torch.long)
X, y = X[idx], y[idx]

# -- Split train/test (first 70 % of the shuffled rows go to training)
cutoff = int(idx.size(0) * 0.7)

# X and y are already shuffled above, so slice them positionally rather
# than indexing with idx a second time.
X_train, X_test = X[:cutoff], X[cutoff:]
y_train, y_test = y[:cutoff], y[cutoff:]

### Normalize data

In [4]:
# Standardise each feature column with the mean / standard deviation of the
# training split; the same statistics are applied to the test split.
mu = X_train.mean(dim = 0)
sigma = X_train.std(dim = 0)
X_train, X_test = (X_train - mu) / sigma, (X_test - mu) / sigma

### Linear regression model

In [5]:
class LinearRegression():
    """Linear regression model with a hand-written forward and backward pass.

    Attributes:
        num_features: number of input features.
        weights: float tensor of shape (num_features, 1), initialised to zero.
        bias: float tensor of shape (1,), initialised to zero.
    """

    def __init__(self, num_features):
        """Create a model with zero-initialised weights and bias."""
        self.num_features = num_features
        self.weights = torch.zeros(num_features, 1,
                                   dtype = torch.float)
        self.bias = torch.zeros(1, dtype = torch.float)

    def forward(self, x):
        """Return predictions for ``x`` as a 1-D tensor.

        Args:
            x: 2-D tensor of shape (n, num_features).

        Returns:
            1-D tensor of length n holding ``x @ weights + bias``.
        """
        # The activation is the identity, so the net input is the output.
        net_input = torch.mm(x, self.weights) + self.bias
        return net_input.view(-1)

    def backward(self, x, yhat, y):
        """Return the negative gradients of the mean squared error.

        The per-sample loss is (yhat - y)^2 with yhat = x @ w + b, hence
            dL/dyhat = 2 * (yhat - y),  dyhat/dw = x,  dyhat/db = 1.
        Gradients are averaged over the samples.

        Args:
            x: 2-D tensor of shape (n, num_features).
            yhat: predictions, n elements.
            y: targets, n elements (shape (n,) or (n, 1)).

        Returns:
            Tuple ``(negative_grad_weights, negative_grad_bias)``; the first
            has shape (num_features, 1), the second is a 0-dim tensor.
        """
        # Flatten both operands so a column-vector target cannot silently
        # broadcast the residual to an (n, n) matrix.
        residuals = yhat.reshape(-1) - y.reshape(-1)
        n = residuals.size(0)

        grad_loss_yhat = 2.0 * residuals

        # Chain rule, averaged over the n samples.
        grad_loss_weights = torch.mm(x.t(), grad_loss_yhat.view(-1, 1)) / n
        grad_loss_bias = torch.sum(grad_loss_yhat) / n

        # Return the negative gradient (the descent direction).
        return -grad_loss_weights, -grad_loss_bias


#### Training and evaluation functions

In [6]:
def loss(yhat, y):
    """Mean squared error between predictions ``yhat`` and targets ``y``."""
    residuals = yhat - y
    return residuals.pow(2).mean()


def train(model, x, y, num_epochs, learning_rate=0.01):
    """Fit ``model`` with full-batch gradient descent and log the MSE.

    Args:
        model: object providing ``forward(x)``, ``backward(x, yhat, y)`` and
            the ``weights`` / ``bias`` tensors that are updated in place.
        x: input features passed to the model.
        y: targets.
        num_epochs: number of update steps to run.
        learning_rate: step size applied to the negative gradients.

    Returns:
        List with one loss value per epoch.
    """
    history = []
    for epoch in range(1, num_epochs + 1):
        # Forward pass with the current parameters.
        predictions = model.forward(x)

        # backward() hands back negative gradients, so the descent step
        # is an addition.
        step_w, step_b = model.backward(x, predictions, y)
        model.weights += learning_rate * step_w
        model.bias += learning_rate * step_b

        # The logged loss reuses the predictions made before this update,
        # which avoids a second forward pass per epoch.
        epoch_loss = loss(predictions, y)
        print('Epoch: %03d' % epoch, end="")
        print(' | MSE: %.5f' % epoch_loss)
        history.append(epoch_loss)

    return history

### Train linear model

In [7]:
# One weight per feature column of the training matrix.
model = LinearRegression(num_features = X_train.shape[1])

# Run 100 epochs of gradient descent; `cost` collects the per-epoch loss.
cost = train(model, X_train, y_train, num_epochs = 100, learning_rate = 0.05)

Epoch: 001 | MSE: 1817.39343
Epoch: 002 | MSE: 1548.56519
Epoch: 003 | MSE: 1330.74719
Epoch: 004 | MSE: 1154.25964
Epoch: 005 | MSE: 1011.26062
Epoch: 006 | MSE: 895.39545
Epoch: 007 | MSE: 801.51544
Epoch: 008 | MSE: 725.44873
Epoch: 009 | MSE: 663.81531
Epoch: 010 | MSE: 613.87659
Epoch: 011 | MSE: 573.41339
Epoch: 012 | MSE: 540.62799
Epoch: 013 | MSE: 514.06342
Epoch: 014 | MSE: 492.53915
Epoch: 015 | MSE: 475.09903
Epoch: 016 | MSE: 460.96799
Epoch: 017 | MSE: 449.51816
Epoch: 018 | MSE: 440.24084
Epoch: 019 | MSE: 432.72385
Epoch: 020 | MSE: 426.63303
Epoch: 021 | MSE: 421.69794
Epoch: 022 | MSE: 417.69925
Epoch: 023 | MSE: 414.45920
Epoch: 024 | MSE: 411.83392
Epoch: 025 | MSE: 409.70679
Epoch: 026 | MSE: 407.98318
Epoch: 027 | MSE: 406.58661
Epoch: 028 | MSE: 405.45505
Epoch: 029 | MSE: 404.53818
Epoch: 030 | MSE: 403.79526
Epoch: 031 | MSE: 403.19330
Epoch: 032 | MSE: 402.70554
Epoch: 033 | MSE: 402.31027
Epoch: 034 | MSE: 401.99008
Epoch: 035 | MSE: 401.73062
Epoch: 036 | MS