# ADALINE trained automatically with PyTorch (`torch.nn.Linear`, autograd, and `torch.optim.SGD`)

In [29]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torch
import os

from torch.autograd import grad
import torch.nn.functional as F

### Load data

In [30]:
# Read the regression dataset; index_col=0 makes the first CSV column the
# DataFrame index. The last expression previews the first three rows.
csv_path = '/nfs/team292/kt22/misc/nn_course/data/linreg-data.csv'
data = pd.read_csv(csv_path, index_col=0)
data.head(3)

Unnamed: 0,x1,x2,y
0,-2.201441,-0.468864,-30.355617
1,-0.291041,0.777277,25.560334
2,-0.327755,0.040071,32.797526


### Split data

In [31]:
# Feature matrix (columns x1, x2) and target vector (column y) as float32 tensors.
X = torch.tensor(data[['x1', 'x2']].values, dtype = torch.float)
y = torch.tensor(data['y'].values, dtype = torch.float)

# -- Shuffle observations
# A dedicated, seeded generator makes the split reproducible on every
# Restart & Run All without touching the global torch RNG.
SPLIT_SEED = 123
split_rng = torch.Generator().manual_seed(SPLIT_SEED)
idx = torch.randperm(y.size(0), dtype = torch.long, generator = split_rng)
X, y = X[idx], y[idx]

# -- Split train/test (first 70% of the shuffled rows vs. the remainder)
cutoff = int(idx.size(0) * 0.7)

# X and y are already in shuffled order, so plain slices give a random,
# disjoint split; indexing with `idx` again would only permute a second time.
X_train, X_test = X[:cutoff], X[cutoff:]
y_train, y_test = y[:cutoff], y[cutoff:]

### Normalize data

In [32]:
# Standardize each feature column using statistics computed on the training
# split only; the same mu/sigma are then applied to the test split.
mu = X_train.mean(dim = 0)
sigma = X_train.std(dim = 0)
X_train, X_test = (X_train - mu) / sigma, (X_test - mu) / sigma

### ADALINE class and training loop

In [33]:
def loss_func(yhat, y):
    """Mean squared error between predictions and targets.

    Parameters
    ----------
    yhat : torch.Tensor
        Predicted values.
    y : torch.Tensor
        Target values; subtracted elementwise from `yhat`.

    Returns
    -------
    torch.Tensor
        Mean of the squared differences.
    """
    residuals = yhat - y
    return (residuals ** 2).mean()

In [34]:
class Adaline3(torch.nn.Module):
    """ADALINE model: a single linear layer followed by an identity activation.

    Parameters
    ----------
    num_features : int
        Number of input features fed to the linear layer.
    """

    def __init__(self, num_features):
        super().__init__()
        self.linear = torch.nn.Linear(num_features, 1)

        # Replace the random default initialisation with zeros
        # (don't do this for multi-layer nets!). The in-place fill runs
        # outside of autograd tracking.
        with torch.no_grad():
            for param in (self.linear.weight, self.linear.bias):
                param.zero_()

    def forward(self, x):
        """Return the net input of the linear layer flattened to one dimension."""
        # Identity activation: the net input is passed through unchanged.
        return self.linear(x).view(-1)

    
####################################################
##### Training and evaluation wrappers
###################################################


def train(model, x, y, num_epochs, learning_rate = 0.01, seed = 123, minibatch_size = 10):
    """Fit `model` with minibatch SGD on a mean-squared-error loss.

    Parameters
    ----------
    model : torch.nn.Module
        Model to optimise. It is invoked as `model(batch)` so that any
        registered hooks run; its output is compared to the matching slice
        of `y` with `F.mse_loss`.
    x : torch.Tensor
        Inputs, indexed along the first dimension by minibatch indices.
    y : torch.Tensor
        Targets; `y.size(0)` defines the number of observations.
    num_epochs : int
        Number of full passes over the data.
    learning_rate : float
        Step size passed to `torch.optim.SGD`.
    seed : int
        Passed to `torch.manual_seed`; note this reseeds the global torch RNG.
    minibatch_size : int
        Chunk size passed to `torch.split`; the last minibatch of an epoch
        may be smaller.

    Returns
    -------
    cost : list
        One entry per epoch: the MSE over all of `x`/`y` after that epoch,
        stored as the tensor returned by `F.mse_loss`.
    optimizer : torch.optim.SGD
        The optimizer instance used for training.
    """
    cost = []

    torch.manual_seed(seed)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr = learning_rate)

    for e in range(num_epochs):

        #### Shuffle epoch
        shuffle_idx = torch.randperm(y.size(0),
                                     dtype = torch.long)
        minibatches = torch.split(shuffle_idx, minibatch_size)

        for minibatch_idx in minibatches:

            #### Compute outputs ####
            # Call the module itself rather than .forward() so hooks are honoured.
            yhat = model(x[minibatch_idx])

            # you could also use our "manual" loss_func
            loss = F.mse_loss(yhat, y[minibatch_idx])

            #### Reset gradients from previous iteration ####
            optimizer.zero_grad()

            #### Compute gradients ####
            loss.backward()

            #### Update weights ####
            optimizer.step()

        #### Logging ####
        with torch.no_grad():
            # context manager to
            # avoid building graph during "inference"
            # to save memory
            yhat = model(x)
            # Same loss as the training step, so the logged metric matches
            # what is optimised and no helper from another cell is required.
            curr_loss = F.mse_loss(yhat, y)
            print('Epoch: %03d' % (e+1), end="")
            print(' | MSE: %.5f' % curr_loss)
            cost.append(curr_loss)

    return cost, optimizer

In [35]:
# Build an ADALINE sized to the number of feature columns, then fit it with
# minibatch SGD on the training split.
model = Adaline3(num_features = X_train.shape[1])

fit_options = dict(num_epochs=20,
                   learning_rate=0.01,
                   seed=123,
                   minibatch_size=10)

# `cost` collects the per-epoch MSE values; `optimizer` is the SGD instance used.
cost, optimizer = train(model, X_train, y_train.float(), **fit_options)

Epoch: 001 | MSE: 478.79483
Epoch: 002 | MSE: 389.34915
Epoch: 003 | MSE: 384.71274
Epoch: 004 | MSE: 384.27646
Epoch: 005 | MSE: 384.39951
Epoch: 006 | MSE: 384.31329
Epoch: 007 | MSE: 384.53223
Epoch: 008 | MSE: 384.33130
Epoch: 009 | MSE: 384.23593
Epoch: 010 | MSE: 384.29434
Epoch: 011 | MSE: 384.61581
Epoch: 012 | MSE: 384.42798
Epoch: 013 | MSE: 384.36292
Epoch: 014 | MSE: 384.35608
Epoch: 015 | MSE: 384.24762
Epoch: 016 | MSE: 384.44861
Epoch: 017 | MSE: 384.31253
Epoch: 018 | MSE: 384.29813
Epoch: 019 | MSE: 384.42749
Epoch: 020 | MSE: 384.29214


In [38]:
# Evaluate (without calling) the bound `step` method; its repr includes the
# SGD optimizer's parameter-group settings (lr, momentum, weight_decay, ...).
optimizer.step

<bound method SGD.step of SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.01
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)>