In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import os, sys, glob
import torch as T
import torch.optim as optim


import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

# PyTorch for Neural Networks
PyTorch is one of the most popular libraries for deep learning and neural network research. We will implement two different models in PyTorch to explain how it works and how it can be used.

# Linear regression as a hello world to PyTorch
First, to get familiar with the PyTorch workflow, we're just going to implement a simple linear regression. The goal is to train a neural network to recover the coefficients of the linear regression.  
Our equation will be of the form
$$ y = w_0*x_0 + w_1*x_1 + w_2*x_2 $$

So we will start by setting up our data. Then we'll build the simplest neural network model to represent this operation. Finally, we will train it on our data to show that it can recover the parameterization.

In [None]:
# Ground-truth coefficients (w_0, w_1, w_2) used to generate the targets below;
# the trained network should recover values close to these.
w = [0.12345, 3.4567, 6.789] 

In [None]:
# Draw 300 random samples with 3 features each and build noiseless targets
# from the ground-truth coefficients in `w`.
x = np.random.randn(300, 3)
# reshape(-1, 1) turns the targets into a column so they match the (N, 1)
# output of a Linear(3, 1) layer.
y = (w[0] * x[:, 0] + w[1] * x[:, 1] + w[2] * x[:, 2]).reshape(-1, 1)

# NumPy produces float64, while torch.nn layers hold float32 parameters by
# default, hence the cast. Plain tensors are used directly: `Variable` was
# never imported in this notebook (NameError on a fresh kernel) and has been a
# deprecated no-op wrapper around Tensor since PyTorch 0.4.
x_data = T.from_numpy(x.astype(np.float32))
y_data = T.from_numpy(y.astype(np.float32))


In [None]:
# Sanity check: y was reshaped to a single column, one row per sample.
print(y.shape)

So we generate 300 data points to train on, which is probably a fair amount of overkill. Now we can set up a simple linear model. We need to define the layers and the forward step.

In [None]:
class LinearRegressionNet(torch.nn.Module):
    """Single-layer network computing y = W x + b for 3 inputs and 1 output."""

    def __init__(self):
        super().__init__()
        # One fully connected layer: 3 input features -> 1 predicted value.
        self.linear = torch.nn.Linear(3, 1)

    def forward(self, x):
        """Return the linear layer's prediction for a batch `x` of shape (N, 3)."""
        return self.linear(x)


# Instantiate the class defined above. The original cell called
# `LinearRegressionModel()`, a name that does not exist, and raised NameError.
model = LinearRegressionNet()

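Next we need some way of telling our model how good it is. For this we will use the squared error between predictions and targets (summed over all data points rather than averaged). We will also use stochastic gradient descent (SGD) to update the weights.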

In [None]:
# Learning rate for SGD. It is small because the loss below is a *sum* over
# all samples, so gradients scale with the data set size.
lr = 0.001
# reduction='sum' is the supported spelling of the deprecated
# size_average=False: squared errors are summed, not averaged.
criterion = torch.nn.MSELoss(reduction='sum')
# Plain SGD over every learnable parameter of the model (weights and bias).
optimizer = torch.optim.SGD(model.parameters(), lr=lr)



Now we set up our training pass. For this we present our data to the model and then calculate a loss value. Next we zero out the gradient, as PyTorch otherwise accumulates gradients across backward passes. Then we call the backward step on the loss, which computes the gradient. Finally we ask our optimizer to update all of our weights and parameters.

In [None]:
# Number of full passes over the data; shared by the history buffer and the
# loop so the two cannot drift apart.
n_epochs = 10

# Row i holds the three weights of model.linear as they were *before* the
# update performed in epoch i.
parameter_history = np.zeros((n_epochs, 3))

for epoch in range(n_epochs):
    # Snapshot the current weights. The weight tensor has shape (1, 3), so it
    # is flattened explicitly to fit one row of the history array.
    parameter_history[epoch] = model.linear.weight.detach().cpu().numpy().ravel()

    # Forward pass: compute predicted y by passing x to the model.
    pred_y = model(x_data)

    # Compute the loss for this pass.
    loss = criterion(pred_y, y_data)

    # Zero gradients (PyTorch accumulates them otherwise), perform a backward
    # pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # .item() extracts the Python float; the legacy `.data` attribute printed
    # a tensor repr and bypasses autograd tracking.
    print('epoch {}, loss {}'.format(epoch, loss.item()))




In [None]:
# Trajectory of the first weight (column 0 of the history) across epochs.
fig, ax = plt.subplots(figsize=(5, 5))
ax.plot(parameter_history[:, 0])
ax.set_xlabel('Epoch')
ax.set_ylabel('Weight')

In [None]:
# Trajectories of all three weights across epochs, one line per column.
fig, ax = plt.subplots(figsize=(5, 5))
ax.plot(parameter_history)
ax.set_xlabel('Epoch')
ax.set_ylabel('Weight')

In [None]:
# Display the model's learnable tensors (weight and bias of the linear layer)
# so they can be compared with the ground-truth coefficients in `w`.
list(model.parameters())

In [None]:
# Gradient of the loss w.r.t. the weights left over from the most recent
# backward() call (gradients are zeroed before each backward pass in the loop).
model.linear.weight.grad

# MNIST model in PyTorch
MNIST is a fairly standard dataset and provides a nice, small, self-contained example we can use to see how a CNN would be implemented in PyTorch.

This example was adapted from one in the PyTorch Documentation, which is a wonderful resource you should check out. 

First we can start by defining our network. This will consist of two convolutional layers with max pooling, followed by two fully connected layers. We will use the ReLU function as our activation function, and use two dropout layers to help avoid overfitting.

In [None]:

class Net(nn.Module):
    """Small CNN for single-channel 28x28 images with 10 output classes.

    Architecture: conv(1->32, 3x3) -> ReLU -> conv(32->64, 3x3) -> ReLU ->
    2x2 max-pool -> dropout -> flatten -> fc(9216->128) -> ReLU -> dropout ->
    fc(128->10) -> log-softmax.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Channel-wise dropout: applied to the 4-D (N, C, H, W) feature map.
        self.dropout1 = nn.Dropout2d(0.25)
        # Element-wise dropout: applied to the flattened (N, 128) activations.
        # Dropout2d on a 2-D input is deprecated and emits a warning.
        self.dropout2 = nn.Dropout(0.5)
        # 9216 = 64 channels * 12 * 12: two un-padded 3x3 convs shrink
        # 28 -> 26 -> 24, and the 2x2 max-pool halves that to 12.
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape (N, 10) for input `x`.

        `x` must be shaped (N, 1, 28, 28) for the flattened size to match fc1.
        """
        x = F.relu(self.conv1(x))
        # ReLU after the second convolution as well; it was missing, so conv2
        # fed straight into the pooling layer without an activation.
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        # Keep the batch dimension, flatten everything else.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        # Log-probabilities, to be paired with F.nll_loss.
        return F.log_softmax(x, dim=1)

So first, in `__init__`, we set up each of our individual layers. Then we override the `forward` method to define the actual model and tie the layers together.

In [None]:
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one training epoch over `train_loader`.

    For every batch: move it to `device`, clear old gradients, compute the
    negative log-likelihood loss on the model output, back-propagate and take
    one optimizer step. Progress is printed every `args['log_interval']`
    batches. `epoch` is only used in the progress message.
    """
    # Put the model in training mode.
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        if step % args['log_interval'] != 0:
            continue

        # Samples seen so far are approximated as batches done * batch size.
        samples_seen = step * len(inputs)
        samples_total = len(train_loader.dataset)
        percent_done = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, samples_total, percent_done, loss.item()))


def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


These two helper functions will be responsible for training and testing the model.

In [None]:
# --- Run configuration -------------------------------------------------------
use_cuda = False
device_name = "cuda" if use_cuda else "cpu"
device = torch.device(device_name)

args = dict(
    batch_size=64,
    test_batch_size=1000,
    epochs=14,
    log_interval=10,
    lr=1,
    gamma=0.7,
)

# Extra DataLoader options are only passed when running on the GPU.
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# --- Data --------------------------------------------------------------------
# The same preprocessing is applied to both splits: convert to a tensor, then
# normalise with the constants (0.1307,) and (0.3081,).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Only the training split requests a download; the test split reads from the
# same '../data' directory.
train_set = datasets.MNIST('../data', train=True, download=True,
                           transform=mnist_transform)
test_set = datasets.MNIST('../data', train=False, transform=mnist_transform)

train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=args['batch_size'], shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=args['test_batch_size'], shuffle=True, **kwargs)

# --- Model, optimizer and learning-rate schedule -----------------------------
model = Net().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=args['lr'])
# Multiply the learning rate by gamma after every epoch.
scheduler = StepLR(optimizer, step_size=1, gamma=args['gamma'])

# --- Training: one train pass and one evaluation pass per epoch --------------
last_epoch = args['epochs']
for epoch in range(1, last_epoch + 1):
    train(args, model, device, train_loader, optimizer, epoch)
    test(args, model, device, test_loader)
    scheduler.step()