<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#2019-04-26_week09_fundamental-learning_whatis-pytorchnn-tute" data-toc-modified-id="2019-04-26_week09_fundamental-learning_whatis-pytorchnn-tute-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>2019-04-26_week09_fundamental-learning_whatis-pytorchnn-tute</a></span><ul class="toc-item"><li><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1.0.1"><span class="toc-item-num">1.0.1&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href="#FashionMNIST-data-import-and-check" data-toc-modified-id="FashionMNIST-data-import-and-check-1.0.2"><span class="toc-item-num">1.0.2&nbsp;&nbsp;</span>FashionMNIST data import and check</a></span></li><li><span><a href="#load-up-MNIST-data" data-toc-modified-id="load-up-MNIST-data-1.0.3"><span class="toc-item-num">1.0.3&nbsp;&nbsp;</span>load up MNIST data</a></span><ul class="toc-item"><li><span><a href="#Aside-on-using-lists-of-indices-to-slice-np-arrays" data-toc-modified-id="Aside-on-using-lists-of-indices-to-slice-np-arrays-1.0.3.1"><span class="toc-item-num">1.0.3.1&nbsp;&nbsp;</span>Aside on using lists of indices to slice np arrays</a></span></li></ul></li></ul></li><li><span><a href="#Neural-netw-from-scratch-(no-torch.nn)" data-toc-modified-id="Neural-netw-from-scratch-(no-torch.nn)-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Neural netw from scratch (no torch.nn)</a></span><ul class="toc-item"><li><span><a href="#run-through-a-training-loop" data-toc-modified-id="run-through-a-training-loop-1.1.1"><span class="toc-item-num">1.1.1&nbsp;&nbsp;</span>run through a training loop</a></span></li></ul></li><li><span><a href="#refactor-to-use-nn-specific-code" data-toc-modified-id="refactor-to-use-nn-specific-code-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>refactor to use nn specific code</a></span><ul class="toc-item"><li><span><a href="#using-nn.sequential-to-replace-a-custom-model-class" 
data-toc-modified-id="using-nn.sequential-to-replace-a-custom-model-class-1.2.1"><span class="toc-item-num">1.2.1&nbsp;&nbsp;</span>using nn.sequential to replace a custom model class</a></span></li></ul></li></ul></li></ul></div>

# 2019-04-26_week09_fundamental-learning_whatis-pytorchnn-tute

### Imports

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn 
from torch import optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

### FashionMNIST data import and check

In [None]:
# The only transform applied to each sample is ToTensor().
transform = transforms.Compose([transforms.ToTensor()])

# Training split and its shuffled mini-batch loader.
fmnist_train = torchvision.datasets.FashionMNIST(
    "data/Fashion-MNIST/", train=True, download=True, transform=transform
)
train_loader = DataLoader(
    fmnist_train, batch_size=64, shuffle=True, num_workers=4
)

# Test split, loaded with the same batch size, shuffling and worker count.
fmnist_test = torchvision.datasets.FashionMNIST(
    "data/Fashion-MNIST/", train=False, download=True, transform=transform
)
test_loader = DataLoader(
    fmnist_test, batch_size=64, shuffle=True, num_workers=4
)

# Class names, looked up by integer label (category_labels[label]).
category_labels = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

In [None]:
# Pull a single batch from the training loader for inspection.
# The built-in next() is used instead of the iterator's legacy `.next()`
# method, which is not available on recent PyTorch releases.
dataiter = iter(train_loader)
images, labels = next(dataiter)

In [None]:
# Shape of the inspected batch (the plotting code indexes it with four axes).
# presumably (64, 1, 28, 28) given batch_size=64 — TODO confirm
images.shape

In [None]:
# Print the class name of the first sample and draw its first channel.
print(category_labels[int(labels[0])])
plt.imshow(images[0, 0].numpy(), cmap="gray")
plt.show()

### load up MNIST data

This is important because some of the initial matrix math only seems to work with the flattened vectors used here, and I don't want to overcomplicate things initially.

In [None]:
import math

In [None]:
from pathlib import Path
import requests

# Local cache directory for the pickled MNIST archive.
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

# NOTE(review): confirm this host still serves the archive.
URL = "http://deeplearning.net/data/mnist/"
FILENAME = "mnist.pkl.gz"

# Download only when the archive is not already cached on disk.
if not (PATH / FILENAME).exists():
    # A timeout stops the cell from hanging forever on an unresponsive host.
    response = requests.get(URL + FILENAME, timeout=60)
    # Fail loudly on HTTP errors: otherwise an error page would be saved as
    # the archive and the exists() guard above would never re-download it.
    response.raise_for_status()
    # write_bytes() opens, writes and closes the file in one call.
    (PATH / FILENAME).write_bytes(response.content)

In [None]:
import pickle
import gzip

# NOTE(review): pickle.load can execute arbitrary code; this file comes from a
# network download, so only run this against a source you trust.
# The third element of the pickled tuple is discarded.
with gzip.open(PATH / FILENAME, "rb") as archive:
    (x_train, y_train), (x_valid, y_valid), _ = pickle.load(
        archive, encoding="latin-1"
    )

In [None]:
# Each row of x_train is reshaped to 28x28 for display, i.e. samples are
# stored flattened; the printed shape shows (n_samples, n_pixels).
plt.imshow(x_train[0].reshape((28, 28)), cmap="gray")
print(x_train.shape)

In [None]:
# Convert the loaded arrays (presumably NumPy arrays from the pickle) into
# torch tensors, rebinding the same four names.
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)
# was n and c in the example code
train_size, image_size = x_train.shape
# Inspect the data, its shape and the label range.
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())

#### Aside on using lists of indices to slice np arrays

Passing a list of indices to a NumPy array returns the elements at those indices, e.g.

`x[[0,1,2],[4,3,1]]`
returns `[x[0,4], x[1,3], x[2,1]]`


and is equivalent to `x[range(3),[4,3,1]]`


## Neural netw from scratch (no torch.nn)

In [None]:
# Parameters of a single linear layer mapping 784 inputs to 10 outputs.
# Random weights are scaled by 1/sqrt(784); gradient tracking is switched on
# only after that scaling so the initialisation itself is not recorded.
weights = (torch.randn(784, 10) / math.sqrt(784)).requires_grad_()
bias = torch.zeros(10, requires_grad=True)

In [None]:
def log_softmax(x):
    """Return the log-softmax of `x` along its last dimension.

    Computes ``x - log(sum(exp(x)))`` over the last axis. The log-sum-exp is
    evaluated with ``Tensor.logsumexp`` rather than a literal
    ``exp().sum().log()``, so large entries do not overflow to ``inf`` and
    turn the whole row into ``-inf``/``nan``.

    Args:
        x: tensor of scores; the last dimension is normalised.

    Returns:
        Tensor of the same shape as `x` holding log-probabilities.
    """
    return x - x.logsumexp(-1, keepdim=True)

def model(xb):
    """Apply the linear layer (global `weights`, `bias`) and log_softmax to `xb`."""
    scores = xb @ weights + bias
    return log_softmax(scores)

In [None]:
batch_size = 64

# Run the model on the first `batch_size` training rows and show the first
# row of its output together with the output shape.
x_batch = x_train[:batch_size]
predictions = model(x_batch)
print(predictions[0], predictions.shape)

In [None]:
# No log is taken here: the model's log_softmax output already contains it.
# The function just picks, for every row, the entry in the target column and
# returns the negated mean of those entries.
def negative_log_likelihood(input, target):
    """Negated mean of `input[i, target[i]]` over all rows `i`."""
    rows = range(target.shape[0])
    picked = input[rows, target]
    return -picked.mean()


loss_func = negative_log_likelihood

def accuracy(out, yb):
    """Fraction of rows of `out` whose largest column index equals `yb`."""
    hits = out.argmax(dim=1) == yb
    return hits.float().mean()

In [None]:
# Labels matching x_batch; report loss and accuracy of the `predictions`
# computed before any training step has been taken.
y_batch = y_train[:batch_size]
print(f"baseline loss : {loss_func(predictions,y_batch).item():0.3f}") 
print(f"baseline accuracy : {accuracy(predictions,y_batch).item()*100:0.1f}%")

### run through a training loop

In [None]:


batch_size = 64
learning_rate = 0.5
epochs = 2

for epoch in range(epochs):
    # Walk over the training rows in consecutive chunks of `batch_size`;
    # the final chunk may be shorter.
    for i, start_i in enumerate(range(0, train_size, batch_size)):
        rows = slice(start_i, start_i + batch_size)
        xb, yb = x_train[rows], y_train[rows]
        pred = model(xb)
        loss = loss_func(pred, yb)
        if i % 100 == 0:
            print(f"{epoch:03d}, {i:05d}  loss : {loss:0.3f}")
        loss.backward()
        # The parameter update is done by hand, so it must not be recorded
        # as part of the graph used for the next gradient computation.
        with torch.no_grad():
            for param in (weights, bias):
                param -= param.grad * learning_rate
                # Clear the gradient so the next backward() starts from zero.
                param.grad.zero_()
            

In [None]:
# Re-evaluate the same first batch (x_batch / y_batch) with the updated
# weights and bias to compare against the baseline numbers.
print(f"loss : {loss_func(model(x_batch),y_batch).item():0.3f}") 
print(f"accuracy : {accuracy(model(x_batch),y_batch).item()*100:0.1f}%")

## refactor to use nn specific code

In [None]:
# Rebinds loss_func from the hand-written negative_log_likelihood to
# F.cross_entropy. The nn.Module models defined here return their final layer
# output without a log_softmax, so the loss is fed raw scores.
loss_func = F.cross_entropy

class Mnist_Logistic(nn.Module):
    """A single linear layer mapping flattened inputs to class scores.

    Args:
        in_features: length of each flattened input row (default 784).
        n_classes: number of output scores per row (default 10).

    The defaults reproduce the original fixed 784 -> 10 layer, so
    ``Mnist_Logistic()`` behaves exactly as before.
    """

    def __init__(self, in_features=784, n_classes=10):
        super().__init__()
        self.lin = nn.Linear(in_features, n_classes)

    def forward(self, xb):
        """Return the linear layer's output for the batch `xb`."""
        return self.lin(xb)

class Mnist_CNN(nn.Module):
    """Three stride-2 3x3 convolutions followed by average pooling.

    Inputs are viewed as single-channel 28x28 images. The last convolution
    has 10 output channels, which become the columns of the returned tensor.
    """

    def __init__(self):
        super().__init__()
        conv_kwargs = dict(kernel_size=3, stride=2, padding=1)
        self.conv1 = nn.Conv2d(1, 16, **conv_kwargs)
        self.conv2 = nn.Conv2d(16, 16, **conv_kwargs)
        self.conv3 = nn.Conv2d(16, 10, **conv_kwargs)

    def forward(self, xb):
        """Return a (rows, channels) tensor for the batch `xb`."""
        features = xb.view(-1, 1, 28, 28)
        # Each convolution is followed by a ReLU, including the last one.
        for conv in (self.conv1, self.conv2, self.conv3):
            features = F.relu(conv(features))
        pooled = F.avg_pool2d(features, 4)
        # Drop the pooled spatial axes, keeping one column per channel.
        return pooled.view(-1, pooled.size(1))
        


def fit(model, optimizer, train_dl, valid_dl, loss_func,
        epochs=2, v=True):
    """Train `model` for `epochs` passes, printing validation loss per epoch.

    Args:
        model: module to train; called as ``model(xb)``.
        optimizer: optimizer already bound to the model's parameters.
        train_dl: iterable of ``(xb, yb)`` training batches.
        valid_dl: iterable of ``(xb, yb)`` validation batches.
        loss_func: callable ``loss_func(pred, yb)`` returning a scalar loss
            (assumed to be the mean over the batch, as F.cross_entropy
            is by default).
        epochs: number of passes over `train_dl`.
        v: when true, print the training loss every 100 batches.

    The model is left in eval mode when the function returns.
    """
    for epoch in range(epochs):
        # Switch back to training mode at the start of every epoch: the
        # validation pass below puts the model in eval mode, and without
        # this reset every epoch after the first would train with
        # dropout/batchnorm in eval behaviour.
        model.train()
        for i, (xb, yb) in enumerate(train_dl):
            loss = loss_func(model(xb), yb)
            if v and i % 100 == 0:
                print(f"{epoch:03d}, {i:05d}  loss : {loss:0.3f}")
            loss.backward()

            optimizer.step()
            optimizer.zero_grad()

        model.eval()  # used by dropout and batchnorm2d for different behaviour
        total_loss = 0.0
        n_samples = 0
        with torch.no_grad():
            for xb, yb in valid_dl:
                # Weight each batch mean by its size so that a shorter final
                # batch does not skew the reported average.
                batch_len = len(xb)
                total_loss += float(loss_func(model(xb), yb)) * batch_len
                n_samples += batch_len
        print(f"epoch {epoch:03d}, validation loss : {total_loss / n_samples:0.3f}")
                

In [None]:
# Pair inputs with labels so a DataLoader can serve (x, y) batches.
mnist_valid_ds = TensorDataset(x_valid, y_valid)
mnist_train_ds = TensorDataset(x_train,y_train)

def get_data(train_ds, valid_ds, batch_size):
    """Build the training and validation loaders.

    Returns:
        ``(train_dl, valid_dl)``: the training loader shuffles and uses
        `batch_size`; the validation loader does not shuffle and uses twice
        that batch size (validation needs less memory because no backprop
        is done, so larger batches fit).
    """
    return (
        DataLoader(train_ds, batch_size=batch_size, shuffle=True),
        DataLoader(valid_ds, batch_size=batch_size * 2),
    )

In [None]:
# Train the single-layer model with plain SGD for three epochs.
# v=False turns off the per-100-batch training-loss print in fit(); the
# per-epoch validation loss is still printed.
train_dl, valid_dl = get_data(mnist_train_ds, mnist_valid_ds, batch_size=64)
linear_model = Mnist_Logistic()
optimizer = optim.SGD(linear_model.parameters(), lr=0.5)
fit(linear_model, optimizer, train_dl, valid_dl, loss_func,
    epochs=3, v=False)

In [None]:

# Train the convolutional model on the same loaders, using SGD with momentum.
# `optimizer` is rebound here to a new optimizer over cnn_model's parameters.
cnn_model = Mnist_CNN()
optimizer = optim.SGD(cnn_model.parameters(), lr=0.1, momentum = 0.9)
fit(cnn_model, optimizer, train_dl, valid_dl, loss_func,
    epochs=3, v=False)

### using nn.sequential to replace a custom model class

In [None]:
# nn.Sequential has no built-in `view` layer, so wrap an arbitrary function
# in a module that can be placed in the sequence.
class Lambda(nn.Module):
    """Module whose forward pass simply calls the stored function."""

    def __init__(self, func):
        super(Lambda, self).__init__()
        self.func = func

    def forward(self, x):
        """Return ``self.func(x)``."""
        result = self.func(x)
        return result
    
def preprocess(x):
    """View `x` as a batch of single-channel 28x28 images."""
    image_shape = (-1, 1, 28, 28)
    return x.view(image_shape)


# Same layer stack as Mnist_CNN, expressed as an nn.Sequential: reshape,
# three conv+ReLU stages, average pooling, then flatten to (rows, features).
seq_cnn_model = nn.Sequential(
    Lambda(preprocess),
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AvgPool2d(4), # fixed kernel size of 4 (not adaptive). NOTE(review): an adaptive pool, e.g. nn.AdaptiveAvgPool2d(1), would fix the output size instead — confirm which was intended
    Lambda(lambda x: x.view(x.size(0), -1)),
)

In [None]:
# Train the nn.Sequential version with the same optimizer settings and
# loaders as used for cnn_model; `optimizer` is rebound to the new model.
optimizer = optim.SGD(seq_cnn_model.parameters(), lr=0.1, momentum = 0.9)
fit(seq_cnn_model, optimizer, train_dl, valid_dl, loss_func,
    epochs=3, v=False)