<a href="https://colab.research.google.com/github/ajfisch/deeplearning_bootcamp_2020/blob/master/advanced_vision_tutorial_2020.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PyTorch Basics 

In this tutorial, we'll build a dataset and a model with PyTorch.

Let's get started!

In [None]:
# http://pytorch.org/
from os import path
from matplotlib import pyplot as plt

# Gives a progress bar
from tqdm import tqdm

import torch
from torch.utils import data
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
print(torch.__version__)
print(torch.cuda.is_available())

In [None]:
# CONSTANTS
# Default circle radii for the ConcentricCircles dataset; the dataset treats
# the last entry as the outermost circle.
RADII = [2, 3]
# Default number of points generated before the train/dev/test split.
NUM_SAMPLES = 10000

## Step 1: Building the dataset

Datasets are abstractions that hold data for you. As long as you define a `__len__` and `__getitem__`, they can be used to pipe data into your training routine.

In [None]:
class ConcentricCircles(data.Dataset):
    """Synthetic 2-D classification dataset built from concentric circles.

    ``num_samples`` points are drawn uniformly from the square
    ``[-(R + 1), R + 1) x [-(R + 1), R + 1)`` with ``R = radii[-1]``. Every
    point starts with label 0; then, for each ``radii[i]`` in order, the points
    lying strictly outside that circle are relabelled ``i + 1``. With radii
    given in increasing order this yields a disc (class 0) surrounded by rings.

    The points are shuffled and partitioned into train/dev/test according to
    ``partition_ratio``; only the requested ``split`` is served through
    ``__getitem__`` / ``__len__``.

    Args:
        split: ``"train"``, ``"dev"`` or ``"test"``.
        radii: circle radii; the last entry sets the size of the sampling square.
        num_samples: total number of points generated before splitting.
        partition_ratio: fractions for train, dev and test. Only the first two
            are used; the test split receives whatever remains.
        seed: value passed to ``torch.random.manual_seed``. This seeds the
            global torch RNG, so instances built with the same arguments
            generate identical points and identical splits.

    Raises:
        ValueError: if ``split`` is not a known split name or ``radii`` is empty.
    """

    # Split names accepted by the constructor.
    SPLITS = ("train", "dev", "test")

    def __init__(
        self,
        split,
        radii=RADII,
        num_samples=NUM_SAMPLES,
        partition_ratio=(0.7, 0.15, 0.15),
        seed=0) -> None:
        super().__init__()

        # Validate before touching the global RNG so a bad call has no side effects.
        if split not in self.SPLITS:
            raise ValueError(
                "split must be one of {}, got {!r}".format(self.SPLITS, split))
        if len(radii) == 0:
            raise ValueError("radii must contain at least one radius")

        torch.random.manual_seed(seed)
        half_width = radii[-1] + 1
        X = 2 * half_width * torch.rand(num_samples, 2) - half_width
        Y = torch.zeros(num_samples, dtype=torch.long)

        # Later radii overwrite earlier labels, so a point ends up with the
        # index (+1) of the last circle it lies strictly outside of.
        squared_dist = (X ** 2).sum(-1)
        for i, r in enumerate(radii):
            Y[squared_dist > r ** 2] = i + 1

        # The first two ratios place the cut points; the tail is the test split.
        shuffled_indices = torch.randperm(num_samples)
        train_end = int(partition_ratio[0] * num_samples)
        dev_end = int(num_samples * sum(partition_ratio[:2]))
        indices_by_split = {
            "train": shuffled_indices[:train_end],
            "dev": shuffled_indices[train_end:dev_end],
            "test": shuffled_indices[dev_end:],
        }
        selected = indices_by_split[split]
        self.dataset = list(zip(X[selected], Y[selected]))

        # Keep the full, unsplit data around as well.
        self.X, self.Y = X, Y
        self.num_classes = len(radii) + 1

    def __getitem__(self, index):
        """Return the ``(point, label)`` pair at ``index`` of this split."""
        return self.dataset[index]

    def __len__(self):
        """Return the number of samples in this split."""
        return len(self.dataset)

In [None]:
def visualize_circles(dataset):
    """Scatter-plot the points of ``dataset``, colored by their labels.

    ``dataset`` is iterated as ``(point, label)`` pairs; column 0 of the
    stacked points goes on the horizontal axis and column 1 on the vertical.
    """
    points = torch.stack([sample[0] for sample in dataset])
    labels = [sample[1] for sample in dataset]
    horizontal, vertical = points[:, 0], points[:, 1]
    plt.scatter(horizontal, vertical, c=labels)
    plt.show()

In [None]:
# Build the three splits in order. Each call uses the same default seed, so
# all three are slices of the same generated points.
train_dataset, dev_dataset, test_dataset = (
    ConcentricCircles(split=split_name)
    for split_name in ("train", "dev", "test")
)

In [None]:
# Visualize the training split: scatter-plot the 2-D points, colored by class label.
visualize_circles(train_dataset)

## Step 2: Building a model

All PyTorch models should be implemented as subclasses of `nn.Module`.

To build a model you need to:

1. define what parameters it'll need in its `__init__` function

2. define the model's computation, using those parameters, in the `forward` function.


To keep things simple, let's define a simple linear classifier, like logistic regression. We'll experiment with more complex models soon.

In [None]:
class Model(nn.Module):
    """Linear classifier: one affine map from 2-D inputs to class logits."""

    def __init__(self, num_classes):
        super().__init__()
        # A single layer taking 2 input features and producing one score per class.
        self.fully_connected = nn.Linear(in_features=2, out_features=num_classes)

    def forward(self, x):
        """Return the unnormalized class scores (logits) for the batch ``x``."""
        logits = self.fully_connected(x)
        return logits


## Step 3. Defining our training procedure

To train our model, let's introduce a couple new PyTorch ideas.

A [DataLoader](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader) is an iterator that goes over our entire dataset and selects batches. 
We'll be using this to iterate through our train/dev/test sets.

Let's initialize these now.

An [Optimizer](https://pytorch.org/docs/stable/optim.html) defines an update rule. In class, we've discussed vanilla SGD, which is one method to compute the next weight, given the current weight and gradient. There are plenty of other optimizers you can try from the PyTorch library; the code below uses Adam.


In [None]:
# Training settings
epochs = 10
lr = .01
# NOTE: `momentum` is not consumed by the Adam optimizer created below. It is
# kept for optimizers that accept it, e.g. optim.SGD(..., momentum=momentum).
momentum = 0.5
batch_size = 32
num_classes = train_dataset.num_classes


# Only the training data is shuffled. Evaluation metrics do not depend on batch
# order, so the dev/test loaders iterate in a fixed order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = torch.utils.data.DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


model = Model(num_classes)
optimizer = optim.Adam(model.parameters(), lr=lr)


To train our model:

1) we'll randomly sample batches from our train loader

2) compute our loss (using standard `cross_entropy`)

3) compute our gradients (by calling `backward()` on our loss)

4) update our neural network with an `optimizer.step()`, and go back to 1)

I've added some extra stuff here to log our accuracy and average loss for the epoch.


In [None]:
def train_epoch(model, train_loader, optimizer, epoch, device=None):
    """Run one optimization pass over ``train_loader`` and print epoch metrics.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to class logits.
        train_loader: iterable of ``(inputs, targets)`` batches that supports ``len()``.
        optimizer: optimizer built over ``model.parameters()``.
        epoch: epoch number; used only in the printed summary.
        device: device to train on. Defaults to ``'cuda'`` when a GPU is
            available and to ``'cpu'`` otherwise.

    Prints the per-sample average loss and the accuracy over the samples seen.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.train()  # Set the nn.Module to train mode.
    model = model.to(device)
    total_loss = 0.0
    correct = 0
    num_samples = 0
    for x, target in tqdm(train_loader, total=len(train_loader)):  # 1) get batch
        x, target = x.to(device), target.to(device)
        # Reset gradient data to 0
        optimizer.zero_grad()
        # Get prediction for batch
        output = model(x)
        # 2) Compute loss (mean over the batch)
        loss = F.cross_entropy(output, target)
        # 3) Do backprop
        loss.backward()
        # 4) Update model
        optimizer.step()

        ## Do book-keeping to track accuracy and avg loss
        batch_count = target.size(0)
        pred = output.argmax(dim=1)  # index of the largest logit
        correct += (pred == target).sum().item()
        # `loss` is a batch mean: weight it by the batch size so that dividing
        # by the sample count below gives a true per-sample average.
        total_loss += loss.item() * batch_count
        num_samples += batch_count

    # Guard the division so an empty loader reports zeros instead of raising.
    denominator = max(num_samples, 1)
    print('Train Epoch: {} \tLoss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
            epoch, total_loss / denominator,
            correct,
            num_samples,
            100. * correct / denominator))


## Step 3.5 Define our evaluation loop
Similar to above, we'll also loop through our dev or test set, and compute our loss and accuracy. 
This lets us see how well our model is generalizing. 

In [None]:
def eval_epoch(model, test_loader, name, device=None):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to class logits.
        test_loader: iterable of ``(inputs, targets)`` batches.
        name: label for the evaluated split, used in the printed summary.
        device: device to evaluate on. Defaults to ``'cuda'`` when a GPU is
            available and to ``'cpu'`` otherwise.

    Prints the per-sample average loss and the accuracy over the samples seen.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)
    model.eval()
    test_loss = 0.0
    correct = 0
    num_samples = 0
    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        for inputs, target in tqdm(test_loader):
            inputs, target = inputs.to(device), target.to(device)
            output = model(inputs)
            # Sum (not mean) over the batch so the division below yields a
            # per-sample average.
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)  # index of the largest logit
            correct += (pred == target).sum().item()
            num_samples += target.size(0)

    # Guard the division so an empty loader reports zeros instead of raising.
    denominator = max(num_samples, 1)
    test_loss /= denominator
    print('\n{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        name,
        test_loss,
        correct,
        num_samples,
        100. * correct / denominator))


## Step 4: Training the model

In [None]:

# For each epoch (numbered from 1): one training pass, then a dev-set evaluation.
for epoch in range(1, epochs + 1):
    train_epoch(model, train_loader, optimizer, epoch)
    eval_epoch(model,  dev_loader, "Dev")
    print("---")

# Step 5. Experiment with MLP
Let's try a more complex model.

In [None]:
class Model(nn.Module):
    """Multi-layer perceptron: two ReLU hidden layers and a linear output layer.

    Args:
        num_classes: number of output logits.
        input_dim: size of each input vector (default 2).
        hidden_dim: width of both hidden layers (default 10).
    """

    def __init__(self, num_classes, input_dim=2, hidden_dim=10):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return the unnormalized class scores (logits) for the batch ``x``."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        # No activation on the output: the raw scores are returned as logits.
        logit = self.fc3(hidden)
        return logit

In [None]:
# Build a new model from the `Model` class redefined above, with a new Adam
# optimizer over its parameters.
model = Model(num_classes)
optimizer = optim.Adam(model.parameters(), lr=lr)

In [None]:

# For each epoch (numbered from 1): one training pass, then a dev-set evaluation.
for epoch in range(1, epochs + 1):
    train_epoch(model, train_loader, optimizer, epoch)
    eval_epoch(model,  dev_loader, "Dev")
    print("---")

# Step 6. Evaluate the best model

In [None]:
# Report loss and accuracy of the current model on the test split.
eval_epoch(model,  test_loader, "Test")

#### Saving and Loading

In [None]:
# Save only the learned parameters (the state_dict) rather than pickling the
# whole module: the file then holds plain tensors, does not depend on the
# `Model` class definition, and loads with torch.load(weights_only=True),
# which is the default in recent PyTorch releases.
torch.save(model.state_dict(), 'path.pt')     # to save

# To load: rebuild the architecture, then copy the saved parameters into it.
# map_location='cpu' lets a checkpoint written on a GPU load on any machine.
model = Model(num_classes)
model.load_state_dict(torch.load('path.pt', map_location='cpu'))   # to load a saved model