We'll finish by an introduction to PyTorch.

## PyTorch and MNIST
Let us first build an introductory model for the MNIST dataset.  
More on MNIST can be found here: https://www.kaggle.com/hojjatk/mnist-dataset?msclkid=a59b1a61bffd11ec953ecf3f2a143919  
PyTorch Tutorials: https://pytorch.org/tutorials/  
Good video with intros: https://www.youtube.com/watch?v=OMDn66kM9Qc  

**Loading the MNIST dataset in PyTorch**

In [143]:
import torch
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as f
import torchvision.datasets as datasets

.ToTensor()

In [126]:
batch_size = 32
valid_size = 0.2


transform = transforms.ToTensor()

train_data = datasets.MNIST(root='data', train=True,
                                   download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False,
                                  download=True, transform=transform)

In [127]:
print(train_data)

Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()


**DataLoaders in PyTorch**

DataLoader

In [128]:
num_workers = 0

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
    num_workers=num_workers)

test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
    num_workers=num_workers, shuffle=True)

**Building a network**

In [129]:
class model_mnist(nn.Module):
    def __init__(self):
        super(model_mnist, self).__init__()
        self.input_dense=nn.Linear(28*28,510)
        self.output_dense=nn.Linear(510,10)
        
    def forward(self,x):
        x=f.relu(self.input_dense(x)) 
        pred=self.output_dense(x)
        return pred

In [130]:
model=model_mnist()

In [131]:
print(model.parameters)

<bound method Module.parameters of model_mnist(
  (input_dense): Linear(in_features=784, out_features=510, bias=True)
  (output_dense): Linear(in_features=510, out_features=10, bias=True)
)>


In [132]:
learning_rate = 0.01

loss=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model.parameters(),lr=learning_rate)

In [144]:
num_epochs = 5
train_loss = list()

for epoch in range(num_epochs):
    total_train_loss = 0
    
    model.train()
    
    # training
    for i, (image, label) in enumerate(train_loader):

        image = image.reshape(-1, 28*28)
        # optimizing gradients to zero before training 
        
        optimizer.zero_grad()

        pred = model(image)

        losses = loss(pred, label)
        total_train_loss += losses.item()

        losses.backward()
        optimizer.step()

    total_train_loss = total_train_loss / (i + 1)
    train_loss.append(total_train_loss)

    print (f'Epochs [{epoch+1}/{num_epochs}], Step[{i+1}/{len(train_loader)}], Losses: {losses.item():.6f}')

Epochs [1/5], Step[1875/1875], Losses: 0.000000
Epochs [2/5], Step[1875/1875], Losses: 0.000006
Epochs [3/5], Step[1875/1875], Losses: 0.000010
Epochs [4/5], Step[1875/1875], Losses: 0.000028
Epochs [5/5], Step[1875/1875], Losses: 0.000000


In [145]:
model.eval()

results = list()
total = 0
for i, (image, label) in enumerate(test_loader):

    image = image.reshape(-1, 28*28)

    pred = model(image)
    pred = torch.nn.functional.softmax(pred, dim=1)

    for j, p in enumerate(pred):
        if label[j] == torch.max(p.data, 0)[1]:
            total = total + 1
            #print(j)
            results.append((image, torch.max(p.data, 0)[1]))

test_accuracy = total / (i*batch_size)
print('Test accuracy {:.4f}'.format(test_accuracy))

Test accuracy 0.9667
