# **mnist_convnet**
- ELEC 576 HW 1
- Robert Heeter
- 4 October 2023

## **Structure**:
1) Set PyTorch metadata
    - Seed
    - TensorBoard logging output
    - Whether to transfer to gpu (cuda)
2) Import data
    - Download data
    - Create data loaders with batch size, transforms, scaling
3) Define model architecture, loss, and optimizer
4) Define test and training loops
    - Train:
        - Get next batch
        - Forward pass through model
        - Calculate loss
        - Backward pass from loss (calculates the gradient for each parameter)
        - Optimizer: performs weight updates
5) Perform training over multiple epochs
    - Each epoch:
        - Call train loop
        - Call test loop

## **Acknowledgements**:
- https://colab.research.google.com/drive/1i9KpbQyFU4zfq8zLLns8a2Kd8PRMGsaZ
- https://github.com/motokimura/pytorch_tensorboard/blob/master/main.py

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import numpy as np

from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
import os


In [None]:
# 1. Set PyTorch metadata

# Hyperparameters and run configuration.
batch_size = 64          # samples per training batch
test_batch_size = 1000   # samples per evaluation batch
epochs = 1               # number of passes over the training set
lr = 0.01                # learning rate handed to the optimizer
try_cuda = True          # use the GPU when one is available
seed = 1000              # seed for PyTorch's random number generators
logging_interval = 10 # how many batches to wait before logging
logging_dir = None       # not referenced by the visible cells; kept for compatibility

# setting up the logging: each run gets its own timestamped sub-directory
log_dir = os.path.join(os.getcwd(),'mnist_convnet_log', datetime.now().strftime('%b%d_%H-%M-%S'))
writer = SummaryWriter(log_dir=log_dir)

# deciding whether to send to the gpu or not if available
cuda = try_cuda and torch.cuda.is_available()

# Seed the default (CPU) generator unconditionally, since weight initialisation
# and data shuffling use it even on GPU runs; seed the CUDA generator as well
# when the GPU is in use.
torch.manual_seed(seed)
if cuda:
    torch.cuda.manual_seed(seed)
    

In [None]:
# 2. Import data

# Convert images to tensors in [0, 1], then standardise with the MNIST
# training-set mean (0.1307) and standard deviation (0.3081).
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(datasets.MNIST('data', train=True, download=True, transform=transform),
                                           batch_size=batch_size,
                                           shuffle=True)
# Evaluation metrics are sums over the whole test set, so ordering is
# irrelevant; keep it fixed for reproducibility.
test_loader = torch.utils.data.DataLoader(datasets.MNIST('data', train=False, download=True, transform=transform),
                                          batch_size=test_batch_size,
                                          shuffle=False)


In [None]:
# 3. Defining model architecture, loss, and optimizer

class Net(nn.Module):
    """Two-convolution CNN that maps single-channel images to 10 class probabilities.

    Layer sequence: conv(1->10, 5x5) -> ReLU -> 2x2 max-pool ->
    conv(10->20, 5x5) -> ReLU -> 2x2 max-pool -> flatten(320) ->
    linear(320->50) -> ReLU -> dropout(0.5) -> linear(50->10) -> softmax.

    The flatten size of 320 (= 20 channels * 4 * 4) corresponds to 28x28
    inputs such as MNIST digits.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        # Not used by forward(); kept so the module's attributes are unchanged.
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class probabilities (each row sums to 1) for a batch of images.

        Args:
            x: image batch accepted by ``conv1`` (one input channel).

        Returns:
            Tensor with 10 columns holding softmax probabilities.
        """
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, 320)  # (batch_size, units)
        x = F.relu(self.fc1(x))
        # F.dropout defaults to training=True, so the module's mode must be
        # passed explicitly; otherwise dropout stays active after model.eval().
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.fc2(x)
        return F.softmax(x, dim=1)

model = Net()
if cuda:
    # train() and test() move every batch to the GPU when `cuda` is set, so
    # the model's parameters have to live on the same device.
    model.cuda()

# Built after the device move so the optimizer holds the on-device parameters.
optimizer = optim.Adam(model.parameters(), lr=lr)


In [None]:
# 4. Define test and training loops

# Added to the softmax outputs before taking the log so log(0) cannot occur.
eps = 1e-13

def train(epoch):
    """Run one training epoch over ``train_loader`` and log to TensorBoard.

    Every ``logging_interval`` batches the batch loss is printed and written to
    the ``train/loss`` scalar. After the epoch, a histogram of every model
    parameter is written at the end-of-epoch step.

    Args:
        epoch: 1-based epoch number, used for console output and to compute
            the global step of the TensorBoard entries.
    """
    model.train()

    # The model emits probabilities, so NLLLoss is fed log(output + eps).
    # reduction='sum' is the current spelling of the deprecated
    # size_average=False: the loss is summed over the batch, not averaged.
    criterion = nn.NLLLoss(reduction='sum')
    batches_per_epoch = len(train_loader)

    for batch_idx, (data, target) in enumerate(train_loader):
        if cuda:
            data, target = data.cuda(), target.cuda()

        optimizer.zero_grad()
        output = model(data) # forward pass
        loss = criterion(torch.log(output + eps), target) # = sum_k(-t_k * log(y_k))
        loss.backward() # backward pass
        optimizer.step()

        if batch_idx % logging_interval == 0:
            loss_value = loss.item()
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / batches_per_epoch, loss_value)
            )

            # log train/loss to TensorBoard every `logging_interval` batches
            n_iter = (epoch - 1) * batches_per_epoch + batch_idx + 1
            writer.add_scalar('train/loss', loss_value, n_iter)

    # log model parameters to TensorBoard at every epoch, using the true
    # end-of-epoch step (the same step test() logs at) rather than the step of
    # the last logged batch, which would be undefined for an empty loader.
    epoch_end_iter = epoch * batches_per_epoch
    for name, param in model.named_parameters():
        # Split e.g. 'conv1.weight' into layer 'conv1' and attribute 'weight'.
        layer, attr = os.path.splitext(name)
        attr = attr[1:]
        writer.add_histogram('{}/{}'.format(layer, attr), param.detach().cpu().numpy(), epoch_end_iter)

def test(epoch):
    """Evaluate the model on ``test_loader`` and log loss/accuracy to TensorBoard.

    Prints the average per-sample loss and the accuracy, then writes them to
    the ``test/loss`` and ``test/accuracy`` scalars at the end-of-epoch step.

    Args:
        epoch: 1-based epoch number that has just finished training; used to
            compute the TensorBoard global step.
    """
    model.eval()
    test_loss = 0
    correct = 0

    # Summed (not averaged) per batch so the total can be divided by the
    # dataset size once at the end. reduction='sum' replaces the deprecated
    # size_average=False.
    criterion = nn.NLLLoss(reduction='sum')
    num_samples = len(test_loader.dataset)

    # No gradients are needed for evaluation; skipping graph construction
    # saves memory and time on the large test batches.
    with torch.no_grad():
        for data, target in test_loader:
            if cuda:
                data, target = data.cuda(), target.cuda()

            output = model(data)

            test_loss += criterion(torch.log(output + eps), target).item() # sum up batch loss (later, averaged over all test samples)
            pred = output.argmax(dim=1, keepdim=True) # index of the highest class probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= num_samples
    test_accuracy = 100. * correct / num_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, num_samples, test_accuracy)
    )

    # log test/loss and test/accuracy to TensorBoard at every epoch
    n_iter = epoch * len(train_loader)
    writer.add_scalar('test/loss', test_loss, n_iter)
    writer.add_scalar('test/accuracy', test_accuracy, n_iter)
    

In [None]:
# 5. Perform training over multiple epochs

# start training
# Train then evaluate once per epoch. The finally clause closes the TensorBoard
# writer even if training raises or is interrupted, so pending events are
# written out.
try:
    for epoch in range(1, epochs + 1):
        train(epoch)
        test(epoch)
finally:
    writer.close()

In [None]:
%reload_ext tensorboard
%tensorboard --logdir log_dir --port=8008