In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter ######

**Model**

In [18]:
class CNN(nn.Module):
  def __init__(self, ip_channels=1, op_features=10):
    super().__init__()
    self.conv1 = nn.Conv2d(ip_channels, 8, 3, (1,1), (1,1) )  # ip_channnels, op_channels, kernel_size, stride, padding
    self.conv2 = nn.Conv2d(8, 16, 3, (1,1), (1,1))
    self.maxpool = nn.MaxPool2d((2,2), (2,2))   # kernel_size, stride
    self.fc1 = nn.Linear(16*7*7, op_features)
    
  def forward(self, x):
    x = self.conv1(x)
    x = F.relu(x)
    x = self.maxpool(x)
    x = self.conv2(x)
    x = F.relu(x)
    x = self.maxpool(x)
    x = x.view(x.shape[0], -1)
    x = self.fc1(x)
    return x



In [19]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

**Hyperparameters**

In [20]:
ip_channels = 1
op_features = 10
learning_rate = 0.001
batch_size = 64
num_epochs = 5

**Loading Data**

In [21]:
train_dataset = datasets.MNIST(root='Data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root='Data', train=False, transform=transforms.ToTensor(), download=True)


test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

In [22]:
for i in train_dataset:
  print(type(i))
  print(len(i))
  print(type(i[0]))
  print(i[0].shape)
  print(type(i[1]))
  print(i[1])
  break

<class 'tuple'>
2
<class 'torch.Tensor'>
torch.Size([1, 28, 28])
<class 'int'>
5


In [23]:
for i in train_loader:
  print(type(i))
  print(len(i))
  print(type(i[0]))
  print(i[0].shape)
  print(type(i[1]))
  print(i[1])
  break

<class 'list'>
2
<class 'torch.Tensor'>
torch.Size([64, 1, 28, 28])
<class 'torch.Tensor'>
tensor([8, 6, 9, 9, 3, 6, 0, 6, 6, 1, 4, 3, 1, 1, 3, 9, 4, 0, 8, 9, 3, 3, 7, 3,
        7, 1, 9, 1, 2, 2, 6, 0, 7, 8, 7, 9, 0, 6, 7, 3, 4, 3, 6, 9, 8, 7, 0, 7,
        2, 3, 8, 1, 2, 2, 6, 2, 7, 5, 1, 5, 2, 1, 4, 7])


In [32]:
batch_sizes = [1,64,128, 1024]
learning_rates = [0.1, 0.01, 0.001, 0.0001]

**Training**

In [31]:
for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        step = 0
        model.train() ########################################
        model = CNN().to(device)
        
        loss_function = nn.CrossEntropyLoss()
        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0)
        writer = SummaryWriter(f'runs/MNIST/MiniBatchSize {batch_size} LR {learning_rate}')
        
        for epoch in range(num_epochs):
            losses = []
            accuracies = []
            for batch_index, data in enumerate(train_loader):
                X, y = data
                X = X.to(device)
                y = y.to(device)
                y_ = model(X)
                loss = loss_function(y_, y)
                losses.append(loss)
                
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                _, predictions = scores.max(dim=1)
                correct = (predictions == y).sum()
                acc = float(correct)/float(X.shape[0])
                
                writer.add_scalar('TrainingLoss', loss, global_step=step)
                writer.add_scalar('TrainingAccuracy', acc, global_step=step)
                accuracies.append(acc)
                
            print(f'Epoch:{epoch}, MeanLoss:{sum(losses)/len(losses)}')
                

Epoch:0, Mean Loss:0.045598321139744456
Epoch:1, Mean Loss:0.04102607664584156
Epoch:2, Mean Loss:0.03667895719153322
Epoch:3, Mean Loss:0.03308674022577691
Epoch:4, Mean Loss:0.03019424989157523


In [None]:
#! tensorboard --logdir=runs #for tensorboard

**Testing**

In [33]:
correct = 0
total = 0

with torch.no_grad():
  for data in test_loader:
    X, y = data
    X = X.to(device)
    y = y.to(device)
    y_ = model(X)
    
    _, predictions = y_.max(dim=1)
    correct += (predictions== y).sum()
    total += predictions.shape[0]

    #for idx, op_class in enumerate(y_):
      #if torch.argmax(op_class) == y[idx]:correct += 1
      #total += 1

**Accuracy**

In [34]:
correct/total

tensor(0.9715)