<a href="https://colab.research.google.com/github/rajy4683/EVAP2/blob/master/MNIST_Medium_EVA6S4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Best test accuracy: 99.44% (reached at epoch 14 of 19; the last epoch scored 99.35%)**
### Number of parameters: 7,632

In [None]:
# Shell out to nvidia-smi to show which GPU (if any) is attached to this runtime.
!nvidia-smi

In [None]:
!pip install pytorch-ignite
!pip install torchsummary
!pip install wandb
!pip install gradio
!pip install netron
!pip install plotly --upgrade

In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchsummary import summary
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['figure.figsize'] = (15, 10)

import pandas as pd
import plotly.express as px
pd.options.plotting.backend = "plotly"

In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchsummary import summary

import logging
# Silence everything below ERROR on the root logger.
# (The former `logging.propagate = False` only set an unused attribute on the
# logging *module*; `propagate` is a per-Logger attribute, so it had no effect.)
logging.getLogger().setLevel(logging.ERROR)

from argparse import ArgumentParser
from tqdm import tqdm
import os

In [None]:
import logging
# Silence everything below ERROR on the root logger.
# (The former `logging.propagate = False` only set an unused attribute on the
# logging *module*; `propagate` is a per-Logger attribute, so it had no effect.)
logging.getLogger().setLevel(logging.ERROR)

In [None]:
import wandb
# SECURITY: never commit an API key to a notebook. wandb.login() picks up the
# WANDB_API_KEY environment variable or an existing netrc entry, and otherwise
# prompts for the key interactively.
# NOTE(review): the key that used to be hardcoded in this cell is in version
# control history and should be revoked in the W&B account settings.
wandb.login()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
class Net(nn.Module):
    """Compact convolutional classifier with ten output classes.

    The network is three convolutional stages followed by a pooling +
    1x1-convolution head.  For a 1x28x28 input the stages produce
    8x14x14, 16x7x7 and 16x1x1 feature maps, and the head maps the final
    16 channels to 10 log-probabilities.

    Args:
        dropout_val: Drop probability shared by every ``nn.Dropout`` layer.
    """

    def __init__(self, dropout_val=0.1):
        super().__init__()
        self.dropout_val = dropout_val
        # All 3x3 convolutions and the classifier convolution are bias-free;
        # the two 1x1 transition convolutions keep PyTorch's default bias.
        self.bias = False
        rate = self.dropout_val

        # Stage 1: two padded 3x3 convs at full resolution, 2x2 max-pool,
        # then a 1x1 transition conv.
        self.conv1 = nn.Sequential(
            *self._conv_relu_bn(1, 8, padding=1),
            nn.Dropout(rate),
            *self._conv_relu_bn(8, 8, padding=1),
            nn.MaxPool2d(2, 2),
            nn.Dropout(rate),
            nn.Conv2d(8, 8, 1),
        )

        # Stage 2: same layout, widening from 8 to 16 channels.
        self.conv2 = nn.Sequential(
            *self._conv_relu_bn(8, 8, padding=1),
            nn.Dropout(rate),
            *self._conv_relu_bn(8, 16, padding=1),
            nn.MaxPool2d(2, 2),
            nn.Dropout(rate),
            nn.Conv2d(16, 16, 1),
        )

        # Stage 3: unpadded 3x3 convs shrink the map before the last max-pool.
        self.conv3 = nn.Sequential(
            *self._conv_relu_bn(16, 16),
            nn.Dropout(rate),
            *self._conv_relu_bn(16, 16),
            nn.MaxPool2d(2, 2),
            nn.Dropout(rate),
        )

        # Head: global average pooling, then a 1x1 conv acting as the classifier.
        self.gap_linear = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Conv2d(16, 10, 1, bias=self.bias),
        )

    def _conv_relu_bn(self, in_channels, out_channels, padding=0):
        """Return the (3x3 Conv2d, ReLU, BatchNorm2d) triple used by every stage."""
        return (
            nn.Conv2d(in_channels, out_channels, 3, padding=padding, stride=1, bias=self.bias),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
        )

    def forward(self, x):
        """Return per-class log-probabilities with shape ``(N, 10)``."""
        for stage in (self.conv1, self.conv2, self.conv3, self.gap_linear):
            x = stage(x)
        logits = x.view(-1, 10)
        return F.log_softmax(logits, dim=1)

### Final Model


In [None]:
# Build the network on the selected device and print a per-layer table of
# output shapes and parameter counts for a single 1x28x28 input.
model = Net(dropout_val=0.1)
model = model.to(device)
summary(model, input_size=(1, 28, 28))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              72
              ReLU-2            [-1, 8, 28, 28]               0
       BatchNorm2d-3            [-1, 8, 28, 28]              16
           Dropout-4            [-1, 8, 28, 28]               0
            Conv2d-5            [-1, 8, 28, 28]             576
              ReLU-6            [-1, 8, 28, 28]               0
       BatchNorm2d-7            [-1, 8, 28, 28]              16
         MaxPool2d-8            [-1, 8, 14, 14]               0
           Dropout-9            [-1, 8, 14, 14]               0
           Conv2d-10            [-1, 8, 14, 14]              72
           Conv2d-11            [-1, 8, 14, 14]             576
             ReLU-12            [-1, 8, 14, 14]               0
      BatchNorm2d-13            [-1, 8, 14, 14]              16
          Dropout-14            [-1, 8,

### Datasets and Basic Transforms

In [None]:
# Train-phase pipeline: convert the image to a tensor, then normalise it.
# No augmentation is applied. Normalize takes sequences, hence the
# one-element tuples (0.1307,) and (0.3081,) rather than bare floats.
train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Test-phase pipeline: identical conversion and normalisation.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

In [None]:
torch.manual_seed(1)
batch_size = 128

# Worker processes and pinned memory only pay off when batches are copied to a GPU.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Reuse the transform pipelines defined above instead of re-declaring the same
# ToTensor + Normalize chain inline, so the two cannot drift apart.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=train_transforms),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation does not depend on sample order, so the test split is not shuffled.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=test_transforms),
    batch_size=batch_size, shuffle=False, **kwargs)


In [None]:
# Label names for the ten digit classes, plus per-epoch metric histories that
# train() and test() append to.
classes = tuple(str(digit) for digit in range(10))
train_losses, test_losses = [], []
train_acc, test_acc = [], []

from tqdm import tqdm
def train(args, model, device, train_loader, optimizer, epoch_number):
    """Run one training epoch and record its metrics.

    Args:
        args: Run configuration; accepted for call compatibility, not read here.
        model: Network to optimise; it is switched to training mode.
        device: Device every batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches that exposes
            ``.dataset`` (its length is the sample count used for averaging).
        optimizer: Optimiser stepped once per batch.
        epoch_number: Epoch index, used only in the printed summary.

    Returns:
        ``(train_accuracy, train_loss)``: accuracy in percent and the mean
        per-sample NLL loss for the epoch.

    Side effects:
        Prints a summary line and appends the two metrics to the module-level
        ``train_acc`` and ``train_losses`` lists.
    """
    model.train()
    pbar = tqdm(train_loader)
    loss_sum = 0.0
    correct = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)  # index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        pbar.set_description(desc= f'loss={batch_loss} batch_id={batch_idx}')
        # nll_loss returns the batch *mean*; weight it by the batch size so the
        # epoch total can be divided by the number of samples. (Dividing a sum
        # of batch means by the dataset size under-reported the loss by roughly
        # a factor of the batch size and made it incomparable with test().)
        loss_sum += batch_loss * data.size(0)

    num_samples = len(train_loader.dataset)
    train_loss = loss_sum / num_samples
    train_accuracy = (100. * correct) / num_samples
    print('\nEpoch: {:.0f} Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        epoch_number, train_loss, correct, num_samples, train_accuracy))
    train_acc.append(train_accuracy)
    train_losses.append(train_loss)

    return train_accuracy, train_loss

def test(args, model, device, test_loader,classes,epoch_number):
    model.eval()
    test_loss = 0
    correct = 0
    example_images = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
        #example_images.append(wandb.Image(
        #        data[0], caption="Pred: {} Truth: {}".format(classes[pred[0].item()], classes[target[0]])))

    test_loss /= len(test_loader.dataset)
    test_accuracy = (100. * correct) / len(test_loader.dataset)

    print('\nEpoch: {:.0f} Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        epoch_number, test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    test_acc.append(test_accuracy)
    test_losses.append(test_loss)

    return test_accuracy, test_loss

In [None]:
# Label names for the ten digit classes, plus per-epoch metric histories that
# train() and test() append to.
classes = tuple(str(digit) for digit in range(10))
train_losses, test_losses = [], []
train_acc, test_acc = [], []

from tqdm import tqdm
def train(args, model, device, train_loader, optimizer, epoch_number):
    """Run one training epoch and record its metrics.

    Args:
        args: Run configuration; accepted for call compatibility, not read here.
        model: Network to optimise; it is switched to training mode.
        device: Device every batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches that exposes
            ``.dataset`` (its length is the sample count used for averaging).
        optimizer: Optimiser stepped once per batch.
        epoch_number: Epoch index, used only in the printed summary.

    Returns:
        ``(train_accuracy, train_loss)``: accuracy in percent and the mean
        per-sample NLL loss for the epoch.

    Side effects:
        Prints a summary line and appends the two metrics to the module-level
        ``train_acc`` and ``train_losses`` lists.
    """
    model.train()
    pbar = tqdm(train_loader)
    loss_sum = 0.0
    correct = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)  # index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        pbar.set_description(desc= f'loss={batch_loss} batch_id={batch_idx}')
        # nll_loss returns the batch *mean*; weight it by the batch size so the
        # epoch total can be divided by the number of samples. (Dividing a sum
        # of batch means by the dataset size under-reported the loss by roughly
        # a factor of the batch size and made it incomparable with test().)
        loss_sum += batch_loss * data.size(0)

    num_samples = len(train_loader.dataset)
    train_loss = loss_sum / num_samples
    train_accuracy = (100. * correct) / num_samples
    print('\nEpoch: {:.0f} Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        epoch_number, train_loss, correct, num_samples, train_accuracy))
    train_acc.append(train_accuracy)
    train_losses.append(train_loss)

    return train_accuracy, train_loss

def test(args, model, device, test_loader,classes,epoch_number):
    model.eval()
    test_loss = 0
    correct = 0
    example_images = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
        #example_images.append(wandb.Image(
        #        data[0], caption="Pred: {} Truth: {}".format(classes[pred[0].item()], classes[target[0]])))

    test_loss /= len(test_loader.dataset)
    test_accuracy = (100. * correct) / len(test_loader.dataset)

    print('\nEpoch: {:.0f} Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        epoch_number, test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    test_acc.append(test_accuracy)
    test_losses.append(test_loss)

    return test_accuracy, test_loss

## Attempt 1

In [None]:
from torch.optim.lr_scheduler import StepLR, OneCycleLR
# Default hyperparameters for this run. They are handed to wandb.init() below
# and read back through `config` (wandb.config) inside main().
hyperparameter_defaults = {
    "dropout": 0.1,
    "channels_one": 16,
    "channels_two": 32,
    "batch_size": 64,
    "test_batch_size": 34,
    "lr": 0.01,
    "momentum": 0.9,
    "no_cuda": False,
    "seed": 1,
    "epochs": 19,
    "bias": False,
    "log_interval": 11,
    "sched_lr_gamma": 0.5,
    "sched_lr_step": 1,
    "start_lr": 5,
}

# Reset the label names and the per-epoch metric histories for this run.
classes = tuple(str(digit) for digit in range(10))
train_losses, test_losses = [], []
train_acc, test_acc = [], []

# Start a W&B run in project "news4eva4" seeded with the defaults above.
wandb.init(config=hyperparameter_defaults, project="news4eva4")
# Lets the model be re-run without restarting the runtime.
wandb.watch_called = False
config = wandb.config



def main():
    """Train ``Net`` on MNIST for ``config.epochs`` epochs and log to W&B.

    Reads its settings from the module-level ``config`` (``wandb.config``),
    builds the MNIST loaders from ``train_transforms`` / ``test_transforms``,
    trains with SGD + momentum, logs train/test accuracy and loss after every
    epoch, and finally saves the weights to ``model.pth`` and uploads them to
    the current W&B run.
    """
    use_cuda = not config.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    # Worker processes and pinned memory only pay off when batches go to a GPU.
    kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

    # Seed PyTorch and ask cuDNN for deterministic kernels for reproducibility.
    torch.manual_seed(config.seed)
    torch.backends.cudnn.deterministic = True

    # Load the dataset: MNIST (downloaded to ../data on first use), with the
    # train / test transform pipelines defined earlier in the notebook.
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=train_transforms),
        batch_size=config.batch_size, shuffle=True, **kwargs)
    # Evaluation does not depend on sample order, so the test split is not
    # shuffled, and it uses the dedicated test_batch_size setting.
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=test_transforms),
        batch_size=config.test_batch_size, shuffle=False, **kwargs)

    # Build the model on the chosen device and its optimiser.
    model = Net(dropout_val=config.dropout).to(device)
    optimizer = optim.SGD(model.parameters(), lr=config.lr,
                          momentum=config.momentum)

    # wandb.watch() with log="all" records histograms of parameter values in
    # addition to gradients.
    wandb.watch(model, log="all")

    for epoch in range(1, config.epochs + 1):
        epoch_train_acc, epoch_train_loss = train(config, model, device, train_loader, optimizer, epoch)
        epoch_test_acc, epoch_test_loss = test(config, model, device, test_loader, classes, epoch)
        wandb.log({ "Train Accuracy": epoch_train_acc,
            "Train Loss": epoch_train_loss,
            "Test Accuracy":epoch_test_acc,
            "Test Loss": epoch_test_loss})

    # Save the checkpoint locally, then attach the file to the current W&B run.
    torch.save(model.state_dict(), "model.pth")
    wandb.save('model.pth')

if __name__ == '__main__':
    main()

VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Train Accuracy,98.77333
Train Loss,0.00031
Test Accuracy,99.05
Test Loss,0.02886
_runtime,234.0
_timestamp,1622025900.0
_step,18.0


0,1
Train Accuracy,▁▆▇▇▇▇█████████████
Train Loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
Test Accuracy,▁▃▆▆▆▇▇█▇▇▇▇▇█████▇
Test Loss,█▆▃▃▃▂▂▁▁▂▁▂▂▁▁▁▁▁▂
_runtime,▁▁▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇██
_timestamp,▁▁▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇██
_step,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇██


  cpuset_checked))
loss=0.0694517195224762 batch_id=937: 100%|██████████| 938/938 [00:14<00:00, 62.55it/s]


Epoch: 1 Train set: Average loss: 0.0039, Accuracy: 55471/60000 (92.452%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 1 Test set: Average loss: 0.0551, Accuracy: 9830/10000 (98.300%)



loss=0.0971817895770073 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 62.16it/s]


Epoch: 2 Train set: Average loss: 0.0016, Accuracy: 58171/60000 (96.952%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 2 Test set: Average loss: 0.0408, Accuracy: 9866/10000 (98.660%)



loss=0.03157374635338783 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 62.50it/s]


Epoch: 3 Train set: Average loss: 0.0013, Accuracy: 58517/60000 (97.528%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 3 Test set: Average loss: 0.0302, Accuracy: 9903/10000 (99.030%)



loss=0.06512744724750519 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.92it/s]


Epoch: 4 Train set: Average loss: 0.0012, Accuracy: 58646/60000 (97.743%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 4 Test set: Average loss: 0.0305, Accuracy: 9905/10000 (99.050%)



loss=0.07279963046312332 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 62.40it/s]


Epoch: 5 Train set: Average loss: 0.0011, Accuracy: 58763/60000 (97.938%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 5 Test set: Average loss: 0.0306, Accuracy: 9902/10000 (99.020%)



loss=0.008938002400100231 batch_id=937: 100%|██████████| 938/938 [00:16<00:00, 58.59it/s]


Epoch: 6 Train set: Average loss: 0.0009, Accuracy: 58903/60000 (98.172%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 6 Test set: Average loss: 0.0275, Accuracy: 9914/10000 (99.140%)



loss=0.09788103401660919 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 59.07it/s]



Epoch: 7 Train set: Average loss: 0.0009, Accuracy: 58918/60000 (98.197%)



  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 7 Test set: Average loss: 0.0283, Accuracy: 9910/10000 (99.100%)



loss=0.08261961489915848 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.32it/s]


Epoch: 8 Train set: Average loss: 0.0008, Accuracy: 59030/60000 (98.383%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 8 Test set: Average loss: 0.0257, Accuracy: 9914/10000 (99.140%)



loss=0.2145431488752365 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.15it/s]


Epoch: 9 Train set: Average loss: 0.0008, Accuracy: 59019/60000 (98.365%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 9 Test set: Average loss: 0.0259, Accuracy: 9923/10000 (99.230%)



loss=0.006780568510293961 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.00it/s]


Epoch: 10 Train set: Average loss: 0.0008, Accuracy: 59040/60000 (98.400%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 10 Test set: Average loss: 0.0246, Accuracy: 9930/10000 (99.300%)



loss=0.025659779086709023 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 59.74it/s]



Epoch: 11 Train set: Average loss: 0.0008, Accuracy: 59125/60000 (98.542%)



  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 11 Test set: Average loss: 0.0234, Accuracy: 9934/10000 (99.340%)



loss=0.11572294682264328 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.90it/s]


Epoch: 12 Train set: Average loss: 0.0008, Accuracy: 59090/60000 (98.483%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 12 Test set: Average loss: 0.0247, Accuracy: 9931/10000 (99.310%)



loss=0.13506565988063812 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.87it/s]


Epoch: 13 Train set: Average loss: 0.0007, Accuracy: 59144/60000 (98.573%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 13 Test set: Average loss: 0.0232, Accuracy: 9940/10000 (99.400%)



loss=0.019705627113580704 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.52it/s]


Epoch: 14 Train set: Average loss: 0.0007, Accuracy: 59181/60000 (98.635%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 14 Test set: Average loss: 0.0203, Accuracy: 9944/10000 (99.440%)



loss=0.00933801755309105 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.99it/s]


Epoch: 15 Train set: Average loss: 0.0007, Accuracy: 59160/60000 (98.600%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 15 Test set: Average loss: 0.0235, Accuracy: 9926/10000 (99.260%)



loss=0.06273490190505981 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.74it/s]


Epoch: 16 Train set: Average loss: 0.0007, Accuracy: 59191/60000 (98.652%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 16 Test set: Average loss: 0.0252, Accuracy: 9928/10000 (99.280%)



loss=0.07527562230825424 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 61.01it/s]


Epoch: 17 Train set: Average loss: 0.0007, Accuracy: 59217/60000 (98.695%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 17 Test set: Average loss: 0.0201, Accuracy: 9941/10000 (99.410%)



loss=0.016707701608538628 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.62it/s]



Epoch: 18 Train set: Average loss: 0.0006, Accuracy: 59252/60000 (98.753%)



  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 18 Test set: Average loss: 0.0212, Accuracy: 9934/10000 (99.340%)



loss=0.020643873140215874 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.46it/s]



Epoch: 19 Train set: Average loss: 0.0006, Accuracy: 59230/60000 (98.717%)


Epoch: 19 Test set: Average loss: 0.0237, Accuracy: 9935/10000 (99.350%)



## Final Attempt

In [None]:
from torch.optim.lr_scheduler import StepLR, OneCycleLR
# Default hyperparameters for this run. They are handed to wandb.init() below
# and read back through `config` (wandb.config) inside main().
hyperparameter_defaults = {
    "dropout": 0.1,
    "channels_one": 16,
    "channels_two": 32,
    "batch_size": 64,
    "test_batch_size": 34,
    "lr": 0.01,
    "momentum": 0.9,
    "no_cuda": False,
    "seed": 1,
    "epochs": 19,
    "bias": False,
    "log_interval": 11,
    "sched_lr_gamma": 0.5,
    "sched_lr_step": 1,
    "start_lr": 5,
}

# Reset the label names and the per-epoch metric histories for this run.
classes = tuple(str(digit) for digit in range(10))
train_losses, test_losses = [], []
train_acc, test_acc = [], []

# Start a W&B run in project "news4eva4" seeded with the defaults above.
wandb.init(config=hyperparameter_defaults, project="news4eva4")
# Lets the model be re-run without restarting the runtime.
wandb.watch_called = False
config = wandb.config



def main():
    """Train ``Net`` on MNIST for ``config.epochs`` epochs and log to W&B.

    Reads its settings from the module-level ``config`` (``wandb.config``),
    builds the MNIST loaders from ``train_transforms`` / ``test_transforms``,
    trains with SGD + momentum, logs train/test accuracy and loss after every
    epoch, and finally saves the weights to ``model.pth`` and uploads them to
    the current W&B run.
    """
    use_cuda = not config.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    # Worker processes and pinned memory only pay off when batches go to a GPU.
    kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

    # Seed PyTorch and ask cuDNN for deterministic kernels for reproducibility.
    torch.manual_seed(config.seed)
    torch.backends.cudnn.deterministic = True

    # Load the dataset: MNIST (downloaded to ../data on first use), with the
    # train / test transform pipelines defined earlier in the notebook.
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=train_transforms),
        batch_size=config.batch_size, shuffle=True, **kwargs)
    # Evaluation does not depend on sample order, so the test split is not
    # shuffled, and it uses the dedicated test_batch_size setting.
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=test_transforms),
        batch_size=config.test_batch_size, shuffle=False, **kwargs)

    # Build the model on the chosen device and its optimiser.
    model = Net(dropout_val=config.dropout).to(device)
    optimizer = optim.SGD(model.parameters(), lr=config.lr,
                          momentum=config.momentum)

    # wandb.watch() with log="all" records histograms of parameter values in
    # addition to gradients.
    wandb.watch(model, log="all")

    for epoch in range(1, config.epochs + 1):
        epoch_train_acc, epoch_train_loss = train(config, model, device, train_loader, optimizer, epoch)
        epoch_test_acc, epoch_test_loss = test(config, model, device, test_loader, classes, epoch)
        wandb.log({ "Train Accuracy": epoch_train_acc,
            "Train Loss": epoch_train_loss,
            "Test Accuracy":epoch_test_acc,
            "Test Loss": epoch_test_loss})

    # Save the checkpoint locally, then attach the file to the current W&B run.
    torch.save(model.state_dict(), "model.pth")
    wandb.save('model.pth')

if __name__ == '__main__':
    main()

VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Train Accuracy,98.77333
Train Loss,0.00031
Test Accuracy,99.05
Test Loss,0.02886
_runtime,234.0
_timestamp,1622025900.0
_step,18.0


0,1
Train Accuracy,▁▆▇▇▇▇█████████████
Train Loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
Test Accuracy,▁▃▆▆▆▇▇█▇▇▇▇▇█████▇
Test Loss,█▆▃▃▃▂▂▁▁▂▁▂▂▁▁▁▁▁▂
_runtime,▁▁▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇██
_timestamp,▁▁▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇██
_step,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇██


  cpuset_checked))
loss=0.0694517195224762 batch_id=937: 100%|██████████| 938/938 [00:14<00:00, 62.55it/s]


Epoch: 1 Train set: Average loss: 0.0039, Accuracy: 55471/60000 (92.452%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 1 Test set: Average loss: 0.0551, Accuracy: 9830/10000 (98.300%)



loss=0.0971817895770073 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 62.16it/s]


Epoch: 2 Train set: Average loss: 0.0016, Accuracy: 58171/60000 (96.952%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 2 Test set: Average loss: 0.0408, Accuracy: 9866/10000 (98.660%)



loss=0.03157374635338783 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 62.50it/s]


Epoch: 3 Train set: Average loss: 0.0013, Accuracy: 58517/60000 (97.528%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 3 Test set: Average loss: 0.0302, Accuracy: 9903/10000 (99.030%)



loss=0.06512744724750519 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.92it/s]


Epoch: 4 Train set: Average loss: 0.0012, Accuracy: 58646/60000 (97.743%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 4 Test set: Average loss: 0.0305, Accuracy: 9905/10000 (99.050%)



loss=0.07279963046312332 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 62.40it/s]


Epoch: 5 Train set: Average loss: 0.0011, Accuracy: 58763/60000 (97.938%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 5 Test set: Average loss: 0.0306, Accuracy: 9902/10000 (99.020%)



loss=0.008938002400100231 batch_id=937: 100%|██████████| 938/938 [00:16<00:00, 58.59it/s]


Epoch: 6 Train set: Average loss: 0.0009, Accuracy: 58903/60000 (98.172%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 6 Test set: Average loss: 0.0275, Accuracy: 9914/10000 (99.140%)



loss=0.09788103401660919 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 59.07it/s]



Epoch: 7 Train set: Average loss: 0.0009, Accuracy: 58918/60000 (98.197%)



  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 7 Test set: Average loss: 0.0283, Accuracy: 9910/10000 (99.100%)



loss=0.08261961489915848 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.32it/s]


Epoch: 8 Train set: Average loss: 0.0008, Accuracy: 59030/60000 (98.383%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 8 Test set: Average loss: 0.0257, Accuracy: 9914/10000 (99.140%)



loss=0.2145431488752365 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.15it/s]


Epoch: 9 Train set: Average loss: 0.0008, Accuracy: 59019/60000 (98.365%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 9 Test set: Average loss: 0.0259, Accuracy: 9923/10000 (99.230%)



loss=0.006780568510293961 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.00it/s]


Epoch: 10 Train set: Average loss: 0.0008, Accuracy: 59040/60000 (98.400%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 10 Test set: Average loss: 0.0246, Accuracy: 9930/10000 (99.300%)



loss=0.025659779086709023 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 59.74it/s]



Epoch: 11 Train set: Average loss: 0.0008, Accuracy: 59125/60000 (98.542%)



  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 11 Test set: Average loss: 0.0234, Accuracy: 9934/10000 (99.340%)



loss=0.11572294682264328 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.90it/s]


Epoch: 12 Train set: Average loss: 0.0008, Accuracy: 59090/60000 (98.483%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 12 Test set: Average loss: 0.0247, Accuracy: 9931/10000 (99.310%)



loss=0.13506565988063812 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.87it/s]


Epoch: 13 Train set: Average loss: 0.0007, Accuracy: 59144/60000 (98.573%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 13 Test set: Average loss: 0.0232, Accuracy: 9940/10000 (99.400%)



loss=0.019705627113580704 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.52it/s]


Epoch: 14 Train set: Average loss: 0.0007, Accuracy: 59181/60000 (98.635%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 14 Test set: Average loss: 0.0203, Accuracy: 9944/10000 (99.440%)



loss=0.00933801755309105 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.99it/s]


Epoch: 15 Train set: Average loss: 0.0007, Accuracy: 59160/60000 (98.600%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 15 Test set: Average loss: 0.0235, Accuracy: 9926/10000 (99.260%)



loss=0.06273490190505981 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.74it/s]


Epoch: 16 Train set: Average loss: 0.0007, Accuracy: 59191/60000 (98.652%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 16 Test set: Average loss: 0.0252, Accuracy: 9928/10000 (99.280%)



loss=0.07527562230825424 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 61.01it/s]


Epoch: 17 Train set: Average loss: 0.0007, Accuracy: 59217/60000 (98.695%)




  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 17 Test set: Average loss: 0.0201, Accuracy: 9941/10000 (99.410%)



loss=0.016707701608538628 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.62it/s]



Epoch: 18 Train set: Average loss: 0.0006, Accuracy: 59252/60000 (98.753%)



  0%|          | 0/938 [00:00<?, ?it/s]


Epoch: 18 Test set: Average loss: 0.0212, Accuracy: 9934/10000 (99.340%)



loss=0.020643873140215874 batch_id=937: 100%|██████████| 938/938 [00:15<00:00, 60.46it/s]



Epoch: 19 Train set: Average loss: 0.0006, Accuracy: 59230/60000 (98.717%)


Epoch: 19 Test set: Average loss: 0.0237, Accuracy: 9935/10000 (99.350%)



## RESULTS of the Final Run

In [None]:
def plot_metrics(metrics_dataframe_local):
    """Return the train and test metrics of a run side by side.

    Despite its name this function does not draw anything; it only reshapes
    the history frame so that the caller can plot the result.

    Args:
        metrics_dataframe_local: DataFrame containing the columns
            'Train Accuracy', 'Train Loss', 'Test Accuracy' and 'Test Loss'.

    Returns:
        DataFrame with those four columns. Rows with missing values are
        dropped separately for the train pair and the test pair, each pair is
        re-indexed from 0, and the two are joined column-wise on that index.
    """
    def _compact(columns):
        # Keep only complete rows of the requested columns, renumbered from 0.
        return metrics_dataframe_local.loc[:, columns].dropna().reset_index(drop=True)

    # Use the frame passed in (previously the global `metrics_dataframe` was
    # read instead, so the argument was silently ignored).
    final_run_metrics = pd.concat(
        [_compact(['Train Accuracy', 'Train Loss']),
         _compact(['Test Accuracy', 'Test Loss'])],
        axis=1)
    return final_run_metrics


In [None]:
import wandb
# The API client has to be created here: with this line commented out, `api`
# is undefined on a fresh kernel and the next statement raises NameError.
api = wandb.Api()

# Runs of a project are addressed as <entity>/<project>.
runs = api.runs('rajy4683/news4eva4')
# Index the collection itself rather than its internal `.objects` list.
# NOTE(review): which run comes first depends on the API's ordering; confirm
# it is the intended one (the cell below displays run.name).
run = runs[0]

# Save the metrics history of the run to a CSV file.
metrics_dataframe = run.history()
metrics_dataframe.to_csv("metrics.csv")

In [None]:
# Display the name of the run fetched above, to confirm which run is analysed.
run.name

'whole-shadow-452'

In [None]:
# Display the index of the last logged history step of the run.
run.lastHistoryStep

18

In [None]:
# Find the history row with the highest test accuracy and show its metrics.
summary_columns = ['_step', 'Test Accuracy', 'Train Accuracy', 'Train Loss', 'Test Loss']
test_accuracy_series = metrics_dataframe['Test Accuracy']
max_accuracy_idx = test_accuracy_series.idxmax()
metrics_dataframe.loc[max_accuracy_idx, summary_columns]

_step                      13
Test Accuracy           99.44
Train Accuracy         98.635
Train Loss        0.000689814
Test Loss           0.0203294
Name: 13, dtype: object

In [None]:
# Plot test vs. train accuracy per logged step (drawn with the pandas plotting
# backend configured in the import cell).
metrics_dataframe[['Test Accuracy', 'Train Accuracy']].plot()

In [None]:
# Plot test vs. train loss per logged step (drawn with the pandas plotting
# backend configured in the import cell).
metrics_dataframe[['Test Loss', 'Train Loss']].plot()