<a href="https://colab.research.google.com/github/rajy4683/EVAP2/blob/master/S5EVA6_Attempt2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Final Accuracy: 99.44**
###  Number of parameters - 7360

Target:
1. Reduce parameters from previous model by decreasing the number of channels in 2nd conv block
2. Introducing 1x1 between 1st conv block and 2nd conv block.
3. Augment images with Image Rotation of +/-7.0

Results:
Total Parameters: 7310

        Epoch: 14 Test set: Average loss: 0.0182, Accuracy: 9943/10000 (99.430%)
        Epoch: 15 Test set: Average loss: 0.0198, Accuracy: 9940/10000 (99.400%)

Analysis:
1. Base model performance achieve
2. Model is still in the underfitting zone
3. In this attempt training was done with and without Augmentation and observed atleast 0.5% jump in validation accuracy with Augmentation.
3. Last few epochs hover between 99.36-99.39.
4. The 1x1 looked a bit redundant as the channel count was same between the layers

In [1]:
!nvidia-smi

Fri Jun  4 17:13:59 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.27       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   49C    P0    28W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install pytorch-ignite
!pip install torchsummary
!pip install wandb
!pip install gradio
!pip install netron
!pip install plotly --upgrade

In [3]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchsummary import summary
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['figure.figsize'] = (15, 10)

import pandas as pd
import plotly.express as px
pd.options.plotting.backend = "plotly"

In [4]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchsummary import summary

import logging
logging.propagate = False 
logging.getLogger().setLevel(logging.ERROR)

from argparse import ArgumentParser
from tqdm import tqdm
import os

In [5]:
import logging
logging.propagate = False 
logging.getLogger().setLevel(logging.ERROR)

In [6]:
import wandb
#wandb.init()


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [7]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

In [43]:
class Net(nn.Module):
    def __init__(self, dropout_val=0.1):
        super(Net, self).__init__()
        self.dropout_val = dropout_val
        self.bias = False
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 8, 3, padding=1, stride=1,bias=self.bias), # Input=1x28x28 Output=8x28x28 RF=3
            nn.ReLU(),
            nn.BatchNorm2d(8),
            nn.Dropout(self.dropout_val),
            nn.Conv2d(8, 8, 3, padding=1, stride=1,bias=self.bias), # Input=8x28x28 Output=8x28x28 RF=5
            nn.ReLU(),
            nn.BatchNorm2d(8),
            # nn.Conv2d(8, 8, 3, padding=1, bias=self.bias),
            # nn.ReLU(),
            # nn.BatchNorm2d(8),
            nn.MaxPool2d(2, 2),            # Input=8x28x28 Output=8x14x14 RF=6
            nn.Dropout(self.dropout_val),
            nn.Conv2d(8, 8, 1)
        )
        
        self.conv2 = nn.Sequential(
            nn.Conv2d(8, 8, 3, padding=1,stride=1, bias=self.bias), # Input=8x14x14 Output=8x14x14 RF=10
            nn.ReLU(),
            nn.BatchNorm2d(8),
            nn.Dropout(self.dropout_val),
            # nn.Conv2d(8, 16, 1),
            nn.Conv2d(8, 16, 3, padding=1, bias=self.bias), # Input=8x14x14 Output=16x14x14 RF=14
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(2, 2), # Input=16x14x14 Output=16x7x7 RF=16
            nn.Dropout(self.dropout_val),
            # nn.Conv2d(16, 16, 1)
        )
        
        self.conv3 = nn.Sequential(
            nn.Conv2d(16, 16, 3,bias=self.bias), # Input=16x7x7 Output=16x5x5 RF=24
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(self.dropout_val),
            nn.Conv2d(16, 16, 3,bias=self.bias), # Input=16x5x5 Output=16x3x3 RF=32
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(2, 2), # Input=16x3x3 Output=16x1x1 RF=36
            nn.Dropout(self.dropout_val)
        )
        
        self.gap_linear = nn.Sequential(
            nn.AdaptiveAvgPool2d((1,1)), 
            nn.Conv2d(16, 10, 1, bias=self.bias)
        )
                
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        
        #x = x.view(x.size(0), -1)
        x = self.gap_linear(x)
        x = x.view(-1, 10)
        x = F.log_softmax(x, dim=1)
        return x

### Final Model


In [44]:
model = Net(dropout_val=0.1).to(device)
summary(model, input_size=(1, 28, 28))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              72
              ReLU-2            [-1, 8, 28, 28]               0
       BatchNorm2d-3            [-1, 8, 28, 28]              16
           Dropout-4            [-1, 8, 28, 28]               0
            Conv2d-5            [-1, 8, 28, 28]             576
              ReLU-6            [-1, 8, 28, 28]               0
       BatchNorm2d-7            [-1, 8, 28, 28]              16
         MaxPool2d-8            [-1, 8, 14, 14]               0
           Dropout-9            [-1, 8, 14, 14]               0
           Conv2d-10            [-1, 8, 14, 14]              72
           Conv2d-11            [-1, 8, 14, 14]             576
             ReLU-12            [-1, 8, 14, 14]               0
      BatchNorm2d-13            [-1, 8, 14, 14]              16
          Dropout-14            [-1, 8,

### Datasets and Basic Transforms

In [34]:
train_transforms = transforms.Compose([
                                      #  transforms.Resize((28, 28)),
                                      #  transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
                                       transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,)) # The mean and std have to be sequences (e.g., tuples), therefore you should add a comma after the values. 
                                       # Note the difference between (0.1307) and (0.1307,)
                                       ])

# Test Phase transformations
test_transforms = transforms.Compose([
                                      #  transforms.Resize((28, 28)),
                                      #  transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
                                       #transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,))
                                       ])

In [25]:
torch.manual_seed(1)
batch_size = 128

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)


In [26]:
classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
train_losses = []
test_losses = []
train_acc = []
test_acc = []

from tqdm import tqdm
def train(args, model, device, train_loader, optimizer, epoch_number):
    model.train()
    pbar = tqdm(train_loader)
    train_loss = 0
    train_accuracy = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        train_accuracy += pred.eq(target.view_as(pred)).sum().item()

        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')
        train_loss += loss.item()

    train_loss /= len(train_loader.dataset)
    print('\nEpoch: {:.0f} Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        epoch_number, train_loss, train_accuracy, len(train_loader.dataset),
        100. * train_accuracy / len(train_loader.dataset)))
    train_accuracy = (100. * train_accuracy) / len(train_loader.dataset)
    train_acc.append(train_accuracy)
    train_losses.append(train_loss)

    return train_accuracy, train_loss

def test(args, model, device, test_loader,classes,epoch_number):
    model.eval()
    test_loss = 0
    correct = 0
    example_images = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
        #example_images.append(wandb.Image(
        #        data[0], caption="Pred: {} Truth: {}".format(classes[pred[0].item()], classes[target[0]])))

    test_loss /= len(test_loader.dataset)
    test_accuracy = (100. * correct) / len(test_loader.dataset)

    print('\nEpoch: {:.0f} Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        epoch_number, test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    test_acc.append(test_accuracy)
    test_losses.append(test_loss)

    return test_accuracy, test_loss

In [27]:
classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
train_losses = []
test_losses = []
train_acc = []
test_acc = []

from tqdm import tqdm
def train(args, model, device, train_loader, optimizer, epoch_number):
    model.train()
    pbar = tqdm(train_loader)
    train_loss = 0
    train_accuracy = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        train_accuracy += pred.eq(target.view_as(pred)).sum().item()

        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')
        train_loss += loss.item()

    train_loss /= len(train_loader.dataset)
    print('\nEpoch: {:.0f} Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        epoch_number, train_loss, train_accuracy, len(train_loader.dataset),
        100. * train_accuracy / len(train_loader.dataset)))
    train_accuracy = (100. * train_accuracy) / len(train_loader.dataset)
    train_acc.append(train_accuracy)
    train_losses.append(train_loss)

    return train_accuracy, train_loss

def test(args, model, device, test_loader,classes,epoch_number):
    model.eval()
    test_loss = 0
    correct = 0
    example_images = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
        #example_images.append(wandb.Image(
        #        data[0], caption="Pred: {} Truth: {}".format(classes[pred[0].item()], classes[target[0]])))

    test_loss /= len(test_loader.dataset)
    test_accuracy = (100. * correct) / len(test_loader.dataset)

    print('\nEpoch: {:.0f} Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        epoch_number, test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    test_acc.append(test_accuracy)
    test_losses.append(test_loss)

    return test_accuracy, test_loss

## Final Attempt

In [45]:
from torch.optim.lr_scheduler import StepLR, OneCycleLR

hyperparameter_defaults = dict(
    dropout = 0.069,#0.07114420042272313,
    channels_one = 16,
    channels_two = 32,
    batch_size = 128,
    test_batch_size=34,
    lr = 0.04104, #0.030455453938066226, #0.018,# 0.017530428914306426,
    momentum = 0.9, #0.8424379743502641,
    no_cuda = False,
    seed = 1,
    epochs = 15,
    bias = False,
    log_interval = 10,
    sched_lr_gamma = 0.25,
    sched_lr_step= 1,
    start_lr = 8
    )

classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
train_losses = []
test_losses = []
train_acc = []
test_acc = []

wandb.init(config=hyperparameter_defaults, project="news4eva4")
wandb.watch_called = False # Re-run the model without restarting the runtime, unnecessary after our next release
config = wandb.config



def main():
    use_cuda = not config.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}
    
    # Set random seeds and deterministic pytorch for reproducibility
    # random.seed(config.seed)       # python random seed
    torch.manual_seed(config.seed) # pytorch random seed
    # numpy.random.seed(config.seed) # numpy random seed
    torch.backends.cudnn.deterministic = True

    # Load the dataset: We're training our CNN on CIFAR10 (https://www.cs.toronto.edu/~kriz/cifar.html)
    # First we define the tranformations to apply to our images
    #kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                        transform=train_transforms),
        batch_size=config.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=test_transforms),
        batch_size=config.batch_size, shuffle=True, **kwargs)

    # Initialize our model, recursively go over all modules and convert their parameters and buffers to CUDA tensors (if device is set to cuda)
    model = Net(dropout_val=config.dropout).to(device)
    optimizer = optim.SGD(model.parameters(), lr=config.lr,
                          momentum=config.momentum)
    
    # scheduler = StepLR(optimizer, step_size=config.sched_lr_step, gamma=config.sched_lr_gamma)
    #scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=config.lr, steps_per_epoch=len(train_loader), epochs=10)
    # WandB – wandb.watch() automatically fetches all layer dimensions, gradients, model parameters and logs them automatically to your dashboard.
    # Using log="all" log histograms of parameter values in addition to gradients
    wandb.watch(model, log="all")

    for epoch in range(1, config.epochs + 1):
        epoch_train_acc,epoch_train_loss = train(config, model, device, train_loader, optimizer, epoch)        
        epoch_test_acc,epoch_test_loss = test(config, model, device, test_loader, classes,epoch)
        # wandb.log({ "Train Accuracy": epoch_train_acc, 
        #            "Train Loss": epoch_train_loss, 
        #            "Test Accuracy":epoch_test_acc, 
        #            "Test Loss": epoch_test_loss,
        #            #"Learning Rate": config.lr})
        #            "Learning Rate": scheduler.get_last_lr()})
        # if(epoch > config.start_lr):
        #     scheduler.step()

        wandb.log({ "Train Accuracy": epoch_train_acc, 
            "Train Loss": epoch_train_loss, 
            "Test Accuracy":epoch_test_acc, 
            "Test Loss": epoch_test_loss})
        
    # WandB – Save the model checkpoint. This automatically saves a file to the cloud and associates it with the current run.
    torch.save(model.state_dict(), "model.pth")
    wandb.save('model.pth')

if __name__ == '__main__':
    main()

VBox(children=(Label(value=' 0.05MB of 0.05MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Train Accuracy,98.85
Train Loss,0.00029
Test Accuracy,99.29
Test Loss,0.02166
_runtime,348.0
_timestamp,1622831961.0
_step,14.0


0,1
Train Accuracy,▁▆▇▇▇▇▇████████
Train Loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁
Test Accuracy,▁▂▆▆▆▇▇▇█▇▆███▇
Test Loss,█▆▃▂▂▂▂▂▁▂▂▂▁▁▂
_runtime,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█
_timestamp,▁▁▂▂▃▃▄▄▅▅▆▆▇▇█
_step,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█


  cpuset_checked))
loss=0.25968101620674133 batch_id=468: 100%|██████████| 469/469 [00:20<00:00, 22.51it/s]


Epoch: 1 Train set: Average loss: 0.0016, Accuracy: 56116/60000 (93.527%)




  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 1 Test set: Average loss: 0.0449, Accuracy: 9853/10000 (98.530%)



loss=0.12625524401664734 batch_id=468: 100%|██████████| 469/469 [00:20<00:00, 22.46it/s]


Epoch: 2 Train set: Average loss: 0.0007, Accuracy: 58398/60000 (97.330%)




  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 2 Test set: Average loss: 0.0370, Accuracy: 9868/10000 (98.680%)



loss=0.04286349192261696 batch_id=468: 100%|██████████| 469/469 [00:20<00:00, 22.48it/s]


Epoch: 3 Train set: Average loss: 0.0005, Accuracy: 58709/60000 (97.848%)




  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 3 Test set: Average loss: 0.0328, Accuracy: 9887/10000 (98.870%)



loss=0.08610495179891586 batch_id=468: 100%|██████████| 469/469 [00:21<00:00, 21.98it/s]


Epoch: 4 Train set: Average loss: 0.0005, Accuracy: 58897/60000 (98.162%)




  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 4 Test set: Average loss: 0.0295, Accuracy: 9896/10000 (98.960%)



loss=0.1525675356388092 batch_id=468: 100%|██████████| 469/469 [00:21<00:00, 21.67it/s]


Epoch: 5 Train set: Average loss: 0.0004, Accuracy: 58989/60000 (98.315%)




  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 5 Test set: Average loss: 0.0285, Accuracy: 9904/10000 (99.040%)



loss=0.016572201624512672 batch_id=468: 100%|██████████| 469/469 [00:21<00:00, 21.53it/s]


Epoch: 6 Train set: Average loss: 0.0004, Accuracy: 59016/60000 (98.360%)




  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 6 Test set: Average loss: 0.0257, Accuracy: 9910/10000 (99.100%)



loss=0.006713265553116798 batch_id=468: 100%|██████████| 469/469 [00:21<00:00, 22.27it/s]


Epoch: 7 Train set: Average loss: 0.0004, Accuracy: 59050/60000 (98.417%)




  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 7 Test set: Average loss: 0.0235, Accuracy: 9920/10000 (99.200%)



loss=0.14537405967712402 batch_id=468: 100%|██████████| 469/469 [00:21<00:00, 22.19it/s]


Epoch: 8 Train set: Average loss: 0.0004, Accuracy: 59116/60000 (98.527%)




  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 8 Test set: Average loss: 0.0269, Accuracy: 9910/10000 (99.100%)



loss=0.09399048238992691 batch_id=468: 100%|██████████| 469/469 [00:21<00:00, 22.15it/s]


Epoch: 9 Train set: Average loss: 0.0004, Accuracy: 59149/60000 (98.582%)




  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 9 Test set: Average loss: 0.0211, Accuracy: 9934/10000 (99.340%)



loss=0.03478742763400078 batch_id=468: 100%|██████████| 469/469 [00:21<00:00, 22.27it/s]


Epoch: 10 Train set: Average loss: 0.0004, Accuracy: 59125/60000 (98.542%)




  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 10 Test set: Average loss: 0.0228, Accuracy: 9925/10000 (99.250%)



loss=0.015803666785359383 batch_id=468: 100%|██████████| 469/469 [00:21<00:00, 21.98it/s]


Epoch: 11 Train set: Average loss: 0.0003, Accuracy: 59161/60000 (98.602%)




  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 11 Test set: Average loss: 0.0209, Accuracy: 9935/10000 (99.350%)



loss=0.09644711017608643 batch_id=468: 100%|██████████| 469/469 [00:21<00:00, 22.12it/s]


Epoch: 12 Train set: Average loss: 0.0003, Accuracy: 59195/60000 (98.658%)




  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 12 Test set: Average loss: 0.0211, Accuracy: 9935/10000 (99.350%)



loss=0.04702411964535713 batch_id=468: 100%|██████████| 469/469 [00:21<00:00, 22.10it/s]


Epoch: 13 Train set: Average loss: 0.0003, Accuracy: 59233/60000 (98.722%)




  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 13 Test set: Average loss: 0.0217, Accuracy: 9936/10000 (99.360%)



loss=0.05563393235206604 batch_id=468: 100%|██████████| 469/469 [00:21<00:00, 22.20it/s]



Epoch: 14 Train set: Average loss: 0.0003, Accuracy: 59263/60000 (98.772%)



  0%|          | 0/469 [00:00<?, ?it/s]


Epoch: 14 Test set: Average loss: 0.0182, Accuracy: 9943/10000 (99.430%)



loss=0.02111138589680195 batch_id=468: 100%|██████████| 469/469 [00:21<00:00, 22.09it/s]


Epoch: 15 Train set: Average loss: 0.0003, Accuracy: 59266/60000 (98.777%)







Epoch: 15 Test set: Average loss: 0.0198, Accuracy: 9940/10000 (99.400%)



## RESULTS of the Final Run

In [47]:
def plot_metrics(metrics_dataframe_local):
    dataset_metrics = metrics_dataframe_local.loc[:,['Test Accuracy', 'Test Loss']].dropna().reset_index().drop(columns='index')
    final_run_metrics = pd.concat([metrics_dataframe.loc[:,['Train Accuracy', 'Train Loss']].dropna().reset_index().drop(columns='index'), 
                                   metrics_dataframe.loc[:,['Test Accuracy', 'Test Loss']].dropna().reset_index().drop(columns='index')],axis=1)
    return final_run_metrics
    # final_run_metrics.loc[:,['Train Accuracy', 'Test Accuracy']].plot()
    # final_run_metrics.loc[:,['Train Loss', 'Test Loss']].plot()


In [55]:
import wandb
api = wandb.Api()

# run is specified by <entity>/<project>/<run id>
runs = api.runs('rajy4683/news4eva4')
#run = runs.objects[0] #.history()
for itr in runs:
    if itr.name == 'zesty-smoke-586':
        run = itr

# save the metrics for the run to a csv file
metrics_dataframe = run.history()
metrics_dataframe.to_csv("metrics.csv")

In [56]:
run.name

'zesty-smoke-586'

In [57]:
run.lastHistoryStep

14

In [58]:
max_accuracy_idx = metrics_dataframe['Test Accuracy'].idxmax()
metrics_dataframe.loc[max_accuracy_idx, ['_step', 'Test Accuracy', 'Train Accuracy', 'Train Loss', 'Test Loss']]

_step                      12
Test Accuracy           99.42
Train Accuracy        98.9033
Train Loss        0.000266748
Test Loss           0.0178515
Name: 12, dtype: object

In [59]:
metrics_dataframe[['Test Accuracy', 'Train Accuracy']].plot()

In [60]:
metrics_dataframe[['Test Loss', 'Train Loss']].plot()

In [None]:
torch.save(model.to("cpu").state_dict(),"mnist_medium.pth")
traced_medium = torch.jit.trace(model.to("cpu"), torch.Tensor(1,1,28,28))
traced_medium.save("medium.pth")