In [0]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [0]:
import dnn_model

In [1]:
class BatchNorm(nn.BatchNorm2d):
    def __init__(self, num_features, eps=1e-05, momentum=0.1, weight=True, bias=True):
        super().__init__(num_features, eps=eps, momentum=momentum)
        self.weight.data.fill_(1.0)
        self.bias.data.fill_(0.0)
        self.weight.requires_grad = weight
        self.bias.requires_grad = bias


class GhostBatchNorm(BatchNorm):
    def __init__(self, num_features, num_splits, **kw):
        super().__init__(num_features, **kw)
        self.num_splits = num_splits
        self.register_buffer('running_mean', torch.zeros(num_features * self.num_splits))
        self.register_buffer('running_var', torch.ones(num_features * self.num_splits))

    def train(self, mode=True):
        if (self.training is True) and (mode is False):  # lazily collate stats when we are going to use them
            self.running_mean = torch.mean(self.running_mean.view(self.num_splits, self.num_features), dim=0).repeat(
                self.num_splits)
            self.running_var = torch.mean(self.running_var.view(self.num_splits, self.num_features), dim=0).repeat(
                self.num_splits)
        return super().train(mode)

    def forward(self, input):
        N, C, H, W = input.shape
        if self.training or not self.track_running_stats:
            return F.batch_norm(
                input.view(-1, C * self.num_splits, H, W), self.running_mean, self.running_var,
                self.weight.repeat(self.num_splits), self.bias.repeat(self.num_splits),
                True, self.momentum, self.eps).view(N, C, H, W)
        else:
            return F.batch_norm(
                input, self.running_mean[:self.num_features], self.running_var[:self.num_features],
                self.weight, self.bias, False, self.momentum, self.eps)


NameError: ignored

In [3]:
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = dnn_model.Net1().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             160
           Dropout-2           [-1, 16, 26, 26]               0
            Conv2d-3           [-1, 10, 24, 24]           1,450
           Dropout-4           [-1, 10, 24, 24]               0
         MaxPool2d-5           [-1, 10, 12, 12]               0
       BatchNorm2d-6           [-1, 10, 12, 12]              20
            Conv2d-7           [-1, 10, 10, 10]             910
           Dropout-8           [-1, 10, 10, 10]               0
            Conv2d-9             [-1, 16, 8, 8]           1,456
          Dropout-10             [-1, 16, 8, 8]               0
      BatchNorm2d-11             [-1, 16, 8, 8]              32
           Conv2d-12             [-1, 16, 6, 6]           2,320
          Dropout-13             [-1, 16, 6, 6]               0
           Conv2d-14             [-1, 1

  return F.log_softmax(x)


In [4]:
from dataloader import TrainSetLoader

batch_size = 128
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
loader = TrainSetLoader(kwargs, batch_size)
train_loader = loader.data_loader("train", "MNIST")
test_loader = loader.data_loader("test", "MNIST")


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw
Processing...
Done!




In [0]:
# Net1 - without L1 & L2, With Batch normalization
from train import train_model
from test import test_model

# Session 5 final model
model = dnn_model.Net1().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

for epoch in range(1, 25):
    train_model(model, device, train_loader, optimizer, epoch)
    test_model(model, device, test_loader)

  return F.log_softmax(x)
loss=0.2893454134464264 batch_id=468 Accuracy=72.09: 100%|██████████| 469/469 [01:16<00:00,  6.15it/s]
loss=0.13874687254428864 batch_id=0 Accuracy=93.75:   0%|          | 1/469 [00:00<01:18,  5.94it/s]


Test set: Average loss: 0.1180, Accuracy: 9671/10000 (96.71%)



loss=0.12450223416090012 batch_id=468 Accuracy=96.24: 100%|██████████| 469/469 [01:15<00:00,  6.22it/s]
loss=0.04092518240213394 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:16,  6.14it/s]


Test set: Average loss: 0.0664, Accuracy: 9787/10000 (97.87%)



loss=0.02072138525545597 batch_id=468 Accuracy=97.47: 100%|██████████| 469/469 [01:16<00:00,  6.15it/s]
loss=0.10791319608688354 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:16,  6.11it/s]


Test set: Average loss: 0.0548, Accuracy: 9828/10000 (98.28%)



loss=0.019239578396081924 batch_id=468 Accuracy=97.94: 100%|██████████| 469/469 [01:16<00:00,  6.16it/s]
loss=0.044883497059345245 batch_id=0 Accuracy=97.66:   0%|          | 1/469 [00:00<01:15,  6.20it/s]


Test set: Average loss: 0.0405, Accuracy: 9886/10000 (98.86%)



loss=0.07796728610992432 batch_id=468 Accuracy=98.20: 100%|██████████| 469/469 [01:15<00:00,  6.23it/s]
loss=0.06028338521718979 batch_id=0 Accuracy=96.88:   0%|          | 1/469 [00:00<01:15,  6.17it/s]


Test set: Average loss: 0.0386, Accuracy: 9880/10000 (98.80%)



loss=0.02924295701086521 batch_id=468 Accuracy=98.42: 100%|██████████| 469/469 [01:14<00:00,  6.26it/s]
loss=0.041071854531764984 batch_id=0 Accuracy=100.00:   0%|          | 1/469 [00:00<01:13,  6.33it/s]


Test set: Average loss: 0.0343, Accuracy: 9900/10000 (99.00%)



loss=0.03107149340212345 batch_id=468 Accuracy=98.44: 100%|██████████| 469/469 [01:14<00:00,  6.27it/s]
loss=0.08346288651227951 batch_id=0 Accuracy=97.66:   0%|          | 1/469 [00:00<01:14,  6.27it/s]


Test set: Average loss: 0.0329, Accuracy: 9894/10000 (98.94%)



loss=0.06480929255485535 batch_id=468 Accuracy=98.53: 100%|██████████| 469/469 [01:15<00:00,  6.25it/s]
loss=0.058981481939554214 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:14,  6.29it/s]


Test set: Average loss: 0.0290, Accuracy: 9906/10000 (99.06%)



loss=0.016332058236002922 batch_id=468 Accuracy=98.58: 100%|██████████| 469/469 [01:15<00:00,  6.20it/s]
loss=0.057359665632247925 batch_id=0 Accuracy=97.66:   0%|          | 1/469 [00:00<01:17,  6.01it/s]


Test set: Average loss: 0.0278, Accuracy: 9909/10000 (99.09%)



loss=0.07931821793317795 batch_id=468 Accuracy=98.69: 100%|██████████| 469/469 [01:15<00:00,  6.18it/s]
loss=0.028036100789904594 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:15,  6.18it/s]


Test set: Average loss: 0.0256, Accuracy: 9917/10000 (99.17%)



loss=0.02167467772960663 batch_id=468 Accuracy=98.70: 100%|██████████| 469/469 [01:16<00:00,  6.17it/s]
loss=0.0466059111058712 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:17,  6.05it/s]


Test set: Average loss: 0.0311, Accuracy: 9898/10000 (98.98%)



loss=0.009281736798584461 batch_id=468 Accuracy=98.81: 100%|██████████| 469/469 [01:15<00:00,  6.18it/s]
loss=0.02669500559568405 batch_id=0 Accuracy=97.66:   0%|          | 1/469 [00:00<01:16,  6.15it/s]


Test set: Average loss: 0.0286, Accuracy: 9915/10000 (99.15%)



loss=0.12622618675231934 batch_id=468 Accuracy=98.84: 100%|██████████| 469/469 [01:15<00:00,  6.24it/s]
loss=0.03183848038315773 batch_id=0 Accuracy=96.88:   0%|          | 1/469 [00:00<01:17,  6.06it/s]


Test set: Average loss: 0.0268, Accuracy: 9926/10000 (99.26%)



loss=0.018636683002114296 batch_id=468 Accuracy=98.84: 100%|██████████| 469/469 [01:14<00:00,  6.29it/s]
loss=0.01619480922818184 batch_id=0 Accuracy=100.00:   0%|          | 1/469 [00:00<01:14,  6.32it/s]


Test set: Average loss: 0.0277, Accuracy: 9917/10000 (99.17%)



loss=0.008850964717566967 batch_id=468 Accuracy=98.93: 100%|██████████| 469/469 [01:14<00:00,  6.30it/s]
loss=0.017067518085241318 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:13,  6.41it/s]


Test set: Average loss: 0.0249, Accuracy: 9917/10000 (99.17%)



loss=0.012088353745639324 batch_id=468 Accuracy=98.88: 100%|██████████| 469/469 [01:14<00:00,  6.31it/s]
loss=0.036707088351249695 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:16,  6.10it/s]


Test set: Average loss: 0.0233, Accuracy: 9930/10000 (99.30%)



loss=0.012740515172481537 batch_id=468 Accuracy=98.91: 100%|██████████| 469/469 [01:15<00:00,  6.19it/s]
loss=0.03833167999982834 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:15,  6.18it/s]


Test set: Average loss: 0.0279, Accuracy: 9914/10000 (99.14%)



loss=0.011753992177546024 batch_id=468 Accuracy=98.95: 100%|██████████| 469/469 [01:16<00:00,  6.16it/s]
loss=0.036214105784893036 batch_id=0 Accuracy=96.09:   0%|          | 1/469 [00:00<01:19,  5.91it/s]


Test set: Average loss: 0.0243, Accuracy: 9924/10000 (99.24%)



loss=0.03398953005671501 batch_id=468 Accuracy=98.98: 100%|██████████| 469/469 [01:15<00:00,  6.19it/s]
loss=0.034597355872392654 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:15,  6.19it/s]


Test set: Average loss: 0.0268, Accuracy: 9915/10000 (99.15%)



loss=0.05684588849544525 batch_id=468 Accuracy=99.00: 100%|██████████| 469/469 [01:16<00:00,  6.09it/s]
loss=0.019993050023913383 batch_id=0 Accuracy=100.00:   0%|          | 1/469 [00:00<01:15,  6.18it/s]


Test set: Average loss: 0.0212, Accuracy: 9937/10000 (99.37%)



loss=0.08279276639223099 batch_id=468 Accuracy=99.05: 100%|██████████| 469/469 [01:16<00:00,  6.14it/s]
loss=0.08796704560518265 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:17,  6.01it/s]


Test set: Average loss: 0.0258, Accuracy: 9914/10000 (99.14%)



loss=0.04518948122859001 batch_id=468 Accuracy=99.07: 100%|██████████| 469/469 [01:16<00:00,  6.16it/s]
loss=0.0429091639816761 batch_id=0 Accuracy=100.00:   0%|          | 1/469 [00:00<01:17,  6.07it/s]


Test set: Average loss: 0.0234, Accuracy: 9927/10000 (99.27%)



loss=0.04968872293829918 batch_id=468 Accuracy=99.03: 100%|██████████| 469/469 [01:16<00:00,  6.16it/s]
loss=0.024733087047934532 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:20,  5.78it/s]


Test set: Average loss: 0.0229, Accuracy: 9916/10000 (99.16%)



loss=0.022303065285086632 batch_id=468 Accuracy=99.06: 100%|██████████| 469/469 [01:16<00:00,  6.15it/s]



Test set: Average loss: 0.0239, Accuracy: 9931/10000 (99.31%)



In [0]:
# Net1 - with L2, with BN
from train import train_model
from test import test_model

# Session 5 final model
model = dnn_model.Net1().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0, dampening=0, weight_decay=0, nesterov=False)

for epoch in range(1, 25):
    train_model(model, device, train_loader, optimizer, epoch)
    test_model(model, device, test_loader)

  return F.log_softmax(x)
loss=2.159263849258423 batch_id=468 Accuracy=17.53: 100%|██████████| 469/469 [01:22<00:00,  5.71it/s]
loss=2.106940507888794 batch_id=0 Accuracy=19.53:   0%|          | 1/469 [00:00<01:20,  5.80it/s]


Test set: Average loss: 2.1149, Accuracy: 2606/10000 (26.06%)



loss=0.9036510586738586 batch_id=468 Accuracy=47.20: 100%|██████████| 469/469 [01:21<00:00,  5.74it/s]
loss=0.8817183375358582 batch_id=0 Accuracy=74.22:   0%|          | 1/469 [00:00<01:20,  5.85it/s]


Test set: Average loss: 0.8776, Accuracy: 7538/10000 (75.38%)



loss=0.3488036096096039 batch_id=468 Accuracy=82.37: 100%|██████████| 469/469 [01:21<00:00,  5.77it/s]
loss=0.3291482925415039 batch_id=0 Accuracy=90.62:   0%|          | 1/469 [00:00<01:22,  5.64it/s]


Test set: Average loss: 0.3527, Accuracy: 8983/10000 (89.83%)



loss=0.3351036310195923 batch_id=468 Accuracy=89.71: 100%|██████████| 469/469 [01:22<00:00,  5.65it/s]
loss=0.25621840357780457 batch_id=0 Accuracy=93.75:   0%|          | 1/469 [00:00<01:23,  5.60it/s]


Test set: Average loss: 0.2225, Accuracy: 9405/10000 (94.05%)



loss=0.2030869722366333 batch_id=468 Accuracy=92.49: 100%|██████████| 469/469 [01:21<00:00,  5.76it/s]
loss=0.21962915360927582 batch_id=0 Accuracy=93.75:   0%|          | 1/469 [00:00<01:19,  5.86it/s]


Test set: Average loss: 0.1693, Accuracy: 9532/10000 (95.32%)



loss=0.2062440663576126 batch_id=468 Accuracy=94.12: 100%|██████████| 469/469 [01:20<00:00,  5.85it/s]
loss=0.25239142775535583 batch_id=0 Accuracy=92.97:   0%|          | 1/469 [00:00<01:20,  5.79it/s]


Test set: Average loss: 0.1384, Accuracy: 9597/10000 (95.97%)



loss=0.1896650344133377 batch_id=468 Accuracy=95.14: 100%|██████████| 469/469 [01:20<00:00,  5.82it/s]
loss=0.15365813672542572 batch_id=0 Accuracy=92.97:   0%|          | 1/469 [00:00<01:21,  5.77it/s]


Test set: Average loss: 0.1195, Accuracy: 9644/10000 (96.44%)



loss=0.21281932294368744 batch_id=468 Accuracy=95.59: 100%|██████████| 469/469 [01:20<00:00,  5.81it/s]
loss=0.22240382432937622 batch_id=0 Accuracy=96.09:   0%|          | 1/469 [00:00<01:18,  5.94it/s]


Test set: Average loss: 0.1000, Accuracy: 9704/10000 (97.04%)



loss=0.11531659215688705 batch_id=468 Accuracy=96.16: 100%|██████████| 469/469 [01:20<00:00,  5.84it/s]
loss=0.27188950777053833 batch_id=0 Accuracy=91.41:   0%|          | 1/469 [00:00<01:26,  5.40it/s]


Test set: Average loss: 0.0881, Accuracy: 9747/10000 (97.47%)



loss=0.15947653353214264 batch_id=468 Accuracy=96.41: 100%|██████████| 469/469 [01:21<00:00,  5.78it/s]
loss=0.08630824089050293 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:19,  5.91it/s]


Test set: Average loss: 0.0854, Accuracy: 9737/10000 (97.37%)



loss=0.0697844997048378 batch_id=468 Accuracy=96.67: 100%|██████████| 469/469 [01:21<00:00,  5.78it/s]
loss=0.11552000045776367 batch_id=0 Accuracy=95.31:   0%|          | 1/469 [00:00<01:28,  5.26it/s]


Test set: Average loss: 0.0792, Accuracy: 9764/10000 (97.64%)



loss=0.04733434319496155 batch_id=468 Accuracy=96.93: 100%|██████████| 469/469 [01:21<00:00,  5.79it/s]
loss=0.08739109337329865 batch_id=0 Accuracy=95.31:   0%|          | 1/469 [00:00<01:23,  5.61it/s]


Test set: Average loss: 0.0707, Accuracy: 9789/10000 (97.89%)



loss=0.1337984949350357 batch_id=468 Accuracy=97.12: 100%|██████████| 469/469 [01:20<00:00,  5.84it/s]
loss=0.11143824458122253 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:21,  5.78it/s]


Test set: Average loss: 0.0669, Accuracy: 9803/10000 (98.03%)



loss=0.033572811633348465 batch_id=468 Accuracy=97.23: 100%|██████████| 469/469 [01:20<00:00,  5.79it/s]
loss=0.08407726138830185 batch_id=0 Accuracy=97.66:   0%|          | 1/469 [00:00<01:21,  5.75it/s]


Test set: Average loss: 0.0616, Accuracy: 9811/10000 (98.11%)



loss=0.06424305588006973 batch_id=468 Accuracy=97.39: 100%|██████████| 469/469 [01:21<00:00,  5.76it/s]
loss=0.053372547030448914 batch_id=0 Accuracy=96.88:   0%|          | 1/469 [00:00<01:19,  5.91it/s]


Test set: Average loss: 0.0594, Accuracy: 9827/10000 (98.27%)



loss=0.06346604973077774 batch_id=468 Accuracy=97.58: 100%|██████████| 469/469 [01:21<00:00,  5.78it/s]
loss=0.11559329926967621 batch_id=0 Accuracy=97.66:   0%|          | 1/469 [00:00<01:18,  5.93it/s]


Test set: Average loss: 0.0512, Accuracy: 9844/10000 (98.44%)



loss=0.07963082194328308 batch_id=468 Accuracy=97.72: 100%|██████████| 469/469 [01:20<00:00,  5.80it/s]
loss=0.08205882459878922 batch_id=0 Accuracy=96.88:   0%|          | 1/469 [00:00<01:26,  5.38it/s]


Test set: Average loss: 0.0551, Accuracy: 9825/10000 (98.25%)



loss=0.06147719919681549 batch_id=468 Accuracy=97.85: 100%|██████████| 469/469 [01:21<00:00,  5.76it/s]
loss=0.06696461886167526 batch_id=0 Accuracy=96.09:   0%|          | 1/469 [00:00<01:16,  6.09it/s]


Test set: Average loss: 0.0520, Accuracy: 9839/10000 (98.39%)



loss=0.05395066738128662 batch_id=468 Accuracy=97.89: 100%|██████████| 469/469 [01:21<00:00,  5.78it/s]
loss=0.037175968289375305 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:21,  5.76it/s]


Test set: Average loss: 0.0499, Accuracy: 9842/10000 (98.42%)



loss=0.07339072972536087 batch_id=468 Accuracy=97.98: 100%|██████████| 469/469 [01:21<00:00,  5.77it/s]
loss=0.04011828079819679 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:19,  5.86it/s]


Test set: Average loss: 0.0440, Accuracy: 9861/10000 (98.61%)



loss=0.08358017355203629 batch_id=468 Accuracy=98.09: 100%|██████████| 469/469 [01:21<00:00,  5.73it/s]
loss=0.060976848006248474 batch_id=0 Accuracy=96.88:   0%|          | 1/469 [00:00<01:21,  5.77it/s]


Test set: Average loss: 0.0433, Accuracy: 9860/10000 (98.60%)



loss=0.0658772811293602 batch_id=468 Accuracy=98.09: 100%|██████████| 469/469 [01:20<00:00,  5.79it/s]
loss=0.08902455866336823 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:24,  5.53it/s]


Test set: Average loss: 0.0409, Accuracy: 9869/10000 (98.69%)



loss=0.15186090767383575 batch_id=468 Accuracy=98.07: 100%|██████████| 469/469 [01:21<00:00,  5.75it/s]
loss=0.046191733330488205 batch_id=0 Accuracy=97.66:   0%|          | 1/469 [00:00<01:19,  5.92it/s]


Test set: Average loss: 0.0425, Accuracy: 9869/10000 (98.69%)



loss=0.04372880980372429 batch_id=468 Accuracy=98.15: 100%|██████████| 469/469 [01:20<00:00,  5.80it/s]



Test set: Average loss: 0.0385, Accuracy: 9882/10000 (98.82%)



In [5]:
# Net1 - with L2, with BN - weight_decay=5e-4
from train import train_model
from test import test_model

# Session 5 final model
model = dnn_model.Net1().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0, dampening=0, weight_decay=5e-4, nesterov=False)

for epoch in range(1, 25):
    train_model(model, device, train_loader, optimizer, epoch)
    test_model(model, device, test_loader)

  return F.log_softmax(x)
loss=2.1726458072662354 batch_id=468 Accuracy=17.58: 100%|██████████| 469/469 [00:24<00:00, 19.24it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 2.1188, Accuracy: 2635/10000 (26.35%)



loss=0.9015507698059082 batch_id=468 Accuracy=47.34: 100%|██████████| 469/469 [00:24<00:00, 18.99it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.8719, Accuracy: 7456/10000 (74.56%)



loss=0.40407347679138184 batch_id=468 Accuracy=82.74: 100%|██████████| 469/469 [00:24<00:00, 19.44it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.3433, Accuracy: 9035/10000 (90.35%)



loss=0.3085072934627533 batch_id=468 Accuracy=90.02: 100%|██████████| 469/469 [00:24<00:00, 19.19it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.2423, Accuracy: 9315/10000 (93.15%)



loss=0.2609136402606964 batch_id=468 Accuracy=92.62: 100%|██████████| 469/469 [00:24<00:00, 19.14it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1784, Accuracy: 9507/10000 (95.07%)



loss=0.15644492208957672 batch_id=468 Accuracy=94.21: 100%|██████████| 469/469 [00:24<00:00, 19.22it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1313, Accuracy: 9646/10000 (96.46%)



loss=0.0874258354306221 batch_id=468 Accuracy=94.93: 100%|██████████| 469/469 [00:24<00:00, 19.35it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1118, Accuracy: 9665/10000 (96.65%)



loss=0.20889289677143097 batch_id=468 Accuracy=95.61: 100%|██████████| 469/469 [00:24<00:00, 19.23it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1034, Accuracy: 9702/10000 (97.02%)



loss=0.09241489320993423 batch_id=468 Accuracy=96.06: 100%|██████████| 469/469 [00:24<00:00, 19.15it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0917, Accuracy: 9745/10000 (97.45%)



loss=0.09078372269868851 batch_id=468 Accuracy=96.37: 100%|██████████| 469/469 [00:24<00:00, 19.17it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0798, Accuracy: 9763/10000 (97.63%)



loss=0.058983754366636276 batch_id=468 Accuracy=96.60: 100%|██████████| 469/469 [00:24<00:00, 19.09it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0749, Accuracy: 9779/10000 (97.79%)



loss=0.1972455233335495 batch_id=468 Accuracy=96.87: 100%|██████████| 469/469 [00:24<00:00, 19.35it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0853, Accuracy: 9750/10000 (97.50%)



loss=0.12544399499893188 batch_id=468 Accuracy=97.21: 100%|██████████| 469/469 [00:24<00:00, 19.21it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0652, Accuracy: 9800/10000 (98.00%)



loss=0.06665977835655212 batch_id=468 Accuracy=97.25: 100%|██████████| 469/469 [00:24<00:00, 19.20it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0594, Accuracy: 9820/10000 (98.20%)



loss=0.029975155368447304 batch_id=468 Accuracy=97.44: 100%|██████████| 469/469 [00:24<00:00, 19.21it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0606, Accuracy: 9817/10000 (98.17%)



loss=0.04101727530360222 batch_id=468 Accuracy=97.56: 100%|██████████| 469/469 [00:24<00:00, 19.34it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0546, Accuracy: 9837/10000 (98.37%)



loss=0.17033617198467255 batch_id=468 Accuracy=97.64: 100%|██████████| 469/469 [00:24<00:00, 19.26it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0552, Accuracy: 9830/10000 (98.30%)



loss=0.027957608923316002 batch_id=468 Accuracy=97.75: 100%|██████████| 469/469 [00:24<00:00, 19.31it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0507, Accuracy: 9844/10000 (98.44%)



loss=0.08261483162641525 batch_id=468 Accuracy=97.85: 100%|██████████| 469/469 [00:24<00:00, 19.38it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0485, Accuracy: 9848/10000 (98.48%)



loss=0.05501609668135643 batch_id=468 Accuracy=97.92: 100%|██████████| 469/469 [00:24<00:00, 19.40it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0439, Accuracy: 9857/10000 (98.57%)



loss=0.0608612596988678 batch_id=468 Accuracy=97.99: 100%|██████████| 469/469 [00:24<00:00, 19.52it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0464, Accuracy: 9851/10000 (98.51%)



loss=0.07234517484903336 batch_id=468 Accuracy=98.08: 100%|██████████| 469/469 [00:24<00:00, 19.33it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0441, Accuracy: 9870/10000 (98.70%)



loss=0.04532359912991524 batch_id=468 Accuracy=98.13: 100%|██████████| 469/469 [00:24<00:00, 19.34it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0444, Accuracy: 9861/10000 (98.61%)



loss=0.0793975219130516 batch_id=468 Accuracy=98.15: 100%|██████████| 469/469 [00:24<00:00, 19.43it/s]



Test set: Average loss: 0.0423, Accuracy: 9877/10000 (98.77%)



In [0]:
# Net1 - with L1, With Batch normalization
from train import train_model
from test import test_model

# Session 5 final model
model = dnn_model.Net1().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

for epoch in range(1, 2):
    train_model(model, device, train_loader, optimizer, epoch, "L1")
    test_model(model, device, test_loader)

  return F.log_softmax(x)
loss=0.5337293148040771 batch_id=468 Accuracy=71.70: 100%|██████████| 469/469 [01:25<00:00,  5.48it/s]



Test set: Average loss: 0.1589, Accuracy: 9566/10000 (95.66%)



In [0]:
# Net1 - with L1, With Batch normalization
from train import train_model
from test import test_model

# Session 5 final model
model = dnn_model.Net1().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

for epoch in range(1, 25):
    train_model(model, device, train_loader, optimizer, epoch, "L1")
    test_model(model, device, test_loader)

  return F.log_softmax(x)
loss=0.5337293148040771 batch_id=468 Accuracy=71.70: 100%|██████████| 469/469 [01:25<00:00,  5.50it/s]
loss=0.3812520503997803 batch_id=0 Accuracy=91.41:   0%|          | 1/469 [00:00<01:25,  5.50it/s]


Test set: Average loss: 0.1589, Accuracy: 9566/10000 (95.66%)



loss=0.29899686574935913 batch_id=468 Accuracy=96.16: 100%|██████████| 469/469 [01:26<00:00,  5.44it/s]
loss=0.28444990515708923 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:21,  5.74it/s]


Test set: Average loss: 0.0839, Accuracy: 9746/10000 (97.46%)



loss=0.23820088803768158 batch_id=468 Accuracy=97.06: 100%|██████████| 469/469 [01:26<00:00,  5.42it/s]
loss=0.2842409312725067 batch_id=0 Accuracy=96.88:   0%|          | 1/469 [00:00<01:22,  5.66it/s]


Test set: Average loss: 0.0684, Accuracy: 9802/10000 (98.02%)



loss=0.24949359893798828 batch_id=468 Accuracy=97.42: 100%|██████████| 469/469 [01:26<00:00,  5.43it/s]
loss=0.256767213344574 batch_id=0 Accuracy=96.88:   0%|          | 1/469 [00:00<01:29,  5.23it/s]


Test set: Average loss: 0.0558, Accuracy: 9841/10000 (98.41%)



loss=0.2692250609397888 batch_id=468 Accuracy=97.71: 100%|██████████| 469/469 [01:25<00:00,  5.51it/s]
loss=0.2296978086233139 batch_id=0 Accuracy=96.88:   0%|          | 1/469 [00:00<01:24,  5.52it/s]


Test set: Average loss: 0.0569, Accuracy: 9841/10000 (98.41%)



loss=0.1975247710943222 batch_id=468 Accuracy=97.75: 100%|██████████| 469/469 [01:25<00:00,  5.47it/s]
loss=0.2265634834766388 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:26,  5.44it/s]


Test set: Average loss: 0.0607, Accuracy: 9819/10000 (98.19%)



loss=0.21705561876296997 batch_id=468 Accuracy=97.82: 100%|██████████| 469/469 [01:25<00:00,  5.51it/s]
loss=0.31787633895874023 batch_id=0 Accuracy=96.09:   0%|          | 1/469 [00:00<01:24,  5.52it/s]


Test set: Average loss: 0.0727, Accuracy: 9780/10000 (97.80%)



loss=0.22350674867630005 batch_id=468 Accuracy=97.84: 100%|██████████| 469/469 [01:25<00:00,  5.50it/s]
loss=0.21314159035682678 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:23,  5.61it/s]


Test set: Average loss: 0.0450, Accuracy: 9869/10000 (98.69%)



loss=0.17169950902462006 batch_id=468 Accuracy=97.88: 100%|██████████| 469/469 [01:25<00:00,  5.48it/s]
loss=0.2434038370847702 batch_id=0 Accuracy=96.09:   0%|          | 1/469 [00:00<01:26,  5.43it/s]


Test set: Average loss: 0.0466, Accuracy: 9868/10000 (98.68%)



loss=0.2590324580669403 batch_id=468 Accuracy=97.99: 100%|██████████| 469/469 [01:26<00:00,  5.44it/s]
loss=0.2021748125553131 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:24,  5.52it/s]


Test set: Average loss: 0.0536, Accuracy: 9844/10000 (98.44%)



loss=0.22270318865776062 batch_id=468 Accuracy=97.98: 100%|██████████| 469/469 [01:26<00:00,  5.45it/s]
loss=0.26595938205718994 batch_id=0 Accuracy=97.66:   0%|          | 1/469 [00:00<01:23,  5.63it/s]


Test set: Average loss: 0.0469, Accuracy: 9863/10000 (98.63%)



loss=0.19361184537410736 batch_id=468 Accuracy=97.98: 100%|██████████| 469/469 [01:25<00:00,  5.46it/s]
loss=0.21542048454284668 batch_id=0 Accuracy=97.66:   0%|          | 1/469 [00:00<01:23,  5.59it/s]


Test set: Average loss: 0.0819, Accuracy: 9745/10000 (97.45%)



loss=0.27331310510635376 batch_id=468 Accuracy=97.97: 100%|██████████| 469/469 [01:25<00:00,  5.48it/s]
loss=0.24247261881828308 batch_id=0 Accuracy=96.88:   0%|          | 1/469 [00:00<01:21,  5.76it/s]


Test set: Average loss: 0.0614, Accuracy: 9802/10000 (98.02%)



loss=0.20700541138648987 batch_id=468 Accuracy=97.87: 100%|██████████| 469/469 [01:24<00:00,  5.52it/s]
loss=0.18002671003341675 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:25,  5.46it/s]


Test set: Average loss: 0.0547, Accuracy: 9838/10000 (98.38%)



loss=0.1885739117860794 batch_id=468 Accuracy=97.94: 100%|██████████| 469/469 [01:25<00:00,  5.49it/s]
loss=0.2154296636581421 batch_id=0 Accuracy=97.66:   0%|          | 1/469 [00:00<01:23,  5.62it/s]


Test set: Average loss: 0.0510, Accuracy: 9843/10000 (98.43%)



loss=0.16195917129516602 batch_id=468 Accuracy=98.08: 100%|██████████| 469/469 [01:27<00:00,  5.39it/s]
loss=0.23414728045463562 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:23,  5.63it/s]


Test set: Average loss: 0.0589, Accuracy: 9814/10000 (98.14%)



loss=0.24828162789344788 batch_id=468 Accuracy=98.08: 100%|██████████| 469/469 [01:26<00:00,  5.40it/s]
loss=0.19168098270893097 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:23,  5.63it/s]


Test set: Average loss: 0.0415, Accuracy: 9869/10000 (98.69%)



loss=0.20895591378211975 batch_id=468 Accuracy=98.20: 100%|██████████| 469/469 [01:26<00:00,  5.45it/s]
loss=0.26792070269584656 batch_id=0 Accuracy=96.09:   0%|          | 1/469 [00:00<01:27,  5.34it/s]


Test set: Average loss: 0.0503, Accuracy: 9845/10000 (98.45%)



loss=0.19342423975467682 batch_id=468 Accuracy=98.07: 100%|██████████| 469/469 [01:25<00:00,  5.47it/s]
loss=0.2208549529314041 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:28,  5.27it/s]


Test set: Average loss: 0.0598, Accuracy: 9819/10000 (98.19%)



loss=0.19817152619361877 batch_id=468 Accuracy=98.16: 100%|██████████| 469/469 [01:25<00:00,  5.47it/s]
loss=0.19194811582565308 batch_id=0 Accuracy=99.22:   0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0609, Accuracy: 9813/10000 (98.13%)



loss=0.2846881151199341 batch_id=468 Accuracy=98.10: 100%|██████████| 469/469 [01:25<00:00,  5.50it/s]
loss=0.2387419044971466 batch_id=0 Accuracy=96.09:   0%|          | 1/469 [00:00<01:24,  5.54it/s]


Test set: Average loss: 0.0435, Accuracy: 9878/10000 (98.78%)



loss=0.21166691184043884 batch_id=468 Accuracy=98.10: 100%|██████████| 469/469 [01:25<00:00,  5.48it/s]
loss=0.22258037328720093 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:23,  5.64it/s]


Test set: Average loss: 0.0544, Accuracy: 9835/10000 (98.35%)



loss=0.25099697709083557 batch_id=468 Accuracy=98.06: 100%|██████████| 469/469 [01:25<00:00,  5.52it/s]
loss=0.19664454460144043 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:21,  5.72it/s]


Test set: Average loss: 0.0674, Accuracy: 9797/10000 (97.97%)



loss=0.16954964399337769 batch_id=468 Accuracy=98.09: 100%|██████████| 469/469 [01:24<00:00,  5.52it/s]



Test set: Average loss: 0.0477, Accuracy: 9856/10000 (98.56%)



In [0]:
# Net1 - with L1 & L2, with BN
from train import train_model
from test import test_model

# Session 5 final model
model = dnn_model.Net1().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0, dampening=0, weight_decay=0, nesterov=False)

for epoch in range(1, 25):
    train_model(model, device, train_loader, optimizer, epoch, 'L1')
    test_model(model, device, test_loader)

  return F.log_softmax(x)
loss=2.392559289932251 batch_id=468 Accuracy=17.49: 100%|██████████| 469/469 [01:25<00:00,  5.50it/s]
loss=2.349015951156616 batch_id=0 Accuracy=18.75:   0%|          | 1/469 [00:00<01:25,  5.46it/s]


Test set: Average loss: 2.1371, Accuracy: 2594/10000 (25.94%)



loss=1.2016850709915161 batch_id=468 Accuracy=45.18: 100%|██████████| 469/469 [01:25<00:00,  5.51it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.9352, Accuracy: 7383/10000 (73.83%)



loss=0.5858258008956909 batch_id=468 Accuracy=81.87: 100%|██████████| 469/469 [01:25<00:00,  5.48it/s]
loss=0.5638421177864075 batch_id=0 Accuracy=90.62:   0%|          | 1/469 [00:00<01:23,  5.57it/s]


Test set: Average loss: 0.3648, Accuracy: 8947/10000 (89.47%)



loss=0.5721626877784729 batch_id=468 Accuracy=89.73: 100%|██████████| 469/469 [01:25<00:00,  5.47it/s]
loss=0.4984756112098694 batch_id=0 Accuracy=92.19:   0%|          | 1/469 [00:00<01:22,  5.64it/s]


Test set: Average loss: 0.2360, Accuracy: 9366/10000 (93.66%)



loss=0.43807631731033325 batch_id=468 Accuracy=92.56: 100%|██████████| 469/469 [01:25<00:00,  5.50it/s]
loss=0.4685309827327728 batch_id=0 Accuracy=92.97:   0%|          | 1/469 [00:00<01:28,  5.28it/s]


Test set: Average loss: 0.1785, Accuracy: 9507/10000 (95.07%)



loss=0.42101019620895386 batch_id=468 Accuracy=94.11: 100%|██████████| 469/469 [01:24<00:00,  5.56it/s]
loss=0.4824026823043823 batch_id=0 Accuracy=93.75:   0%|          | 1/469 [00:00<01:21,  5.71it/s]


Test set: Average loss: 0.1462, Accuracy: 9596/10000 (95.96%)



loss=0.42167866230010986 batch_id=468 Accuracy=95.19: 100%|██████████| 469/469 [01:25<00:00,  5.49it/s]
loss=0.37264198064804077 batch_id=0 Accuracy=95.31:   0%|          | 1/469 [00:00<01:28,  5.27it/s]


Test set: Average loss: 0.1294, Accuracy: 9618/10000 (96.18%)



loss=0.4210740327835083 batch_id=468 Accuracy=95.66: 100%|██████████| 469/469 [01:25<00:00,  5.48it/s]
loss=0.4279828667640686 batch_id=0 Accuracy=96.09:   0%|          | 1/469 [00:00<01:26,  5.43it/s]


Test set: Average loss: 0.1070, Accuracy: 9690/10000 (96.90%)



loss=0.31368234753608704 batch_id=468 Accuracy=96.19: 100%|██████████| 469/469 [01:24<00:00,  5.57it/s]
loss=0.4693637490272522 batch_id=0 Accuracy=91.41:   0%|          | 1/469 [00:00<01:25,  5.44it/s]


Test set: Average loss: 0.1032, Accuracy: 9709/10000 (97.09%)



loss=0.37446129322052 batch_id=468 Accuracy=96.44: 100%|██████████| 469/469 [01:24<00:00,  5.53it/s]
loss=0.29261109232902527 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:25,  5.44it/s]


Test set: Average loss: 0.0839, Accuracy: 9761/10000 (97.61%)



loss=0.2771334648132324 batch_id=468 Accuracy=96.75: 100%|██████████| 469/469 [01:25<00:00,  5.51it/s]
loss=0.3299824893474579 batch_id=0 Accuracy=96.88:   0%|          | 1/469 [00:00<01:24,  5.54it/s]


Test set: Average loss: 0.0791, Accuracy: 9777/10000 (97.77%)



loss=0.27744975686073303 batch_id=468 Accuracy=97.06: 100%|██████████| 469/469 [01:19<00:00,  5.91it/s]
loss=0.2853933274745941 batch_id=0 Accuracy=96.88:   0%|          | 1/469 [00:00<01:18,  5.97it/s]


Test set: Average loss: 0.0714, Accuracy: 9803/10000 (98.03%)



loss=0.3348570764064789 batch_id=468 Accuracy=97.20: 100%|██████████| 469/469 [01:15<00:00,  6.22it/s]
loss=0.34860312938690186 batch_id=0 Accuracy=96.09:   0%|          | 1/469 [00:00<01:15,  6.21it/s]


Test set: Average loss: 0.0648, Accuracy: 9832/10000 (98.32%)



loss=0.23860330879688263 batch_id=468 Accuracy=97.34: 100%|██████████| 469/469 [01:16<00:00,  6.16it/s]
loss=0.257891446352005 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:18,  5.93it/s]


Test set: Average loss: 0.0683, Accuracy: 9816/10000 (98.16%)



loss=0.2471635937690735 batch_id=468 Accuracy=97.47: 100%|██████████| 469/469 [01:15<00:00,  6.21it/s]
loss=0.24469071626663208 batch_id=0 Accuracy=97.66:   0%|          | 1/469 [00:00<01:14,  6.24it/s]


Test set: Average loss: 0.0606, Accuracy: 9841/10000 (98.41%)



loss=0.23795898258686066 batch_id=468 Accuracy=97.58: 100%|██████████| 469/469 [01:15<00:00,  6.22it/s]
loss=0.2774806618690491 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:14,  6.30it/s]


Test set: Average loss: 0.0531, Accuracy: 9849/10000 (98.49%)



loss=0.2584306299686432 batch_id=468 Accuracy=97.79: 100%|██████████| 469/469 [01:15<00:00,  6.20it/s]
loss=0.2679199278354645 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:18,  5.95it/s]


Test set: Average loss: 0.0529, Accuracy: 9845/10000 (98.45%)



loss=0.2466563880443573 batch_id=468 Accuracy=97.78: 100%|██████████| 469/469 [01:15<00:00,  6.21it/s]
loss=0.2571853995323181 batch_id=0 Accuracy=96.88:   0%|          | 1/469 [00:00<01:13,  6.33it/s]


Test set: Average loss: 0.0617, Accuracy: 9835/10000 (98.35%)



loss=0.2552502155303955 batch_id=468 Accuracy=97.90: 100%|██████████| 469/469 [01:19<00:00,  5.91it/s]
loss=0.24396859109401703 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:14,  6.26it/s]


Test set: Average loss: 0.0531, Accuracy: 9848/10000 (98.48%)



loss=0.23711203038692474 batch_id=468 Accuracy=97.96: 100%|██████████| 469/469 [01:15<00:00,  6.20it/s]
loss=0.22594106197357178 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:13,  6.39it/s]


Test set: Average loss: 0.0508, Accuracy: 9868/10000 (98.68%)



loss=0.2960471510887146 batch_id=468 Accuracy=97.99: 100%|██████████| 469/469 [01:15<00:00,  6.23it/s]
loss=0.2620832026004791 batch_id=0 Accuracy=96.88:   0%|          | 1/469 [00:00<01:13,  6.40it/s]


Test set: Average loss: 0.0636, Accuracy: 9810/10000 (98.10%)



loss=0.28348398208618164 batch_id=468 Accuracy=98.00: 100%|██████████| 469/469 [01:15<00:00,  6.20it/s]
loss=0.25831907987594604 batch_id=0 Accuracy=99.22:   0%|          | 1/469 [00:00<01:13,  6.41it/s]


Test set: Average loss: 0.0481, Accuracy: 9865/10000 (98.65%)



loss=0.3000286817550659 batch_id=468 Accuracy=98.06: 100%|██████████| 469/469 [01:15<00:00,  6.20it/s]
loss=0.2138294279575348 batch_id=0 Accuracy=98.44:   0%|          | 1/469 [00:00<01:12,  6.43it/s]


Test set: Average loss: 0.0488, Accuracy: 9883/10000 (98.83%)



loss=0.23853173851966858 batch_id=468 Accuracy=98.05: 100%|██████████| 469/469 [01:15<00:00,  6.17it/s]



Test set: Average loss: 0.0469, Accuracy: 9870/10000 (98.70%)



In [6]:
# Net1 - with L1 & L2, with BN
from train import train_model
from test import test_model

# Session 5 final model
model = dnn_model.Net1().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0, dampening=0, weight_decay=5e-3, nesterov=False)

for epoch in range(1, 25):
    train_model(model, device, train_loader, optimizer, epoch, 'L1')
    test_model(model, device, test_loader)

  return F.log_softmax(x)
loss=2.060058832168579 batch_id=468 Accuracy=23.82: 100%|██████████| 469/469 [00:25<00:00, 18.16it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 1.9571, Accuracy: 3135/10000 (31.35%)



loss=0.943038284778595 batch_id=468 Accuracy=57.77: 100%|██████████| 469/469 [00:25<00:00, 18.44it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.8292, Accuracy: 7565/10000 (75.65%)



loss=0.6774700880050659 batch_id=468 Accuracy=81.72: 100%|██████████| 469/469 [00:25<00:00, 18.39it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.4078, Accuracy: 8730/10000 (87.30%)



loss=0.6094377636909485 batch_id=468 Accuracy=89.01: 100%|██████████| 469/469 [00:25<00:00, 18.29it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.3282, Accuracy: 9044/10000 (90.44%)



loss=0.5370624661445618 batch_id=468 Accuracy=92.03: 100%|██████████| 469/469 [00:25<00:00, 18.46it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.2303, Accuracy: 9364/10000 (93.64%)



loss=0.4119809865951538 batch_id=468 Accuracy=93.47: 100%|██████████| 469/469 [00:25<00:00, 18.39it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1728, Accuracy: 9545/10000 (95.45%)



loss=0.3216409385204315 batch_id=468 Accuracy=94.40: 100%|██████████| 469/469 [00:25<00:00, 18.38it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1549, Accuracy: 9617/10000 (96.17%)



loss=0.4236612915992737 batch_id=468 Accuracy=95.18: 100%|██████████| 469/469 [00:25<00:00, 18.60it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1290, Accuracy: 9665/10000 (96.65%)



loss=0.32478663325309753 batch_id=468 Accuracy=95.66: 100%|██████████| 469/469 [00:25<00:00, 18.32it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1137, Accuracy: 9696/10000 (96.96%)



loss=0.3397151231765747 batch_id=468 Accuracy=96.01: 100%|██████████| 469/469 [00:25<00:00, 18.42it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1186, Accuracy: 9706/10000 (97.06%)



loss=0.24890030920505524 batch_id=468 Accuracy=96.34: 100%|██████████| 469/469 [00:25<00:00, 18.41it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0989, Accuracy: 9756/10000 (97.56%)



loss=0.36417442560195923 batch_id=468 Accuracy=96.49: 100%|██████████| 469/469 [00:25<00:00, 18.36it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1061, Accuracy: 9759/10000 (97.59%)



loss=0.26584556698799133 batch_id=468 Accuracy=96.67: 100%|██████████| 469/469 [00:25<00:00, 18.45it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0911, Accuracy: 9774/10000 (97.74%)



loss=0.23220908641815186 batch_id=468 Accuracy=96.76: 100%|██████████| 469/469 [00:25<00:00, 18.35it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0833, Accuracy: 9790/10000 (97.90%)



loss=0.24173010885715485 batch_id=468 Accuracy=96.94: 100%|██████████| 469/469 [00:25<00:00, 18.25it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0914, Accuracy: 9744/10000 (97.44%)



loss=0.27599653601646423 batch_id=468 Accuracy=96.96: 100%|██████████| 469/469 [00:25<00:00, 18.17it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0852, Accuracy: 9814/10000 (98.14%)



loss=0.2650025188922882 batch_id=468 Accuracy=97.12: 100%|██████████| 469/469 [00:25<00:00, 18.42it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0912, Accuracy: 9763/10000 (97.63%)



loss=0.25713562965393066 batch_id=468 Accuracy=97.13: 100%|██████████| 469/469 [00:25<00:00, 18.41it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0881, Accuracy: 9779/10000 (97.79%)



loss=0.2450176179409027 batch_id=468 Accuracy=97.19: 100%|██████████| 469/469 [00:25<00:00, 18.33it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0892, Accuracy: 9782/10000 (97.82%)



loss=0.24552170932292938 batch_id=468 Accuracy=97.31: 100%|██████████| 469/469 [00:25<00:00, 18.20it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.1127, Accuracy: 9693/10000 (96.93%)



loss=0.23210552334785461 batch_id=468 Accuracy=97.36: 100%|██████████| 469/469 [00:25<00:00, 18.46it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0868, Accuracy: 9793/10000 (97.93%)



loss=0.2313804030418396 batch_id=468 Accuracy=97.42: 100%|██████████| 469/469 [00:25<00:00, 18.27it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0947, Accuracy: 9773/10000 (97.73%)



loss=0.2191462218761444 batch_id=468 Accuracy=97.44: 100%|██████████| 469/469 [00:25<00:00, 18.16it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0793, Accuracy: 9816/10000 (98.16%)



loss=0.27482134103775024 batch_id=468 Accuracy=97.43: 100%|██████████| 469/469 [00:25<00:00, 18.27it/s]



Test set: Average loss: 0.0759, Accuracy: 9811/10000 (98.11%)

