In [1]:
import torch
import torchvision as tv
import torchsummary

In [2]:
# Load MNIST from an existing local copy; download is disabled, so the files
# must already be present under ../../MNIST.
source0 = tv.datasets.MNIST("../../MNIST", train = True, download = False)
source1 = tv.datasets.MNIST("../../MNIST", train = False, download = False)

# Keep both splits resident on the GPU as float tensors. unsqueeze(1) inserts
# a channel axis so each sample is [1, 28, 28] (see the printed shape below).
# The pixel values are cast to float as-is; no rescaling is applied here.
DATA0 = source0.data.unsqueeze(1).float().cuda()
DATA1 = source1.data.unsqueeze(1).float().cuda()
TARGET0 = source0.targets.cuda()
TARGET1 = source1.targets.cuda()

# Sample counts are read from the loaded tensors rather than hard-coded, so
# the batching loops that use them can never run past (or stop short of) the
# data that was actually loaded.
samples0, samples1 = DATA0.size(0), DATA1.size(0)
classes = 10

print(DATA0[1].shape)

torch.Size([1, 28, 28])


In [3]:
def train(model, verbose = False, epochs = 100, batch = 1000):
    """Train ``model`` on the notebook-level MNIST tensors and report the best test accuracy.

    Every epoch makes one pass over ``DATA0``/``TARGET0`` in consecutive
    mini-batches using Adam with its default hyper-parameters, then evaluates
    on ``DATA1``/``TARGET1`` with gradients disabled.

    Args:
        model: module that maps a batch sliced from ``DATA0`` to per-class
            scores accepted by ``cross_entropy``.
        verbose: when True, print one line per epoch containing the epoch
            index, training loss, training accuracy, test loss and test
            accuracy.
        epochs: number of passes over the training data.
        batch: mini-batch size used for both training and evaluation.

    Returns:
        None. The best test accuracy and the epoch at which it was first
        reached are printed once training finishes.
    """
    optimizer = torch.optim.Adam(model.parameters())
    acc_max, epoch_max = 0.0, 0

    for epoch in range(epochs):
        # Running sums live on the same device as the data they summarize.
        LOSS0 = torch.zeros((), device = DATA0.device)
        ACCURACY0 = torch.zeros((), device = DATA0.device)
        count0 = 0
        model.train()
        for index in range(0, samples0, batch):
            optimizer.zero_grad()
            DATA = DATA0[index : index + batch]
            TARGET = TARGET0[index : index + batch]
            count = TARGET.size(0)
            ACTIVATION = model(DATA)
            LOSS = torch.nn.functional.cross_entropy(ACTIVATION, TARGET)
            # The batch loss is a mean, so weight it by the batch size to get
            # a correct per-sample average later. detach() keeps the running
            # sum out of the autograd graph; otherwise LOSS0 would hold on to
            # graph nodes from every batch of the epoch.
            LOSS0 += LOSS.detach() * count
            VALUE = torch.argmax(ACTIVATION, 1)
            ACCURACY0 += torch.sum(VALUE == TARGET)
            count0 += count
            LOSS.backward()
            optimizer.step()

        LOSS0 /= count0
        ACCURACY0 /= count0

        model.eval()
        with torch.no_grad():
            LOSS1 = torch.zeros((), device = DATA1.device)
            ACCURACY1 = torch.zeros((), device = DATA1.device)
            count1 = 0
            for index in range(0, samples1, batch):
                DATA = DATA1[index : index + batch]
                TARGET = TARGET1[index : index + batch]
                ACTIVATION = model(DATA)
                # reduction="sum" already yields a per-batch total, so no
                # batch-size weighting is needed here.
                LOSS1 += torch.nn.functional.cross_entropy(ACTIVATION, TARGET, reduction = "sum")
                VALUE = torch.argmax(ACTIVATION, 1)
                ACCURACY1 += torch.sum(VALUE == TARGET)
                count1 += TARGET.size(0)
            LOSS1 /= count1
            ACCURACY1 /= count1

        # Track the best result as plain Python numbers. The strict ">" keeps
        # the first epoch that reached the maximum.
        accuracy1 = ACCURACY1.item()
        if accuracy1 > acc_max:
            acc_max, epoch_max = accuracy1, epoch

        if verbose:
            print("%5d %12.3f %4.3f %12.3f %4.3f" % \
                  (epoch, LOSS0.item(), ACCURACY0.item(), LOSS1.item(), accuracy1), flush = True)

    # Report the epoch where the maximum occurred, not the last loop index.
    print("Max accuracy %4.3f at %5d epoch" % (acc_max, epoch_max))

In [4]:
# Baseline network: two conv/pool stages followed by a three-layer classifier.
# Shapes in the comments are per-sample and match the summary printed below.
model1 = torch.nn.Sequential(
    # Stage 1: 1x28x28 -> 8x24x24 -> 8x12x12
    torch.nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.BatchNorm2d(num_features=8),

    # Stage 2: 8x12x12 -> 16x8x8 -> 16x4x4, then flattened to 256 features
    torch.nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.BatchNorm2d(num_features=16),
    torch.nn.Flatten(),

    # Classifier head: 256 -> 128 -> 32 -> 10 class scores
    torch.nn.Linear(in_features=16 * 4 * 4, out_features=128),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=128, out_features=32),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=32, out_features=10),
)
model1 = model1.cuda()

torchsummary.summary(model1, input_size=DATA0.shape[1:])

train(model1, epochs=100)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 24, 24]             208
              ReLU-2            [-1, 8, 24, 24]               0
         MaxPool2d-3            [-1, 8, 12, 12]               0
       BatchNorm2d-4            [-1, 8, 12, 12]              16
            Conv2d-5             [-1, 16, 8, 8]           3,216
              ReLU-6             [-1, 16, 8, 8]               0
         MaxPool2d-7             [-1, 16, 4, 4]               0
       BatchNorm2d-8             [-1, 16, 4, 4]              32
           Flatten-9                  [-1, 256]               0
           Linear-10                  [-1, 128]          32,896
             ReLU-11                  [-1, 128]               0
           Linear-12                   [-1, 32]           4,128
             ReLU-13                   [-1, 32]               0
           Linear-14                   

In [5]:
# Same layout as the baseline, with three times as many convolution channels
# (24 and 48 instead of 8 and 16). Shapes match the summary printed below.
model2 = torch.nn.Sequential(
    # Stage 1: 1x28x28 -> 24x24x24 -> 24x12x12
    torch.nn.Conv2d(in_channels=1, out_channels=24, kernel_size=5),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.BatchNorm2d(num_features=24),

    # Stage 2: 24x12x12 -> 48x8x8 -> 48x4x4, then flattened to 768 features
    torch.nn.Conv2d(in_channels=24, out_channels=48, kernel_size=5),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.BatchNorm2d(num_features=48),
    torch.nn.Flatten(),

    # Classifier head: 768 -> 128 -> 32 -> 10 class scores
    torch.nn.Linear(in_features=48 * 4 * 4, out_features=128),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=128, out_features=32),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=32, out_features=10),
)
model2 = model2.cuda()

torchsummary.summary(model2, input_size=DATA0.shape[1:])

train(model2, epochs=50)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 24, 24, 24]             624
              ReLU-2           [-1, 24, 24, 24]               0
         MaxPool2d-3           [-1, 24, 12, 12]               0
       BatchNorm2d-4           [-1, 24, 12, 12]              48
            Conv2d-5             [-1, 48, 8, 8]          28,848
              ReLU-6             [-1, 48, 8, 8]               0
         MaxPool2d-7             [-1, 48, 4, 4]               0
       BatchNorm2d-8             [-1, 48, 4, 4]              96
           Flatten-9                  [-1, 768]               0
           Linear-10                  [-1, 128]          98,432
             ReLU-11                  [-1, 128]               0
           Linear-12                   [-1, 32]           4,128
             ReLU-13                   [-1, 32]               0
           Linear-14                   

In [6]:
# Deeper variant: each stage stacks two 5x5 convolutions before pooling. The
# second convolution of a stage uses padding=2, so it keeps the spatial size.
model3 = torch.nn.Sequential(
    # Stage 1: 1x28x28 -> 16x24x24 -> 16x24x24 -> 16x12x12
    torch.nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5),
    torch.nn.BatchNorm2d(num_features=16),
    torch.nn.ReLU(),
    torch.nn.Conv2d(in_channels=16, out_channels=16, kernel_size=5, padding=2),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.BatchNorm2d(num_features=16),

    # Stage 2: 16x12x12 -> 16x8x8 -> 16x8x8 -> 16x4x4, flattened to 256
    torch.nn.Conv2d(in_channels=16, out_channels=16, kernel_size=5),
    torch.nn.BatchNorm2d(num_features=16),
    torch.nn.ReLU(),
    torch.nn.Conv2d(in_channels=16, out_channels=16, kernel_size=5, padding=2),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.BatchNorm2d(num_features=16),
    torch.nn.Flatten(),

    # Classifier head: 256 -> 128 -> 32 -> 10 class scores
    torch.nn.Linear(in_features=16 * 4 * 4, out_features=128),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=128, out_features=32),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=32, out_features=10),
)
model3 = model3.cuda()

torchsummary.summary(model3, input_size=DATA0.shape[1:])
train(model3, epochs=30)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 24, 24]             416
       BatchNorm2d-2           [-1, 16, 24, 24]              32
              ReLU-3           [-1, 16, 24, 24]               0
            Conv2d-4           [-1, 16, 24, 24]           6,416
              ReLU-5           [-1, 16, 24, 24]               0
         MaxPool2d-6           [-1, 16, 12, 12]               0
       BatchNorm2d-7           [-1, 16, 12, 12]              32
            Conv2d-8             [-1, 16, 8, 8]           6,416
       BatchNorm2d-9             [-1, 16, 8, 8]              32
             ReLU-10             [-1, 16, 8, 8]               0
           Conv2d-11             [-1, 16, 8, 8]           6,416
             ReLU-12             [-1, 16, 8, 8]               0
        MaxPool2d-13             [-1, 16, 4, 4]               0
      BatchNorm2d-14             [-1, 1

In [7]:
# Widest variant: double-convolution stages with 32 and 64 channels, plus
# dropout (p=0.3) after each stage and after the first fully connected layer.
model4 = torch.nn.Sequential(
    # Stage 1: 1x28x28 -> 32x24x24 -> 32x24x24 -> 32x12x12
    torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5),
    torch.nn.BatchNorm2d(num_features=32),
    torch.nn.ReLU(),
    torch.nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.BatchNorm2d(num_features=32),

    torch.nn.Dropout(p=0.3),

    # Stage 2: 32x12x12 -> 64x8x8 -> 64x8x8 -> 64x4x4, flattened to 1024
    torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5),
    torch.nn.BatchNorm2d(num_features=64),
    torch.nn.ReLU(),
    torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=5, padding=2),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.BatchNorm2d(num_features=64),
    torch.nn.Flatten(),

    torch.nn.Dropout(p=0.3),

    # Classifier head: 1024 -> 128 -> 32 -> 10 class scores
    torch.nn.Linear(in_features=64 * 4 * 4, out_features=128),
    torch.nn.ReLU(),
    torch.nn.Dropout(p=0.3),

    torch.nn.Linear(in_features=128, out_features=32),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=32, out_features=10),
)
model4 = model4.cuda()

torchsummary.summary(model4, input_size=DATA0.shape[1:])
train(model4, epochs=30)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 24, 24]             832
       BatchNorm2d-2           [-1, 32, 24, 24]              64
              ReLU-3           [-1, 32, 24, 24]               0
            Conv2d-4           [-1, 32, 24, 24]          25,632
              ReLU-5           [-1, 32, 24, 24]               0
         MaxPool2d-6           [-1, 32, 12, 12]               0
       BatchNorm2d-7           [-1, 32, 12, 12]              64
           Dropout-8           [-1, 32, 12, 12]               0
            Conv2d-9             [-1, 64, 8, 8]          51,264
      BatchNorm2d-10             [-1, 64, 8, 8]             128
             ReLU-11             [-1, 64, 8, 8]               0
           Conv2d-12             [-1, 64, 8, 8]         102,464
             ReLU-13             [-1, 64, 8, 8]               0
        MaxPool2d-14             [-1, 6