In [1]:
import torch
import torchvision as tv
import torchsummary

In [19]:
samples0, samples1 = 60000, 10000
classes = 10

source0 = tv.datasets.MNIST("../../MNIST", train = True, download = False)
source1 = tv.datasets.MNIST("../../MNIST", train = False, download = False)
DATA0 = source0.data.unsqueeze(1).float().cuda()
DATA1 = source1.data.unsqueeze(1).float().cuda()
TARGET0 = source0.targets.cuda()
TARGET1 = source1.targets.cuda()

print(DATA0[1].shape)

torch.Size([1, 28, 28])


In [21]:
model1 = torch.nn.Sequential(
    torch.nn.Conv2d(1, 8, 5),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(2),
    torch.nn.Conv2d(8, 16, 5),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(2),
    torch.nn.Flatten(),
    torch.nn.Linear(16*4*4, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, 10)).cuda()

torchsummary.summary(model, input_size=DATA0.shape[1:])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 12, 12]             208
              ReLU-2            [-1, 8, 12, 12]               0
            Conv2d-3             [-1, 16, 4, 4]           3,216
              ReLU-4             [-1, 16, 4, 4]               0
           Flatten-5                  [-1, 256]               0
            Linear-6                  [-1, 128]          32,896
              ReLU-7                  [-1, 128]               0
            Linear-8                   [-1, 32]           4,128
              ReLU-9                   [-1, 32]               0
           Linear-10                   [-1, 10]             330
Total params: 40,778
Trainable params: 40,778
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.03
Params size (MB): 0.16
Estimated Tot

In [45]:
def train(model):
    variables = model.parameters()
    batch = 1000
    optimizer = torch.optim.Adam(variables)
    for epoch in range(100):
        LOSS0 = torch.zeros((), device = "cuda")
        ACCURACY0 = torch.zeros((), device = "cuda")
        count0 = 0
        for index in range(0, samples0, batch):
            optimizer.zero_grad()
            DATA = DATA0[index : index + batch]
            TARGET = TARGET0[index : index + batch]
            count = TARGET.size(0)
            ACTIVATION = model(DATA)
            LOSS = torch.nn.functional.cross_entropy(ACTIVATION, TARGET)
            LOSS0 += LOSS * count
            VALUE = torch.argmax(ACTIVATION, 1)
            ACCURACY0 += torch.sum(VALUE == TARGET)
            count0 += count
            LOSS.backward()
            optimizer.step()
        LOSS0 /= count0
        ACCURACY0 /= count0
        with torch.no_grad():
            LOSS1 = torch.zeros((), device = "cuda")
            ACCURACY1 = torch.zeros((), device = "cuda")
            count1 = 0
            for index in range(0, samples1, batch):
                DATA = DATA1[index : index + batch]
                TARGET = TARGET1[index : index + batch]
                ACTIVATION = model(DATA)
                LOSS1 += torch.nn.functional.cross_entropy(ACTIVATION, TARGET, reduction = "sum")
                VALUE = torch.argmax(ACTIVATION, 1)
                ACCURACY1 += torch.sum(VALUE == TARGET)
                count1 += TARGET.size(0)
            LOSS1 /= count1
            ACCURACY1 /= count1
        print("%5d %12.3f %4.3f %12.3f %4.3f" % \
              (epoch, LOSS0, ACCURACY0, LOSS1, ACCURACY1), flush = True)

        #parameters: 46 090 + 208 = 46 298
        #accuracy: train: 1000 test: 981

        #this net with one convolutional and one dense layer performs better than two-layer dense net but has over 17 times less parameters.

In [23]:
train(model1)

    0        1.005 0.701        0.225 0.933
    1        0.183 0.944        0.127 0.960
    2        0.116 0.964        0.095 0.970
    3        0.090 0.972        0.079 0.976
    4        0.074 0.977        0.073 0.977
    5        0.064 0.980        0.069 0.977
    6        0.056 0.983        0.069 0.979
    7        0.049 0.985        0.061 0.981
    8        0.044 0.987        0.060 0.982
    9        0.040 0.988        0.074 0.977
   10        0.037 0.989        0.073 0.978
   11        0.034 0.989        0.067 0.979
   12        0.032 0.990        0.068 0.979
   13        0.030 0.991        0.066 0.979
   14        0.027 0.992        0.051 0.985
   15        0.023 0.993        0.049 0.986
   16        0.020 0.994        0.055 0.984
   17        0.019 0.994        0.064 0.984
   18        0.019 0.994        0.065 0.983
   19        0.019 0.994        0.064 0.983
   20        0.020 0.993        0.047 0.987
   21        0.017 0.994        0.058 0.984
   22        0.014 0.996        

In [69]:
model2 = torch.nn.Sequential(
    torch.nn.Conv2d(1, 24, 5),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(2),
    torch.nn.Conv2d(24, 48, 5),
    torch.nn.ReLU(),    
    torch.nn.MaxPool2d(2),
#     torch.nn.Dropout(0.1),
    torch.nn.Flatten(),
    torch.nn.Linear(48*4*4, 128),
    torch.nn.ReLU(),
#     torch.nn.Dropout(0.1),
    torch.nn.Linear(128, 32),
    torch.nn.ReLU(),
#     torch.nn.Dropout(0.1),
    torch.nn.Linear(32, 10)
    ).cuda()

torchsummary.summary(model2, input_size=DATA0.shape[1:])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 24, 24, 24]             624
              ReLU-2           [-1, 24, 24, 24]               0
         MaxPool2d-3           [-1, 24, 12, 12]               0
            Conv2d-4             [-1, 48, 8, 8]          28,848
              ReLU-5             [-1, 48, 8, 8]               0
         MaxPool2d-6             [-1, 48, 4, 4]               0
           Flatten-7                  [-1, 768]               0
            Linear-8                  [-1, 128]          98,432
              ReLU-9                  [-1, 128]               0
           Linear-10                   [-1, 32]           4,128
             ReLU-11                   [-1, 32]               0
           Linear-12                   [-1, 10]             330
Total params: 132,362
Trainable params: 132,362
Non-trainable params: 0
-------------------------------

In [70]:
train(model2)

    0        1.112 0.665        0.167 0.951
    1        0.129 0.963        0.084 0.974
    2        0.077 0.977        0.060 0.981
    3        0.057 0.983        0.048 0.984
    4        0.045 0.986        0.043 0.986
    5        0.039 0.988        0.046 0.986
    6        0.034 0.990        0.053 0.983
    7        0.030 0.991        0.053 0.982
    8        0.026 0.992        0.043 0.985
    9        0.024 0.992        0.051 0.984
   10        0.021 0.993        0.058 0.982
   11        0.022 0.993        0.047 0.985
   12        0.019 0.994        0.039 0.987
   13        0.016 0.995        0.039 0.987
   14        0.013 0.996        0.037 0.990
   15        0.009 0.997        0.039 0.989
   16        0.009 0.997        0.043 0.988
   17        0.008 0.997        0.039 0.990
   18        0.008 0.997        0.035 0.990
   19        0.005 0.998        0.040 0.990
   20        0.004 0.999        0.039 0.991
   21        0.004 0.999        0.049 0.990
   22        0.004 0.999        

KeyboardInterrupt: 

In [127]:
model3 = torch.nn.Sequential(
    torch.nn.Conv2d(1, 16, 5),
    torch.nn.BatchNorm2d(16),
    torch.nn.ReLU(),
    torch.nn.Conv2d(16, 16, 5, padding=2),
    torch.nn.ReLU(),    
    torch.nn.MaxPool2d(2),
    torch.nn.BatchNorm2d(16),
    
    
#     torch.nn.Dropout(0.2),
    
    torch.nn.Conv2d(16, 16, 5),
    torch.nn.BatchNorm2d(16),
    torch.nn.ReLU(),
    torch.nn.Conv2d(16, 16, 5, padding=2),
    torch.nn.ReLU(),    
    torch.nn.MaxPool2d(2),
    torch.nn.BatchNorm2d(16),
    torch.nn.Flatten(),
    
#     torch.nn.Dropout(0.2),
    
    torch.nn.Linear(16*4*4, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, 10)
    ).cuda()

torchsummary.summary(model3, input_size=DATA0.shape[1:])
train(model3)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 24, 24]             416
       BatchNorm2d-2           [-1, 16, 24, 24]              32
              ReLU-3           [-1, 16, 24, 24]               0
            Conv2d-4           [-1, 16, 24, 24]           6,416
              ReLU-5           [-1, 16, 24, 24]               0
         MaxPool2d-6           [-1, 16, 12, 12]               0
       BatchNorm2d-7           [-1, 16, 12, 12]              32
            Conv2d-8             [-1, 16, 8, 8]           6,416
       BatchNorm2d-9             [-1, 16, 8, 8]              32
             ReLU-10             [-1, 16, 8, 8]               0
           Conv2d-11             [-1, 16, 8, 8]           6,416
             ReLU-12             [-1, 16, 8, 8]               0
        MaxPool2d-13             [-1, 16, 4, 4]               0
      BatchNorm2d-14             [-1, 1

KeyboardInterrupt: 