In [12]:
import os
import time
import torch
import torch.optim as optim
import torch.nn as nn
import matplotlib.pyplot as plt
from torchsummary import summary
from MNISTData import MNISTData
from AutoEncoder import AutoEncoder
from EncoderClassifier import EncoderClassifier

In [2]:
data = MNISTData()
train_loader = data.get_train_loader()
test_loader = data.get_test_loader()

In [3]:
load_from = "./autoencoder_models/1558553790/ae.pt"

In [4]:
ae = AutoEncoder()
ae.load_state_dict(torch.load(load_from))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [5]:
summary(ae, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 256]         200,960
              ReLU-2                  [-1, 256]               0
            Linear-3                   [-1, 64]          16,448
              ReLU-4                   [-1, 64]               0
            Linear-5                   [-1, 16]           1,040
              ReLU-6                   [-1, 16]               0
            Linear-7                   [-1, 64]           1,088
              ReLU-8                   [-1, 64]               0
            Linear-9                  [-1, 256]          16,640
             ReLU-10                  [-1, 256]               0
           Linear-11                  [-1, 784]         201,488
          Sigmoid-12                  [-1, 784]               0
Total params: 437,664
Trainable params: 437,664
Non-trainable params: 0
-------------------------------

In [6]:
encoder = ae.encoder
summary(encoder, input_size=(1, 1, 784))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1            [-1, 1, 1, 256]         200,960
              ReLU-2            [-1, 1, 1, 256]               0
            Linear-3             [-1, 1, 1, 64]          16,448
              ReLU-4             [-1, 1, 1, 64]               0
            Linear-5             [-1, 1, 1, 16]           1,040
              ReLU-6             [-1, 1, 1, 16]               0
Total params: 218,448
Trainable params: 218,448
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 0.83
Estimated Total Size (MB): 0.84
----------------------------------------------------------------


In [10]:
ec = EncoderClassifier(encoder)
summary(ec, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 256]         200,960
              ReLU-2                  [-1, 256]               0
            Linear-3                   [-1, 64]          16,448
              ReLU-4                   [-1, 64]               0
            Linear-5                   [-1, 16]           1,040
              ReLU-6                   [-1, 16]               0
            Linear-7                 [-1, 1000]          17,000
           Dropout-8                 [-1, 1000]               0
            Linear-9                   [-1, 10]          10,010
Total params: 245,458
Trainable params: 27,010
Non-trainable params: 218,448
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.02
Params size (MB): 0.94
Estimated Total Size (MB): 0.96
--------------------------------------

In [13]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(ec.parameters(), lr=.01, momentum=.9)

In [15]:
for epoch in range(25):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = ec(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Training')

[1,   200] loss: 17.716
[1,   400] loss: 16.729
[2,   200] loss: 14.467
[2,   400] loss: 14.372
[3,   200] loss: 14.653
[3,   400] loss: 15.576
[4,   200] loss: 14.750
[4,   400] loss: 12.711
[5,   200] loss: 12.234
[5,   400] loss: 11.892
[6,   200] loss: 12.112
[6,   400] loss: 10.221
[7,   200] loss: 9.562
[7,   400] loss: 7.831
[8,   200] loss: 6.811
[8,   400] loss: 6.507
[9,   200] loss: 5.405
[9,   400] loss: 4.271
[10,   200] loss: 3.663
[10,   400] loss: 3.246
[11,   200] loss: 2.479
[11,   400] loss: 2.162
[12,   200] loss: 1.574
[12,   400] loss: 1.234
[13,   200] loss: 1.084
[13,   400] loss: 0.990
[14,   200] loss: 0.885
[14,   400] loss: 0.823
[15,   200] loss: 0.822
[15,   400] loss: 0.764
[16,   200] loss: 0.774
[16,   400] loss: 0.793
[17,   200] loss: 0.765
[17,   400] loss: 0.728
[18,   200] loss: 0.744
[18,   400] loss: 0.761
[19,   200] loss: 0.761
[19,   400] loss: 0.747
[20,   200] loss: 0.743
[20,   400] loss: 0.772
[21,   200] loss: 0.735
[21,   400] loss: 0.75

In [17]:
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = ec(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 81 %
