In [11]:
import torch

In [12]:
from tensorflow.keras.datasets import mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

In [13]:
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

In [14]:
from sklearn.model_selection import train_test_split
features_train, features_test, targets_train, targets_test = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)

In [15]:
featuresTrain = torch.from_numpy(features_train)
targetsTrain = torch.from_numpy(targets_train)
featuresTest = torch.from_numpy(features_test)
targetsTest = torch.from_numpy(targets_test)
print(features_train.dtype)
print(features_train.shape)
print(targets_train.dtype)
print(targets_train.shape)

float32
(48000, 28, 28)
uint8
(48000,)


In [16]:
train = torch.utils.data.TensorDataset(featuresTrain, targetsTrain)
test  = torch.utils.data.TensorDataset(featuresTest, targetsTest)

In [17]:
featuresTrain.shape

torch.Size([48000, 28, 28])

In [18]:
# Hyper Parameters
# batch_size, epoch and iteration
LR = 0.01
batch_size = 100
n_iters = 10000
num_epochs = n_iters / (len(features_train) / batch_size)
num_epochs = int(num_epochs)
num_epochs

20

In [19]:
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
test_loader  = torch.utils.data.DataLoader(test,  batch_size=batch_size, shuffle=True)


In [20]:
dataiter = iter(train_loader)
(images, labels) = dataiter.next()
print(type(images))
print(images.shape)
print(labels.shape)

<class 'torch.Tensor'>
torch.Size([100, 28, 28])
torch.Size([100])


In [21]:
import torch.nn as nn

# Create CNN Model
class CNN_Model(nn.Module):
    def __init__(self):
        super(CNN_Model, self).__init__()
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(32*4*4, 10)
    
    def forward(self, x):
        out = self.cnn1(x)
        out = self.relu1(out)
        out = self.maxpool1(out)
        out = self.cnn2(out)
        out = self.relu2(out)
        out = self.maxpool2(out)
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        return out

In [22]:
model = CNN_Model()
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()
input_shape = (-1,1,28,28)

CNN_Model(
  (cnn1): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
  (relu1): ReLU()
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (cnn2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=512, out_features=10, bias=True)
)


In [23]:
from torch.autograd import Variable
def fit_model(model, loss_func, optimizer, input_shape, num_epochs, train_loader, test_loader):
    training_loss = []
    trianing_accuracy = []
    validation_loss = []
    validation_accuracy = []
    for epoch in range(num_epochs):
        correct_train = 0
        total_train = 0
        for i, (images, labels) in enumerate(train_loader):
            train = Variable(images.view(input_shape))
            labels = Variable(labels)
            optimizer.zero_grad()
            outputs = model(train)
            train_loss = loss_func(outputs, labels)
            train_loss.backward()
            optimizer.step()
            predicted = torch.max(outputs.data, 1)[1]
            total_train += len(labels)
            correct_train += (predicted == labels).float().sum() 
        trian_accuracy = 100 * correct_train / float(total_train)
        trianing_accuracy.append(trian_accuracy)
        training_loss.append(train_loss.data)
        
        correct_test = 0
        total_test = 0
        for i, (images, labels) in enumerate(test_loader):
            test = Variable(images.view(input_shape))
            outputs = model(test)
            val_loss = loss_func(outputs, labels)
            predicted = torch.max(outputs, 1)[1]
            total_test += len(labels)
            correct_test += (predicted == labels).float().sum()
        val_accuracy = 100 * correct_test / float(total_test)
        validation_accuracy.append(val_accuracy)
        validation_loss.append(val_loss.data)
        print('Train Epoch: {}/{} Training_Loss: {} Training_Acc: {:.6f} Val_Loss: {} Val_Acc: {:.6f}'.format(epoch+1, num_epochs, train_loss.data, trian_accuracy, val_loss.data, val_accuracy))
    return training_loss, trianing_accuracy, validation_loss, validation_accuracy

In [24]:
training_loss, training_accuracy, validation_loss, validation_accuracy = fit_model(model, loss_func, optimizer, input_shape, num_epochs, train_loader, test_loader)

Train Epoch: 1/20 Training_Loss: 0.09328491240739822 Training_Acc: 94.199997 Val_Loss: 0.07219959795475006 Val_Acc: 97.691666
Train Epoch: 2/20 Training_Loss: 0.024533068761229515 Training_Acc: 98.125000 Val_Loss: 0.01885213889181614 Val_Acc: 97.858330
Train Epoch: 3/20 Training_Loss: 0.07659858465194702 Training_Acc: 98.439583 Val_Loss: 0.04674234241247177 Val_Acc: 98.241669
Train Epoch: 4/20 Training_Loss: 0.01860261894762516 Training_Acc: 98.664581 Val_Loss: 0.07464250922203064 Val_Acc: 98.516670
Train Epoch: 5/20 Training_Loss: 0.0032795520965009928 Training_Acc: 98.702080 Val_Loss: 0.023051107302308083 Val_Acc: 98.000000
Train Epoch: 6/20 Training_Loss: 0.045174866914749146 Training_Acc: 98.729164 Val_Loss: 0.09574983268976212 Val_Acc: 98.241669
Train Epoch: 7/20 Training_Loss: 0.01883605867624283 Training_Acc: 98.770836 Val_Loss: 0.21834927797317505 Val_Acc: 98.533333
Train Epoch: 8/20 Training_Loss: 0.032394200563430786 Training_Acc: 98.902084 Val_Loss: 0.012442726641893387 Val_