### MNIST Classification using CNNs

In [1]:
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense, Dropout, MaxPooling2D
from keras.utils import to_categorical

(x_train,y_train), (x_test,y_test) = mnist.load_data()
print(x_train.shape, x_test.shape)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


(60000, 28, 28) (10000, 28, 28)


### Prepare dataset

In [2]:
x_train = ((x_train.reshape(60000,28,28,1)).astype('float32'))/255
x_test = ((x_test.reshape(10000,28,28,1)).astype('float32'))/255
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

### Create model

In [3]:
model = Sequential()
model.add(Conv2D(32,(3,3), activation='relu', input_shape=(28,28,1)))
model.add(MaxPooling2D((2,2)))
model.add(Conv2D(64, (3,3), activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(Conv2D(128, (3,3), activation='relu'))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 3, 3, 128)         73856     
_________________________________________________________________
dropout_1 (Dropout)          (None, 3, 3, 128)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1152)              0         
__________

#### Compile and Train

In [4]:
model.compile(optimizer='adam',
             loss='categorical_crossentropy',
             metrics=['accuracy'])

model.fit(x_train, y_train, epochs=10, batch_size=64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2aeb92541d0>

#### Test model

In [6]:
test_loss,test_acc = model.evaluate(x_test,y_test)
print(test_acc)

0.9926


### Using Pytorch with similar model

In [7]:
import torch
import torch.nn as nn
from torch.nn import Conv2d, ReLU, MaxPool2d, Dropout2d,Linear
from torchvision.datasets import MNIST
from torch.autograd import Variable
import torch.optim as optim
from torchvision import transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader

%matplotlib inline

In [8]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [9]:
all_transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))]) #normalize with mean and std of mnist

train_data = MNIST('data/', train=True, transform=all_transform,download=True)
test_data = MNIST('data/', train=False, transform=all_transform,download=False)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

In [13]:
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.conv1 = nn.Sequential(
            Conv2d(1, 32, kernel_size=3),
            ReLU(),
            MaxPool2d(kernel_size=2))
        self.conv2 = nn.Sequential(
            Conv2d(32, 64, kernel_size=3),
            ReLU(),
            MaxPool2d(kernel_size=2))
        self.conv3 = nn.Sequential(
            Conv2d(64, 128, kernel_size=3),
            ReLU(),
            Dropout2d())
        self.fc1 = Linear(1152,64)
        self.fc2 = Linear(64,10)
    
    def forward(self,x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        out = out.reshape(out.size(0),-1)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

model = Net().to(device)

In [14]:
#Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [16]:
#Train model
num_epochs = 10
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 400 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch [1/10], Step [400/938], Loss: 0.0785
Epoch [1/10], Step [800/938], Loss: 0.0263
Epoch [2/10], Step [400/938], Loss: 0.0801
Epoch [2/10], Step [800/938], Loss: 0.0041
Epoch [3/10], Step [400/938], Loss: 0.0558
Epoch [3/10], Step [800/938], Loss: 0.0263
Epoch [4/10], Step [400/938], Loss: 0.0067
Epoch [4/10], Step [800/938], Loss: 0.0403
Epoch [5/10], Step [400/938], Loss: 0.0029
Epoch [5/10], Step [800/938], Loss: 0.1677
Epoch [6/10], Step [400/938], Loss: 0.0818
Epoch [6/10], Step [800/938], Loss: 0.0053
Epoch [7/10], Step [400/938], Loss: 0.0057
Epoch [7/10], Step [800/938], Loss: 0.0325
Epoch [8/10], Step [400/938], Loss: 0.0086
Epoch [8/10], Step [800/938], Loss: 0.0117
Epoch [9/10], Step [400/938], Loss: 0.0008
Epoch [9/10], Step [800/938], Loss: 0.0195
Epoch [10/10], Step [400/938], Loss: 0.0448
Epoch [10/10], Step [800/938], Loss: 0.0050


In [17]:
#Test model
model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model: {} %'.format(100 * correct / total))


Test Accuracy of the model: 99.23 %
