# CNN

In [9]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import numpy as np

In [10]:

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.MNIST(
    root="./data", train=True, download=True, transform=transform)
testset = torchvision.datasets.MNIST(
    root="./data", train=False, download=True, transform=transform)

validation_split = 0.1
split = int(np.floor(validation_split * len(trainset)))
indices = list(range(len(trainset)))
np.random.shuffle(indices)

train_indices, valid_indices = indices[split:], indices[:split]
train_sampler = data.SubsetRandomSampler(train_indices)
valid_sampler = data.SubsetRandomSampler(valid_indices)

trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=1, num_workers=4, sampler=train_sampler)
validloader = torch.utils.data.DataLoader(
    trainset, batch_size=1, num_workers=4, sampler=valid_sampler)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=1, shuffle=False, num_workers=4)

In [8]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=1, out_channels=8, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(
            in_channels=8, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(
            in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 5 * 5, 200)
        self.fc2 = nn.Linear(200, 10)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = x.view(-1, 64 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def evaluate(self, loader):
        with torch.no_grad():
            correct, total = 0, 0
            for data in loader:
                # get the inputs
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = self(inputs)
                val_loss = criterion(outputs, labels)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        return val_loss, correct / total

In [11]:
model = Net()
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
model.to(device)
print("Let\'s use {}".format(device))

Let's use cpu


In [12]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [6]:
best_val_acc, train_loss, val_loss = -1, None, None
print("{:5s} | {:10s} | {:10s} | {:10s}".format("epoch", "train_loss",
                                                "valid_loss", "valid_acc"))
for epoch in range(10):  # loop over the dataset multiple times
    model.train()
    for i, data in enumerate(trainloader, 1):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model(inputs)
        train_loss = criterion(outputs, labels)
        train_loss.backward()
        optimizer.step()
    val_loss, val_acc = model.evaluate(validloader)
    print(
        "\r{:5d} | {:10.3f} | {:10.3f} | {:10.1%}".format(
            epoch + 1, train_loss.item(), val_loss.item(), val_acc),
        end="")
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_model")
        print("\tsaved!", end="")
    print("")
model.load_state_dict(torch.load("best_model"))

epoch | train_loss | valid_loss | valid_acc 
    1 |      0.004 |      0.001 |      97.5%	saved!
    2 |      0.000 |      0.000 |      98.1%	saved!
    3 |      0.000 |      0.000 |      98.3%	saved!
    4 |      0.000 |      0.000 |      98.4%	saved!
    5 |      0.000 |      0.002 |      98.5%	saved!
    6 |      0.000 |      0.000 |      98.7%	saved!
    7 |      0.000 |      0.004 |      98.6%
    8 |      0.000 |      0.000 |      98.6%
    9 |      0.000 |      0.000 |      98.7%
   10 |      0.000 |      0.000 |      98.9%	saved!


In [7]:
print("Accuracy of the model on the 10000 test images: {:3.2%}".format(
    model.evaluate(testloader)[1]))

Accuracy of the model on the 10000 test images: 99.02%
