# Implementing a CNN with PyTorch

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from tqdm import trange
from tqdm.notebook import tqdm

class CNN(nn.Module):
    """Two-stage convolutional classifier producing 10 class logits.

    Each stage is a 5x5 convolution (padding 2), a ReLU and a 2x2 max-pool.
    The pooled feature maps are flattened and passed through two fully
    connected layers. The first linear layer is sized for single-channel
    28x28 inputs such as MNIST digits.
    """

    def __init__(self):
        super().__init__()
        # kernel_size=5 with padding=2 leaves height and width unchanged.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, padding=2)

        # The 7 * 7 is the spatial size left after the two 2x2 max-pools in
        # forward() halve a 28x28 input twice (28 -> 14 -> 7); the 64 is the
        # number of channels produced by conv2.
        self.fc1 = nn.Linear(7 * 7 * 64, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, 10).

        Args:
            x: image batch of shape (batch, 1, height, width).

        Raises:
            RuntimeError: if the flattened feature size does not match what
                fc1 expects (i.e. the input images have the wrong size).
        """
        # conv layer 1
        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)

        # conv layer 2
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)

        # fc layer 1
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 7*7*64), this never lets the batch dimension absorb a
        # wrong input size, so mis-sized images fail loudly in fc1 instead
        # of silently producing the wrong number of rows.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))

        # fc layer 2
        return self.fc2(x)

def get_device():
    """Return the preferred torch device: MPS first, then CUDA, else CPU."""
    # Accelerators in priority order, each paired with its availability probe.
    accelerators = (
        ("mps", torch.backends.mps.is_available),
        ("cuda", torch.cuda.is_available),
    )
    for backend_name, is_available in accelerators:
        if is_available():
            return torch.device(backend_name)
    return torch.device("cpu")

device = get_device()
print(f"Using device: {device}")

# Load the data
# The DataLoader draws its shuffling permutation and base seed on the CPU,
# so the generator passed to it must be a CPU generator even when the model
# runs on an accelerator. Batches are moved to `device` later, inside the
# training and evaluation loops.
generator = torch.Generator()

# ToTensor converts each image to a float tensor scaled to [0, 1].
mnist_train = datasets.MNIST(root="./datasets",
                             train=True,
                             transform=transforms.ToTensor(),
                             download=True)
mnist_test = datasets.MNIST(root="./datasets",
                            train=False,
                            transform=transforms.ToTensor(),
                            download=True)
# Shuffle only the training set; keep the test set in a fixed order.
train_loader = torch.utils.data.DataLoader(mnist_train,
                                           batch_size=100,
                                           shuffle=True,
                                           generator=generator)
test_loader = torch.utils.data.DataLoader(mnist_test,
                                          batch_size=100,
                                          shuffle=False,
                                          generator=generator)

model = CNN().to(device)

# Training
# CrossEntropyLoss takes raw logits, which is why the model's forward pass
# applies no softmax.
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Make training mode explicit.
model.train()
for epoch in trange(3):
    for images, labels in tqdm(train_loader):
        # Move the batch to the same device as the model parameters.
        images, labels = images.to(device), labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        y = model(images)

        loss(y, labels).backward()
        optimizer.step()

# Testing
correct = 0
total = len(mnist_test)

# Switch to inference mode and skip gradient tracking while evaluating.
model.eval()
with torch.no_grad():
    for images, labels in tqdm(test_loader):
        images, labels = images.to(device), labels.to(device)
        y = model(images)

        # The predicted class is the index of the largest logit.
        predictions = torch.argmax(y, dim=1)
        # .item() turns the per-batch count into a Python int, so the final
        # accuracy prints as a plain number rather than a device tensor repr.
        correct += (predictions == labels).sum().item()

print(f"Test accuracy {correct/total}")

Using device: mps


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/600 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]


RuntimeError: Mismatched Tensor types in NNPack convolutionOutput