### Simple Fully-Connected Neural Network

#### Data Loader

In [65]:
%matplotlib inline
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor

# Download (on first run) and load the MNIST train/test splits into ./dataset.
# ToTensor() converts each image to a float tensor.
print("Loading dataset...")
train_data = datasets.MNIST(
    root="dataset",
    train=True,
    download=True,
    transform=ToTensor(),
)
test_data = datasets.MNIST(
    root="dataset",
    train=False,
    download=True,
    transform=ToTensor(),
)

# Hyperparameters
learning_rate = 0.01
batch_size = 1
# epochs = 5

# Seed PyTorch's global RNG so that the shuffled batch order (and any weights
# initialised later in this kernel) are the same on every Restart & Run All.
seed = 0
torch.manual_seed(seed)

# Set data loader: training batches are reshuffled on each pass, test batches
# keep the dataset order.
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
print("Done")

Loading dataset...
Done


#### Network Setup

In [66]:
# train_features, train_labels = next(iter(train_dataloader))
# print(train_features.shape, train_labels.shape)
# print(f"Label: {train_labels[0]}")
# plt.imshow(train_features[0].squeeze(), cmap=plt.cm.gray)

# Choose the compute backend in order of preference: CUDA GPU, then Apple
# Metal (MPS), falling back to the CPU when neither is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

class MarioNet(nn.Module):
    """Two-layer fully-connected classifier: 784 inputs -> 128 hidden -> 10 logits."""

    def __init__(self):
        """Create the flatten layer and the Linear -> ReLU -> Linear stack."""
        super().__init__()
        # Collapses every dimension after the batch dimension into one.
        self.flatten = nn.Flatten()
        hidden_layer = nn.Linear(784, 128)
        output_layer = nn.Linear(128, 10)
        self.linear_relu_stack = nn.Sequential(hidden_layer, nn.ReLU(), output_layer)

    def forward(self, x):
        """Flatten ``x`` and return the 10 unnormalised logits per sample."""
        return self.linear_relu_stack(self.flatten(x))

Using cpu device


#### Network Training

In [69]:
# Model
model = MarioNet().to(device)
print(model)

# Training: a single pass over the training set with plain SGD and
# cross-entropy loss computed on the raw logits.
model.train()
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
losses = []  # loss sampled every 100th batch, used for the plot below
for batch, (X, y) in enumerate(train_dataloader):
    # The DataLoader yields CPU tensors; the model lives on `device`. Without
    # this move the forward pass fails with a device mismatch on cuda/mps.
    X, y = X.to(device), y.to(device)

    # Forward pass
    logits = model(X)
    loss = loss_function(logits, y)

    # Backpropagation
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()  # clear gradients so they do not accumulate across batches

    if batch % 100 == 0:
        # Use separate names so the `loss` tensor is not rebound to a float.
        loss_value = loss.item()
        current = batch * batch_size + len(X)
        losses.append(loss_value)
        print(f"loss: {loss_value:>7f}  [{current:>5d}/{len(train_dataloader.dataset):>5d}]")

plt.plot(losses)
plt.xlabel("logged step (every 100 batches)")
plt.ylabel("training loss")
plt.title("Training loss");

# X = torch.rand(1, 28, 28, device=device)
# logits = model(X)
# pred_probab = nn.Softmax(dim=1)(logits)
# pred_list = pred_probab.squeeze().tolist()
# y_pred = pred_probab.argmax(1)
# print(f"Predicted class: {y_pred.squeeze()}")
# plt.bar(range(len(pred_list)), pred_list)

MarioNet(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=10, bias=True)
  )
)
loss: 2.318636  [    1/60000]
loss: 1.940265  [  101/60000]
loss: 1.865941  [  201/60000]
loss: 1.497973  [  301/60000]
loss: 0.666072  [  401/60000]
loss: 2.809870  [  501/60000]
loss: 1.914830  [  601/60000]
loss: 0.577269  [  701/60000]


KeyboardInterrupt: 

#### Test Network

In [None]:
# Evaluate on the test set: average the per-batch cross-entropy loss.
# The training cell leaves the model in train mode, so switch to eval mode.
model.eval()
test_loss = 0
losses = []  # per-batch test loss, used for the plot below
with torch.no_grad():  # no gradients are needed for evaluation
    for X, y in test_dataloader:
        # Move the batch to the model's device; the DataLoader yields CPU
        # tensors, which would fail against a cuda/mps model.
        X, y = X.to(device), y.to(device)
        logits = model(X)
        loss = loss_function(logits, y).item()
        test_loss += loss
        losses.append(loss)
# Mean over batches (len(test_dataloader) is the number of batches).
test_loss /= len(test_dataloader)
print(test_loss)
plt.plot(losses)
plt.xlabel("test batch")
plt.ylabel("loss")
plt.title("Per-batch test loss");