In [5]:
# This comes from https://www.youtube.com/watch?v=vBlO87ZAiiw&ab_channel=NeuralNine

from torchvision import datasets
from torchvision.transforms import ToTensor

In [6]:
train_data = datasets.MNIST(
    root = "data",
    train = True,
    transform = ToTensor(), # Tensors are similar to numpy arrays, but can also be used on a GPU to accelerate computing
    download = True
)

test_data = datasets.MNIST(
    root = "data",
    train = False,
    transform = ToTensor(), # Tensors are similar to numpy arrays, but can also be used on a GPU to accelerate computing
    download = True
)

In [7]:
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [8]:
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [9]:
train_data.targets.size()

torch.Size([60000])

In [10]:
from torch.utils.data import DataLoader # DataLoader is an iterable that allows us to batch and shuffle the data

# Create data loaders for the training and test sets
loaders = {
    'train' : DataLoader(train_data, batch_size = 100, shuffle = True, num_workers = 1),
    'test' : DataLoader(test_data, batch_size = 100, shuffle = True, num_workers = 1),
}

loaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x1e44bbb18b0>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x1e44be41490>}

In [11]:
# Define NN architecture
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Inherit from nn.Module
class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()                                  # Call the constructor of the parent class

        self.conv1 = nn.Conv2d(1, 10, kernel_size = 5)               # 1 input channel, 10 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(10, 20, kernel_size = 5)
        self.conv2_drop = nn.Dropout2d()                             # Regularization layout to deactive certain nodes to prevent overfitting during training
        self.fc1 = nn.Linear(320, 50)                                # 320 input features, 50 output features
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):                                            # define the activation functions
        x = F.relu(F.max_pool2d(self.conv1(x), 2))                   # 2x2 max pooling => this reduces the size of the image by half
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))  # we call the dropout layer here because we want to apply it after the activation function
        x = x.view(-1, 320)                                          # flatten the tensor: 20 channels * 4x4 image size
        x = F.relu(self.fc1(x))                                      # fully connected layer
        x = F.dropout(x, training = self.training)                   # apply dropout layer
        x = self.fc2(x)                                              
        return F.log_softmax(x, dim = 1)                             # apply softmax to get probabilities so that we can interpret the output as probabilities

In [14]:
import torch

print(torch.cuda.is_available())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
optimizer = optim.Adam(model.parameters(), lr = 0.001)               # Stochastic Gradient Descent to optimize the model, we use lr = 0.001 as learning rate to update the weights
loss_fn = nn.CrossEntropyLoss()                                      # CrossEntropyLoss is used for classification problems since it combines the softmax and the negative log likelihood loss

def train(epoch):
    model.train() 
    for batch_idx, (data, target) in enumerate(loaders['train']):
        data, target = data.to(device), target.to(device)            # move the data to the GPU if available
        optimizer.zero_grad()                                        # set the gradients to zero before starting to do backpropragation because PyTorch accumulates the gradients on subsequent backward passes
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()                                              # store the gradients in the model and update the weights by calling the optimizer
        optimizer.step()

        # Every 10 batches
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(loaders['train'].dataset),
                100. * batch_idx / len(loaders['train']), loss.item()))

def test():
    model.eval()
    test_loss = 0
    correct = 0

    with torch.no_grad():                                            # disable gradient calculation to speed up the computation as we don't need gradients for evaluation
        for data, target in loaders['test']:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += loss_fn(output, target).item()              # sum up batch loss
            pred = output.argmax(dim = 1, keepdim = True)            # get the index of the max log-probability. Argmax is used to get the index of the max value in a tensor
            correct += pred.eq(target.view_as(pred)).sum().item()    # compare the prediction with the target and sum up the correct predictions
    
    test_loss /= len(loaders['test'].dataset)                        # calculate the average loss
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(loaders['test'].dataset),
        100. * correct / len(loaders['test'].dataset)))

False


In [2]:
import torch

print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA version:", torch.version.cuda)
    print("CUDA device count:", torch.cuda.device_count())
    print("Current CUDA device:", torch.cuda.current_device())
    print("CUDA device name:", torch.cuda.get_device_name(torch.cuda.current_device()))


CUDA available: False


In [13]:
for epoch in range(1, 11):
    train(epoch)
    test()


Test set: Average loss: 0.0013, Accuracy: 9609/10000 (96%)



KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

model.eval()

data, target = test_data[1]

data = data.unsqueeze(0).to(device)
output = model(data)
pred = output.argmax(dim = 1, keepdim = True).item()

print(f'Prediction: {pred}')

plt.imshow(data.cpu().numpy().squeeze(), cmap = 'gray')
plt.show()

NameError: name 'model' is not defined

In [None]:
device

NameError: name 'device' is not defined