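This notebook follows the same steps as the basic fully connected example, but uses a convolutional neural network (CNN). CNNs generally work better than fully connected networks on image data.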

In [1]:
# 0 Importing the libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

For convolutional layers, the key thing to keep track of is how each layer changes the spatial size of its input:

$$
n_{out} = \left\lfloor \frac{n_{in} + 2p - k}{s} \right\rfloor + 1
$$

\begin{align*}
n_{in} &: \text{ input size along one spatial dimension (height or width)} \\
n_{out} &: \text{ output size along that same dimension} \\
k &: \text{ convolution kernel size} \\
p &: \text{ convolution padding size} \\
s &: \text{ convolution stride size} 
\end{align*}


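`kernel_size=(3,3), stride=(1,1), padding=(1,1)`

ensures that $n_{out}$ is equal to $n_{in}$: with $k = 3$, $p = 1$ and $s = 1$ the formula gives $n_{out} = \lfloor (n_{in} + 2 - 3) / 1 \rfloor + 1 = n_{in}$.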

In [2]:
# 1 Creating a Fully Connected Network

## Simple one
class BasicNeuralNetwork(nn.Module):
    """Fully connected baseline with a single hidden layer of 50 units.

    Args:
        input_size: number of features per flattened sample
            (28 * 28 = 784 for MNIST).
        num_classes: number of scores produced per sample (10 for MNIST).
    """

    def __init__(self, input_size, num_classes):
        # Run nn.Module's own initialiser first so the layers below get registered.
        super().__init__()
        hidden_units = 50
        self.l1 = nn.Linear(in_features=input_size, out_features=hidden_units)
        self.l2 = nn.Linear(in_features=hidden_units, out_features=num_classes)

    def forward(self, x):
        """Return the raw class scores for a batch of flattened inputs."""
        hidden = F.relu(self.l1(x))  # hidden layer followed by ReLU
        return self.l2(hidden)       # output layer, no activation applied

## Convolutional Neural Network:
class ConvolutionalNeuralNetwork(nn.Module):
    """Small two-stage CNN sized for 28x28 images (e.g. MNIST).

    Layout: conv 3x3 (8 maps) -> ReLU -> 2x2 max-pool
            -> conv 3x3 (16 maps) -> ReLU -> 2x2 max-pool
            -> flatten -> linear layer producing ``num_classes`` scores.

    Args:
        in_channels: number of channels in the input images (1 for grayscale).
        num_classes: number of scores produced per image.

    Note:
        The linear layer expects 16 * 7 * 7 features, i.e. a 28x28 input that
        has been halved twice by pooling (28 -> 14 -> 7).
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        # kernel 3, stride 1, padding 1 keeps height and width unchanged.
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,  # honour the constructor argument (was hard-coded to 1)
            out_channels=8,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )
        # 2x2 pooling with stride 2 halves height and width; the same module is reused twice.
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(
            in_channels=8,
            out_channels=16,
            kernel_size=(3, 3),
            stride=(1, 1),
            padding=(1, 1),
        )
        # 16 feature maps of 7x7 remain after the two pooling steps on a 28x28 input.
        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)

    def forward(self, x):
        """Return raw class scores for a batch shaped (batch, in_channels, 28, 28)."""
        x = F.relu(self.conv1(x))      # spatial size unchanged
        x = self.pool(x)               # spatial size halved
        x = F.relu(self.conv2(x))      # spatial size unchanged
        x = self.pool(x)               # spatial size halved again
        x = x.reshape(x.shape[0], -1)  # flatten everything except the batch dimension
        x = self.fc1(x)
        return x
        


In [3]:
# Testing the CNN: push one random batch through it and look at the output shape.
# The batch is 64 images, 1 channel, 28x28 pixels.
x = torch.randn((64, 1, 28, 28))
model = ConvolutionalNeuralNetwork()
print(model(x).size())

torch.Size([64, 10])


In [4]:
# Testing the fully connected network: 64 flattened samples of 784 features each.
model = BasicNeuralNetwork(784, 10)
x = torch.randn((64, 784))
print(model(x).size())

torch.Size([64, 10])


In [5]:
# 2 Set device: use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [6]:
# Hyperparameters
input_size = 28 * 28   # 784 features per flattened image (only the fully connected network needs this)
num_classes = 10
learning_rate = 1e-3   # step size handed to the optimizer
batch_size = 64
num_epochs = 10        # one epoch = one full pass over the training set

# CNN-specific: number of channels in the input images
in_channels = 1

In [7]:
# 3 Load Data
# Both splits are downloaded into 'dataset/' if missing and converted to tensors.
train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)
# Evaluation does not depend on sample order, so the test split is not shuffled;
# this keeps the batch order identical from run to run.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to dataset/MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 17.2MB/s]


Extracting dataset/MNIST\raw\train-images-idx3-ubyte.gz to dataset/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 2.25MB/s]

Extracting dataset/MNIST\raw\train-labels-idx1-ubyte.gz to dataset/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz





Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 14.0MB/s]


Extracting dataset/MNIST\raw\t10k-images-idx3-ubyte.gz to dataset/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<?, ?B/s]

Extracting dataset/MNIST\raw\t10k-labels-idx1-ubyte.gz to dataset/MNIST\raw






In [8]:
# 4 Initialize network
# Build the CNN from the hyperparameters defined earlier (instead of relying on
# the class defaults) and move its parameters to the selected device.
model = ConvolutionalNeuralNetwork(in_channels=in_channels, num_classes=num_classes).to(device)

In [9]:
# 5 Loss and optimizer
# Cross-entropy is computed from the raw scores returned by the model;
# Adam updates every model parameter using the configured learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)


In [11]:
# 6 Train Network

from tqdm import tqdm

# Make sure the model is in training mode, even if this cell is re-run after an evaluation.
model.train()

for epoch in tqdm(range(num_epochs), desc="Training Epochs"):
    # Each iteration yields one batch: `data` holds the images, `targets` the labels.
    for data, targets in train_loader:
        # Move the batch to the same device as the model.
        data = data.to(device=device)
        targets = targets.to(device=device)

        # No manual flattening here: the CNN consumes the image tensor directly
        # and reshapes it itself right before its fully connected layer.

        # Forward pass
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()        # compute gradients of the loss w.r.t. the parameters
        optimizer.step()       # update the parameters
        

Training Epochs: 100%|██████████| 10/10 [03:03<00:00, 18.40s/it]


In [None]:
# 7 Check accuracy on training & test to see how good our model is

def check_accuracy(loader, model):
    """Print how many samples yielded by ``loader`` the model classifies correctly.

    Args:
        loader: iterable of ``(inputs, labels)`` batches. If ``loader.dataset``
            has a ``train`` attribute, it decides whether the report is
            labelled as training or test data.
        model: network mapping a batch of inputs to per-class scores.

    The model is put in evaluation mode while the check runs and is returned
    to whatever mode it was in before the call. Batches are moved to the
    notebook-level ``device``. An empty loader is reported as 0.00 accuracy.
    """
    # Not every dataset exposes `.train` (e.g. wrappers around a dataset),
    # so fall back to a neutral label instead of raising AttributeError.
    is_train_split = getattr(getattr(loader, 'dataset', None), 'train', None)
    if is_train_split is None:
        print('Checking accuracy')
    elif is_train_split:
        print('Checking accuracy on training data')
    else:
        print('Checking accuracy on test data')

    num_correct = 0
    num_samples = 0
    was_training = model.training  # remember the mode so it can be restored afterwards
    model.eval()

    try:
        with torch.no_grad():  # gradients are not needed for evaluation
            for x, y in tqdm(loader, desc="Checking accuracy"):
                x = x.to(device=device)
                y = y.to(device=device)
                # No flattening: the CNN takes the image batch as-is.

                scores = model(x)
                predictions = scores.argmax(dim=1)  # index of the highest score = predicted class
                # .item() keeps the running count as a plain Python int.
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the previous mode even if evaluation failed part-way.
        model.train(was_training)

    accuracy = num_correct / num_samples * 100 if num_samples else 0.0
    print(f'Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}')
    
# Report accuracy on the training split first, then on the test split.
for split_loader in (train_loader, test_loader):
    check_accuracy(split_loader, model)
        

Checking accuracy on training data


Checking accuracy: 100%|██████████| 938/938 [00:08<00:00, 113.27it/s]


Got 59541 / 60000 with accuracy 99.23
Checking accuracy on test data


Checking accuracy: 100%|██████████| 157/157 [00:01<00:00, 86.88it/s]

Got 9861 / 10000 with accuracy 98.61



