In [1]:
# import
import torch
import torchvision.datasets as datasets
import torch.nn as nn           # All NN module
import torch.nn.functional as F # has functions w/o parameters like relu and tanh
from torch.utils.data import DataLoader
import torch.optim as optim
import torchvision.transforms as transforms # it has transforms that can be applied

In [2]:
# Model
class CNN(nn.Module):
    """Small convolutional classifier: conv -> pool -> conv -> pool -> linear.

    The single linear layer expects 16*6*6 features, which is what the
    convolution/pooling stack produces for 28x28 inputs:
    28 (conv1, padded so size is kept) -> 14 (pool) -> 12 (conv2, no padding)
    -> 6 (pool), with 16 output channels from conv2.

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, in_channels = 1, num_classes = 10):
        super(CNN, self).__init__()
        # Use the constructor argument instead of a hard-coded 1 so that the
        # network can actually be built for multi-channel inputs.
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=8, kernel_size=(3,3), stride=(1,1), padding=(1,1)) # same convolution
        # One pooling layer (no parameters) shared by both stages; halves H and W.
        self.pool = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        # No padding here, so each spatial dimension shrinks by 2.
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3,3))
        self.fc1 = nn.Linear(16*6*6, num_classes)

    def forward(self, x):
        """Return one row of ``num_classes`` scores for every sample in ``x``.

        Args:
            x: Batch of images; the first dimension is the batch dimension and
                is preserved, everything else is flattened before ``fc1``.
        """
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x) # reusing the same pooling
        # Flatten everything except the batch dimension for the linear layer.
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)

        return x

            

# Smoke test: push one random batch of 8 single-channel 28x28 images through
# an untrained network and print the shape of the resulting score tensor.
num_classes = 10
in_channels = 1
model = CNN(in_channels=in_channels, num_classes=num_classes)

x = torch.rand(8, in_channels, 28, 28)
y = model(x)

print(y.shape)
#print(y[0])

torch.Size([8, 10])


In [3]:
# set device
# Prefer the GPU when torch reports that CUDA is usable; otherwise stay on CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('device: ', device)


device:  cpu


In [4]:
# Hyperparameters
num_classes = 10       # number of output scores the network produces
num_epochs = 2         # full passes over the training loader
learning_rate = 1e-3   # step size handed to the Adam optimizer
batch_size = 8         # samples per batch for both data loaders


In [5]:
# initialize network
# Build a fresh network and move its parameters to the selected device.
model = CNN(in_channels=in_channels, num_classes=num_classes).to(device)

In [6]:
# dataset

# Training split: shuffled each epoch so batches differ between passes.
train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test split: only used for evaluation, where sample order does not affect the
# result, so keep a fixed order instead of reshuffling on every pass.
test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [7]:
# loss and optimizer
# Cross-entropy loss over the class scores, minimised with Adam on all of the
# model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)


In [8]:
# train
# check_accuracy() switches the model to eval mode; make sure training always
# runs in train mode, even when this cell is re-run after an evaluation.
model.train()

for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # backward: clear gradients left over from the previous batch first,
        # then compute fresh ones for this batch
        optimizer.zero_grad()
        loss.backward()

        # Gradient Descent/ Adam step
        optimizer.step()
        

In [9]:
# check train and test accuracy
def check_accuracy(loader, model):
    """Print how many samples yielded by ``loader`` the model labels correctly.

    The predicted class of a sample is the index of its largest score along
    dimension 1 of the model output. Inputs and labels are moved to the
    module-level ``device`` before the forward pass.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        model: Network to evaluate. It is put in eval mode for the duration of
            the check and returned to its previous train/eval mode afterwards.

    Returns:
        None. The counts and the accuracy (in percent) are printed.
    """
    num_correct = 0
    num_samples = 0

    # Remember the current mode so evaluating does not silently leave the
    # model in eval mode for any training that follows.
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device = device)
                y = y.to(device = device)

                scores = model(x)
                _, preds = scores.max(1)

                # .item() keeps the running total a plain Python int.
                num_correct += (preds == y).sum().item()
                num_samples += preds.shape[0]
    finally:
        model.train(was_training)

    # An empty loader would otherwise divide by zero; report 0.00 instead.
    accuracy = float(num_correct)*100/float(num_samples) if num_samples else 0.0
    print(f'Got {num_correct}/{num_samples} correct with accuracy of {accuracy:.2f} ')

In [10]:
# Accuracy on the data the model was trained on.
check_accuracy(loader=train_loader, model=model)

Got 59085/60000 correct with accuracy of 98.47 


In [11]:
# Accuracy on the held-out test split.
check_accuracy(loader=test_loader, model=model)

Got 9821/10000 correct with accuracy of 98.21 


In [14]:
# check train and test accuracy
def check_accuracy(loader, model):
    """Print which split is being checked and how many samples are correct.

    When ``loader.dataset`` exposes a ``train`` attribute, a banner naming the
    split is printed first; loaders without that attribute are evaluated
    without a banner instead of failing. The predicted class of a sample is
    the index of its largest score along dimension 1 of the model output.
    Inputs and labels are moved to the module-level ``device`` before the
    forward pass.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        model: Network to evaluate. It is put in eval mode for the duration of
            the check and returned to its previous train/eval mode afterwards.

    Returns:
        None. The counts and the accuracy (in percent) are printed.
    """
    # Not every loader wraps a dataset with a `train` flag, so probe for it
    # rather than assuming it is there.
    dataset = getattr(loader, "dataset", None)
    if hasattr(dataset, "train"):
        if dataset.train:
            print('Checking on training data')
        else:
            print('Checking on test data')

    num_correct = 0
    num_samples = 0

    # Remember the current mode so evaluating does not silently leave the
    # model in eval mode for any training that follows.
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device = device)
                y = y.to(device = device)

                scores = model(x)
                _, preds = scores.max(1)

                # .item() keeps the running total a plain Python int.
                num_correct += (preds == y).sum().item()
                num_samples += preds.shape[0]
    finally:
        model.train(was_training)

    # An empty loader would otherwise divide by zero; report 0.00 instead.
    accuracy = float(num_correct)*100/float(num_samples) if num_samples else 0.0
    print(f'Got {num_correct}/{num_samples} correct with accuracy of {accuracy:.2f} ')

In [15]:
# Re-check the training split with the version that announces the split.
check_accuracy(loader=train_loader, model=model)

Checking on training data
Got 59085/60000 correct with accuracy of 98.47 
