In [1]:
# import necessary packages
from scipy.io import loadmat
import numpy as np

import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import models
from torch.utils.data import Dataset, DataLoader

In [2]:
def load_data(dataset):
    return loadmat(dataset)

In [3]:
# Load the dataset
training_data = load_data("Dataset/SVHN/train_32x32.mat")
test_data = load_data("Dataset/SVHN/test_32x32.mat")

In [4]:
x_train = training_data["X"]
x_test = test_data["X"]

y_train = training_data["y"]
y_test = test_data["y"]

In [5]:
# custom dataset
class Dataset(Dataset):
    
    def __init__(self, features, labels, transform = None):
        self.features = torch.tensor(features).permute([3, 2, 0, 1])
        self.labels = torch.LongTensor(labels).squeeze_()
        self.labels[self.labels == 10] = 0
        
        self.transform = transform
        
    def __len__(self):
        return (len(self.labels))
    
    def __getitem__(self, index):
        
        sample = self.features[index], self.labels[index]

        if self.transform:
            sample = self.transform(sample)
        
        return sample

In [6]:
class transform(object):
    def __init__(self, divide_by = 255.0):
        self.divide_by = divide_by
        
    def __call__(self, sample):
        x = sample[0]
        y = sample[1]
        x = x / self.divide_by
        sample = x, y
        return sample

In [7]:
# train_loader and test_loader

transform = transform()

train_data = Dataset(x_train, y_train, transform = transform)
train_loader = DataLoader(train_data, batch_size = 512, shuffle = True)

test_data = Dataset(x_test, y_test, transform = transform)
test_loader = DataLoader(test_data, batch_size = 512, shuffle = True)

In [8]:
for i, j in train_loader:
    print(i.shape)
    break

torch.Size([512, 3, 32, 32])


In [9]:
class CNN(nn.Module):
    
    def __init__(self):
        super(CNN, self).__init__()
        
        # 1st Convolution
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 6, kernel_size = 3, stride = 1, padding = 2)
        nn.init.normal_(self.conv1.weight, std = 0.001)
        nn.init.constant_(self.conv1.bias, 0)
        self.bn1 = nn.BatchNorm2d(6)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        
    
        # 2nd Convolution
        self.conv2 = nn.Conv2d(in_channels = 6, out_channels = 12, kernel_size = 3, stride = 1, padding = 1)
        nn.init.normal_(self.conv2.weight, std = 0.001)
        nn.init.constant_(self.conv2.bias, 0)
        self.bn2 = nn.BatchNorm2d(12)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        
        
        # 3rd Convolution
        self.conv3 = nn.Conv2d(in_channels = 12, out_channels = 36, kernel_size = 3, stride = 1, padding = 1)
        nn.init.normal_(self.conv3.weight, std = 0.001)
        nn.init.constant_(self.conv3.bias, 0)
        self.bn3 = nn.BatchNorm2d(36)
        self.relu3 = nn.ReLU()
        self.maxpool3 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        
        
        # fully connected layer
        self.fc1 = nn.Linear(36 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)
        
        
        # dropout
        self.dropout = nn.Dropout(0.3)
        
        
    def forward(self, x):
        # conv layers
        x = self.maxpool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.maxpool2(self.relu2(self.bn2(self.conv2(x))))
        x = self.maxpool3(self.relu3(self.bn3(self.conv3(x))))
        
        
        # reshape the image
        x = x.view(-1, 36 * 4 * 4)
        x = self.dropout(self.fc1(x))
        x = self.dropout(self.fc2(x))
        x = F.log_softmax(self.fc3(x), dim = 1)
        
        
        return x

In [17]:
# trying the model
"""model = CNN()
criterion = nn.NLLLoss()
images, labels = next(iter(train_loader))

print(images.shape, labels.shape)

logits = model(images)
print(logits.shape)
loss = criterion(logits, labels)"""

torch.Size([512, 3, 32, 32]) torch.Size([512])
torch.Size([512, 10])


In [11]:
# set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [12]:
# Function to calculate accuracy
def get_accuracy(model, dataloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs)
            
            # model out was log_softmax, doing torch_exp will convert them to softmax
            ps = torch.exp(outputs)
            
            # get the class with highest probability
            _, predicted = ps.topk(1, dim = 1)
            total += labels.size(0)
            correct += (predicted == labels.view(*predicted.shape)).sum().item()
 
    model.train()
    return 100 * correct / total
    

In [15]:
model = CNN().to(device)
model.train()

# loss function as the model output is log_softmax
loss_fn = nn.NLLLoss()

# optimizer
LEARNING_RATE = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

# number of epochs
NUM_EPOCHS = 50

for epoch in range(NUM_EPOCHS):   
    for inputs,labels in (train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        
        # clear the accumulated grads
        optimizer.zero_grad() 
        
        output = model(inputs)
        loss = loss_fn(output, labels)
        
        # compute gradients
        loss.backward() 
        
        # optimize
        optimizer.step() 

    if epoch % 1 == 0:
        accucacy = get_accuracy(model,test_loader)
        print(f"Epoch: {epoch + 1}, Training Loss: {loss.item()}, Test Accuracy: {accucacy}") 

Epoch: 1, Training Loss: 0.5377659201622009, Test Accuracy: 73.22526121696373
Epoch: 2, Training Loss: 0.32063308358192444, Test Accuracy: 81.2922556853104
Epoch: 3, Training Loss: 0.2149576097726822, Test Accuracy: 84.17332513829133
Epoch: 4, Training Loss: 0.7719606757164001, Test Accuracy: 85.33343577135832
Epoch: 5, Training Loss: 0.687445878982544, Test Accuracy: 85.14904732636755
Epoch: 6, Training Loss: 0.6759110689163208, Test Accuracy: 85.90580823601721
Epoch: 7, Training Loss: 0.1418183445930481, Test Accuracy: 87.20036877688999
Epoch: 8, Training Loss: 0.6663283109664917, Test Accuracy: 88.14535955746773
Epoch: 9, Training Loss: 0.1753505915403366, Test Accuracy: 86.26306084818685
Epoch: 10, Training Loss: 0.38802486658096313, Test Accuracy: 84.3999692685925
Epoch: 11, Training Loss: 0.37755638360977173, Test Accuracy: 87.73048555623848
Epoch: 12, Training Loss: 0.34831422567367554, Test Accuracy: 84.5958819913952
Epoch: 13, Training Loss: 0.3810703158378601, Test Accuracy: 