In [None]:
import os
import string

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
from PIL import Image
from sklearn.model_selection import KFold

In [None]:
def get_device():
    """Return the torch device string to run on.

    Returns:
        ``'cuda:0'`` when ``torch.cuda.is_available()`` is true, otherwise ``'cpu'``.
    """
    return 'cuda:0' if torch.cuda.is_available() else 'cpu'


# Device shared by the training and evaluation cells below.
device = get_device()
device

# Load Data

In [None]:
# Pre-processing applied to every image when it is read:
# collapse to a single grayscale channel, then convert to a tensor.
transform = torchvision.transforms.Compose([
    torchvision.transforms.Grayscale(num_output_channels=1),
    torchvision.transforms.ToTensor(),
])

# ImageFolder derives the class of each image from the sub-folder it sits in.
alphabet_set = torchvision.datasets.ImageFolder(transform=transform, root='./AlphabetDataset/')

print("Class names and corresponding labels:\n", alphabet_set.class_to_idx)
print("Total number of instances:", len(alphabet_set.targets))

# View Data

In [None]:
# Lookup table from integer class index to letter: ['A', 'B', ..., 'Z'].
alphabet_mapper = list(string.ascii_uppercase)

# Pick one arbitrary sample; `label` is the integer class index that is
# used to index `alphabet_mapper` (expected range 0..25 for the 26 letters).
image, label = alphabet_set[8269]

print('label:', alphabet_mapper[label])
# The image tensor is channel-first; channel 0 is the only (grayscale) channel.
plt.imshow(image[0].numpy(), cmap='gray')

In [None]:
# Display the first 21 samples labelled 'B'.
#
# The labels are scanned through `alphabet_set.targets` rather than by iterating
# the dataset itself: indexing the dataset loads and transforms the image, so
# iterating it would decode every image that precedes the wanted ones only to
# read its label. Here only the images that are actually shown get loaded.
b_label = alphabet_mapper.index('B')

c = 0  # number of 'B' images displayed so far
for sample_index, sample_label in enumerate(alphabet_set.targets):
    if sample_label != b_label:
        continue

    image, label = alphabet_set[sample_index]
    c += 1
    plt.imshow(image[0], cmap='gray')
    plt.show()

    # Stop once 21 images have been displayed (same cut-off as before).
    if c > 20:
        break

# The Network

In [None]:
class Network(torch.nn.Module):
    """LeNet-style CNN mapping 1x28x28 grayscale images to class scores.

    Layer stack::

        conv 5x5 (1 -> 6, padding 2) -> ReLU -> max-pool 2x2
        conv 5x5 (6 -> 16)           -> ReLU -> max-pool 2x2
        dropout -> flatten (16 * 5 * 5 = 400)
        fc 400 -> 120 -> ReLU -> fc 120 -> 84 -> ReLU -> fc 84 -> number_of_classes

    Args:
        number_of_classes: Number of output scores (defaults to 26).
        dropout_probability: Probability used by the dropout layer that sits
            between the convolutional part and the fully connected part
            (defaults to 0.4).
    """

    def __init__(self, number_of_classes=26, dropout_probability=0.4):
        super().__init__()

        self.conv1 = torch.nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=(2, 2))
        self.conv2 = torch.nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.max_pool = torch.nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = torch.nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = torch.nn.Linear(in_features=120, out_features=84)
        self.fc3 = torch.nn.Linear(in_features=84, out_features=number_of_classes)

        self.relu = torch.nn.ReLU()
        self.dpt = torch.nn.Dropout(dropout_probability)

    def forward(self, t):
        """Compute the raw (un-normalised) class scores for a batch of images.

        Args:
            t: Tensor of shape ``(batch, 1, 28, 28)``; a single unbatched image
                of shape ``(1, 28, 28)`` is also accepted and treated as a
                batch of one.

        Returns:
            Tensor of shape ``(batch, number_of_classes)``.

        Raises:
            RuntimeError: If the spatial size of the input does not produce the
                16 * 5 * 5 features expected by the first fully connected layer.
        """
        if t.dim() == 3:
            # Unbatched image: add the batch dimension explicitly.
            t = t.unsqueeze(0)

        t = self.conv1(t)     # 1 * 28 * 28 -> 6 * 28 * 28
        t = self.relu(t)
        t = self.max_pool(t)  # 6 * 14 * 14

        t = self.conv2(t)     # 16 * 10 * 10
        t = self.relu(t)
        t = self.max_pool(t)  # 16 * 5 * 5

        t = self.dpt(t)

        # Flatten per sample while pinning the batch dimension. Reshaping with
        # `-1` in the batch position would silently re-partition the elements
        # into a different number of rows whenever the input is not 28x28;
        # this way such an input fails loudly at fc1 instead.
        t = t.reshape(t.size(0), -1)

        t = self.fc1(t)  # 400 -> 120
        t = self.relu(t)

        t = self.fc2(t)  # 120 -> 84
        t = self.relu(t)

        t = self.fc3(t)  # 84 -> number_of_classes

        return t

# Train

In [None]:
# Hyper-parameters of the single training run on the full dataset.
batch_size = 1024
total_epochs = 2
learning_rate = 0.001

# Model, loss function and optimiser.
net = Network().to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

train_loader = torch.utils.data.DataLoader(alphabet_set, shuffle=True, batch_size=batch_size)

# Training mode keeps the dropout layer active.
net = net.train()

for epoch in range(1, total_epochs + 1):

    for batch_count, (images, labels) in enumerate(train_loader, start=1):

        images, labels = images.to(device), labels.to(device)

        # Forward pass: preds has shape (batch_size, number_of_classes).
        preds = net(images)
        loss = criterion(preds, labels)

        # Backward pass: clear stale gradients, back-propagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f'epoch: {epoch}/{total_epochs} | batch: {batch_count}/{len(train_loader)} | correct preds: {preds.argmax(dim=1).eq(labels).sum().item()}/{len(labels)}')

    # The loss reported here is that of the last batch of the epoch.
    print('Epoch:', epoch, '| Current loss:', loss.item())

# Save the model

In [None]:
# Save only the learned parameters (the state_dict) of the trained network.
path = "./saved_models/my_cnn_model.pt"

# torch.save does not create missing parent directories, so make sure the
# target folder exists; otherwise the cell fails on a fresh checkout.
os.makedirs(os.path.dirname(path), exist_ok=True)

torch.save(net.state_dict(), path)

# 10-Fold Cross-Validation

In [None]:
def ten_fold_cv(the_dataset, total_epochs, learning_rate, batch_size, random_state=None):
    """Estimate the accuracy of ``Network`` with shuffled 10-fold cross-validation.

    For each of the 10 folds a fresh ``Network`` is trained with Adam and
    cross-entropy loss on the other nine folds and then evaluated on the
    held-out fold. Progress is printed per batch, per epoch and per fold.

    Args:
        the_dataset: Map-style dataset (supports ``len()`` and integer
            indexing) yielding ``(image, label)`` pairs.
        total_epochs: Number of passes over the training part of each fold.
        learning_rate: Learning rate handed to the Adam optimiser.
        batch_size: Batch size for both the training and the test loader.
        random_state: Optional seed forwarded to ``KFold`` so that the fold
            assignment can be reproduced. ``None`` (the default) keeps the
            unseeded shuffling.

    Returns:
        The mean of the 10 per-fold accuracies, as a float in ``[0, 1]``.
    """
    n_splits = 10
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)  # create the 10 folds

    # Split over plain sample positions; KFold only needs the number of samples.
    sample_positions = np.arange(len(the_dataset))

    total_accuracy = 0
    for fold, (train_index, test_index) in enumerate(kf.split(sample_positions), start=1):  # each fold
        train_set = torch.utils.data.Subset(the_dataset, train_index.tolist())
        test_set = torch.utils.data.Subset(the_dataset, test_index.tolist())

        train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

        net = Network()  # a new, untrained model for each fold
        net = net.to(device)
        optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)  # bound to this fold's model
        criterion = torch.nn.CrossEntropyLoss()

        # --- training ---
        net.train()  # dropout active
        for epoch in range(1, total_epochs + 1):

            batch_count = 0

            for images, labels in train_loader:

                batch_count += 1

                images = images.to(device)
                labels = labels.to(device)

                preds = net(images)  # forward pass
                loss = criterion(preds, labels)

                optimizer.zero_grad()
                loss.backward()   # calculate gradients
                optimizer.step()  # update weights

                print(f'fold: {fold}/10 | epoch: {epoch}/{total_epochs} | batch: {batch_count}/{len(train_loader)} | correct preds: {preds.argmax(dim=1).eq(labels).sum().item()}/{len(labels)}')

            print(f'\nfold: {fold}/10 | epoch: {epoch}/{total_epochs} | loss: {loss.item()}\n')

        # --- testing ---
        # Switch to evaluation mode so dropout is disabled; otherwise the
        # predictions on the held-out fold are randomly perturbed and the
        # measured accuracy is not that of the trained model. Gradients are
        # not needed here, so their bookkeeping is turned off as well.
        net.eval()
        test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)
        total_correct_predictions = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images = images.to(device)
                labels = labels.to(device)

                preds = net(images)
                total_correct_predictions += preds.argmax(dim=1).eq(labels).sum().item()

        accuracy = total_correct_predictions / len(test_set)

        print(f'fold: {fold}/10 | correct predictions: {total_correct_predictions}/{len(test_set)} | accuracy: {accuracy}')
        print()

        total_accuracy += accuracy

    final_accuracy = total_accuracy / n_splits
    print('\n\nFinal result:')
    print('Accuracy:', final_accuracy)
    return final_accuracy

In [None]:
# Run the cross-validation; the returned mean accuracy is shown as the cell output.
ten_fold_cv(
    the_dataset=alphabet_set,
    total_epochs=2,
    learning_rate=0.001,
    batch_size=1024,
)