---
> (Call `load_dataset()` before any other operation to avoid a RAM overflow issue.)
---

---
> ***Import Modules***
---

In [None]:
# .cuda() runs the computation on a CUDA-enabled NVIDIA GPU.
# Remove the .cuda() calls if you do not have a CUDA-enabled NVIDIA GPU.

import torch
from torchvision import transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
import numpy as np
import os.path
from Model import Model
import pandas as pd 
from torch.utils.data import Dataset
import csv
import csv_loader

# Lookup table keyed by the class index as a string (see the commented-out
# use in test()); it starts out empty.
to_bangla = dict()

---
> ***Functions for saving and loading checkpoints***
---




In [None]:
# saving training checkpoints
def save_checkpoint(epoch, model_state_dict, criterion_state_dict, optim_state_dict, 
                    trainloss_list, trainac_list, valloss_list, valac_list, best_valloss, PATH):
    """Write the complete training state to PATH as a single torch.save dictionary.

    Args:
        epoch: epoch counter to store.
        model_state_dict: state dict of the model.
        criterion_state_dict: state dict of the loss criterion.
        optim_state_dict: state dict of the optimizer.
        trainloss_list: training-loss history.
        trainac_list: training-accuracy history.
        valloss_list: validation-loss history.
        valac_list: validation-accuracy history.
        best_valloss: best validation loss seen so far.
        PATH: destination handed straight to torch.save.
    """
    # The keys below are exactly the ones load_checkpoint reads back.
    snapshot = dict(
        epoch=epoch,
        model_state_dict=model_state_dict,
        criterion_state_dict=criterion_state_dict,
        optimizer_state_dict=optim_state_dict,
        trainloss_list=trainloss_list,
        trainac_list=trainac_list,
        valloss_list=valloss_list,
        valac_list=valac_list,
        best_valloss=best_valloss,
    )
    torch.save(snapshot, PATH)


# loading training checkpoints
def load_checkpoint(model, criterion, optimizer, PATH):
    """Restore a training checkpoint written by save_checkpoint.

    The model, criterion and optimizer are updated in place from the stored
    state dicts.

    Args:
        model: module whose parameters are overwritten.
        criterion: loss module whose state is overwritten.
        optimizer: optimizer whose state is overwritten.
        PATH: checkpoint location handed to torch.load.

    Returns:
        Tuple of (epoch, model, criterion, optimizer, trainloss_list,
        trainac_list, valloss_list, valac_list, best_valloss).
    """
    # Deserialize onto the CPU so that a checkpoint saved from CUDA tensors can
    # still be opened on a machine without a GPU. The load_state_dict calls
    # below copy the values into the existing parameters / optimizer state, so
    # those stay on whatever device they already live on.
    checkpoint = torch.load(PATH, map_location='cpu')

    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    criterion.load_state_dict(checkpoint['criterion_state_dict'])

    return (
        checkpoint['epoch'],
        model,
        criterion,
        optimizer,
        checkpoint['trainloss_list'],
        checkpoint['trainac_list'],
        checkpoint['valloss_list'],
        checkpoint['valac_list'],
        checkpoint['best_valloss'],
    )

---
> Function to Load Dataset
---

In [None]:
# loading datasets
def load_dataset(batch_size=128, val_fraction=0.1, seed=42):
    """Read the CSV datasets and build the train/validation/test dataloaders.

    The results are published through the module-level globals ``trainset``,
    ``valset``, ``train_dataloader``, ``val_dataloader`` and
    ``test_dataloader``; nothing is returned.

    Args:
        batch_size: batch size used for all three dataloaders.
        val_fraction: share of the original and of the augmented training data
            that is held out for validation.
        seed: seed for the random train/validation splits. A fixed seed keeps
            the split identical across kernel restarts, so a run resumed from
            a checkpoint keeps validating on the same samples.
    """
    global trainset, valset, train_dataloader, val_dataloader, test_dataloader

    trainset_path = 'train_set.csv'
    testset_path = 'test_set.csv'

    # One dedicated generator drives both splits, so they are reproducible
    # without touching the global torch RNG.
    split_rng = torch.Generator().manual_seed(seed)

    def _split(dataset):
        """Return (validation_part, training_part) of ``dataset``."""
        held_out = int(val_fraction * len(dataset))
        remaining = len(dataset) - held_out
        return torch.utils.data.random_split(dataset, [held_out, remaining], generator=split_rng)

    # read from csv
    base_trainset, testset = csv_loader.read_from_csv(trainset_path, testset_path)

    # splitting the original training data into validation and training parts
    base_valset, base_trainset = _split(base_trainset)

    # four augmented variants of the training CSV (what each variant does is
    # defined in csv_loader)
    aug_trainset = torch.utils.data.ConcatDataset([
        csv_loader.read_from_csv_aug_lu(trainset_path),
        csv_loader.read_from_csv_aug_ru(trainset_path),
        csv_loader.read_from_csv_aug_lb(trainset_path),
        csv_loader.read_from_csv_aug_rb(trainset_path),
    ])

    # NOTE(review): the augmented samples are split independently of the
    # originals. If the aug loaders derive their samples from the same rows as
    # read_from_csv (TODO confirm), augmented copies of training images can end
    # up in the validation set and inflate the validation accuracy.
    aug_valset, aug_trainset = _split(aug_trainset)

    trainset = torch.utils.data.ConcatDataset([base_trainset, aug_trainset])
    valset = torch.utils.data.ConcatDataset([base_valset, aug_valset])

    # construct dataloaders; only the training data is shuffled
    train_dataloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(valset, batch_size=batch_size, shuffle=False)
    test_dataloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

    # Sanity check: how many distinct labels the validation set contains.
    # This indexes every validation sample once.
    val_label_set = {str(valset[i][1]) for i in range(len(valset))}
    print("Distinct character in valset:", len(val_label_set))
    print(len(trainset), len(valset))


> Initialize Model

In [None]:
# Build the network and place it on the GPU when CUDA is available; otherwise
# keep it on the CPU instead of failing on the .cuda() call.
model = Model().to("cuda" if torch.cuda.is_available() else "cpu")

> Call Dataset Loader

In [None]:
# Populates the module-level trainset/valset and the train/val/test dataloaders
# that the training and test functions read as globals.
load_dataset()

> Function to plot Train and validation loss and accuracy

In [None]:
# graphical representation of train loss & validation loss
def plot_train_and_validation_data(epoch, train_data, valid_data, type="Loss"):
    """Plot a training curve and a validation curve over the epochs.

    Args:
        epoch: number of epochs to draw on the x axis; ``train_data`` and
            ``valid_data`` must each contain this many values.
        train_data: per-epoch training values.
        valid_data: per-epoch validation values.
        type: name of the plotted quantity. It is used in the labels and the
            title; "Loss" puts the legend in the upper right corner, anything
            else in the lower right corner.
    """
    epochs_axis = np.arange(0, epoch)

    plt.figure(figsize=(13, 5))
    plt.plot(epochs_axis, train_data, label="Train " + type, linewidth=3)
    plt.plot(epochs_axis, valid_data, label="Validation " + type, linewidth=3)
    plt.xlabel('Epochs')
    plt.ylabel(type)
    plt.title(type + " Plots")

    # Create the legend exactly once. Calling plt.legend() a second time (as
    # was done before to fetch the texts) rebuilds it with the default
    # location and silently drops the corner chosen here.
    legend_loc = 'upper right' if type == "Loss" else 'lower right'
    plt.legend(loc=legend_loc, fontsize='x-large')

    plt.show()

> Train and validation function


In [None]:
# this function will perform training & validation
def _run_epoch(model, dataloader, criterion, device, optimizer=None):
    """Run one pass over ``dataloader``: train if ``optimizer`` is given, else evaluate.

    Returns:
        Tuple of (mean loss per batch, number of correctly classified samples).
    """
    training = optimizer is not None
    model.train(training)  # train(False) is the same as eval()

    loss_sum = 0.0
    matched = 0

    # Gradients are only needed while training; switching them off during
    # validation avoids building an autograd graph for every batch.
    with torch.set_grad_enabled(training):
        for img, label in dataloader:
            # The model lives on `device`, so the batch has to be there too
            # (a no-op when the dataset already yields tensors on that device).
            img = img.to(device)
            label = label.to(device)

            if training:
                optimizer.zero_grad()

            # pass image to model to get prediction
            prediction = model(img)

            loss = criterion(prediction, label)
            loss_sum += loss.item()

            # softmax is monotonic, so the arg-max of the raw outputs is the
            # predicted class; count the hits of the whole batch at once.
            matched += (prediction.argmax(dim=1) == label).sum().item()

            if training:
                # backward pass, then let the optimizer update the parameters
                loss.backward()
                optimizer.step()

    return loss_sum / max(len(dataloader), 1), matched


# this function will perform training & validation
def train_and_validation(model):
    """Train ``model`` on the global train dataloader and validate it every epoch.

    Reads the globals ``train_dataloader`` and ``val_dataloader`` (set up by
    load_dataset). Training resumes from './data/checkpoint_bd_char_testing.pt'
    when that file exists. After every epoch a checkpoint is written, and the
    model weights are additionally saved to './data/model_bd_char_testing.dth'
    whenever the validation loss improves. Loss and accuracy curves are plotted
    at the end.

    Exactly ``totalEpoch`` epochs are run in total, counting the epochs
    already stored in a resumed checkpoint.

    Args:
        model: the network to train; it is moved to the GPU when CUDA is
            available and stays on the CPU otherwise.
    """
    global train_dataloader, val_dataloader

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Move the model before the optimizer is created so the optimizer holds
    # the parameters that are actually trained.
    model.to(device)

    # defining loss criterion function
    criterion = torch.nn.CrossEntropyLoss()

    # defining optimizer with learning rate=0.001
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    trainLoss = list(); trainAccuracy = list()
    valLoss = list(); valAccuracy = list()
    # +inf instead of an arbitrary number: the first epoch always counts as an
    # improvement, however large its validation loss is.
    bestValLoss = float('inf')
    epoch = 0
    totalEpoch = 100   # higher value will perform more training but take more time
    checkpoint_path = './data/checkpoint_bd_char_testing.pt'
    best_model_path = "./data/model_bd_char_testing.dth"

    # torch.save does not create missing directories.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    # Restoring training checkpoint if available
    if os.path.isfile(checkpoint_path):
        (epoch, model, criterion, optimizer, trainLoss, trainAccuracy,
         valLoss, valAccuracy, bestValLoss) = load_checkpoint(model, criterion, optimizer, checkpoint_path)

    train_size = max(len(train_dataloader.dataset), 1)
    val_size = max(len(val_dataloader.dataset), 1)

    # `epoch` is the number of epochs completed so far, hence `<` (the former
    # `<=` ran one epoch more than totalEpoch).
    while epoch < totalEpoch:
        totalTrainLoss, trainMatched = _run_epoch(model, train_dataloader, criterion, device, optimizer)
        taccuracy = trainMatched / train_size
        trainLoss.append(totalTrainLoss); trainAccuracy.append(taccuracy)

        totalValLoss, valMatched = _run_epoch(model, val_dataloader, criterion, device)
        accuracy = valMatched / val_size
        valLoss.append(totalValLoss); valAccuracy.append(accuracy)

        print("Completed Loop No = ", epoch)
        print("Train Loss:", totalTrainLoss, ", Validation Loss:", totalValLoss, ", Train Accuracy: ", taccuracy, ", Val Accuracy: ", accuracy)
        epoch += 1

        if totalValLoss < bestValLoss:
            bestValLoss = totalValLoss
            print("Saving Model State with Validation Loss: ", totalValLoss)
            torch.save(model.state_dict(), best_model_path)

        # storing checkpoints
        save_checkpoint(epoch, model.state_dict(), criterion.state_dict(), optimizer.state_dict(),
                        trainLoss, trainAccuracy, valLoss, valAccuracy, bestValLoss, checkpoint_path)

    print("\nTraining & Validation Completed\n")

    # Use the history length for the x axis so the plot always matches the
    # recorded data, also for histories restored from a checkpoint.
    plot_train_and_validation_data(len(trainLoss), trainLoss, valLoss)
    plot_train_and_validation_data(len(trainAccuracy), trainAccuracy, valAccuracy, "Accuracy")

> Training Starts

In [None]:
# Runs the full training loop on the model created above; requires that
# load_dataset() has already populated the global dataloaders.
train_and_validation(model)

>Test Function

In [None]:
# this function will perform the testing of the trained model
def test(model):
    """Evaluate the best saved model on the global test dataloader.

    Loads the weights stored in './data/model_bd_char_testing.dth' into
    ``model``, classifies every batch of ``test_dataloader`` (set up by
    load_dataset) and prints the resulting accuracy.

    Args:
        model: network with the same architecture as the saved weights; it is
            moved to the GPU when CUDA is available and stays on the CPU
            otherwise.

    Returns:
        The test accuracy as a float in [0, 1] (0.0 if the dataloader yields
        no samples).
    """
    global test_dataloader

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # loading model & selecting mode; map_location lets weights saved on a GPU
    # be loaded on a CPU-only machine as well
    model.load_state_dict(torch.load("./data/model_bd_char_testing.dth", map_location=device))
    model.to(device).eval()

    tot = 0; correct = 0

    # Inference only: no autograd graph is needed. The images themselves are
    # deliberately not collected, since keeping every test image in a list
    # only costs memory.
    with torch.no_grad():
        for img, label in test_dataloader:
            # softmax is monotonic, so the arg-max of the raw outputs already
            # is the predicted class
            predicted = model(img.to(device)).argmax(dim=1)

            tot += predicted.size(0)
            correct += (predicted == label.to(device)).sum().item()

    # An empty dataloader would otherwise divide by zero.
    test_accuracy = correct / tot if tot else 0.0
    print("Test Accuracy: ", test_accuracy)
    return test_accuracy

>Call Test

In [None]:
    test(model)  # evaluates the weights saved to ./data/model_bd_char_testing.dth on the test dataloader