# Setting up the GPU

In [None]:
import torch
# Use the first CUDA device when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(f'We are training on {device} for this experiment')
# Stop right away when no GPU runtime is attached.
assert device.type == 'cuda','GPU is not selected in hardware accelerator dropdown '

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the checkpoint folder used below lives there.
drive.mount('/content/drive')

In [None]:
import os

# Folder on the mounted Drive that receives every checkpoint file.
your_google_drive_path = "/content/drive/MyDrive/checkpoints/"
# Abort early when the folder does not exist.
if not os.path.isdir(your_google_drive_path):
    raise AssertionError(f"{your_google_drive_path} is not a valid location")

In [None]:
%matplotlib inline
import torch
import torchvision
import torchvision.transforms as transforms

In [None]:
# Convert images to tensors, then normalize with mean 0.5 / std 0.5.
# Normalize documents mean/std as per-channel sequences, so 1-tuples are
# passed (the former `(0.5)` was only a parenthesised float).
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

batch_size = 8

#Get dataset class object corresponding to train split
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)

#Create data loader from train dataset object
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

#Get dataset class object corresponding to test split
testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)

#Create data loader from test dataset object (kept in dataset order)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

# Class names indexed by label
classes = ('0','1', '2', '3', '4', '5', '6', '7', '8', '9')

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image


def imshow(img):
    """Display a normalized, channel-first image tensor with matplotlib.

    Args:
        img: Tensor laid out as (C, H, W), e.g. the result of
            ``torchvision.utils.make_grid``. It is un-normalized with
            ``img / 2 + 0.5``, i.e. it is assumed to have been normalized
            with mean 0.5 and std 0.5.
    """
    img = img / 2 + 0.5     # unnormalize
    # detach + cpu so GPU tensors and tensors that track gradients can be shown too
    npimg = img.detach().cpu().numpy()
    # matplotlib wants channel-last (H, W, C)
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# Pull one batch from the shuffled training loader
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Tile the batch into a single grid image and display it
imshow(torchvision.utils.make_grid(images))
# Class name of every image in the batch, each padded to five characters
print(' '.join([f'{classes[labels[j]]:5s}' for j in range(batch_size)]))

In [None]:
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Eight-layer CNN with additive skip connections for 1-channel images.

    Every convolution is 3x3 with padding 1, so the spatial size is kept until
    the global average pool. ``forward`` maps an input of shape (N, 1, H, W)
    to raw class scores (logits) of shape (N, 10).

    Note: ``bn2`` is applied after conv2, conv3 and conv4, and ``bn4`` after
    conv6, conv7 and conv8, so those convolutions share one set of BatchNorm
    parameters and running statistics. This is left unchanged because giving
    each convolution its own BatchNorm would change the state_dict keys of
    existing checkpoints.
    """

    def __init__(self):
        super().__init__()
        # Registration order is kept as-is: optimizer state dicts match
        # parameters by position.
        self.conv1 = nn.Conv2d(1, 64, 3,padding= 1)
        self.bn1 = nn.BatchNorm2d(64)
        self.bn2 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(512)
        self.bn3 = nn.BatchNorm2d(256)
        self.conv2 = nn.Conv2d(64, 128, 3,padding= 1)
        self.conv3 = nn.Conv2d(128, 128, 3,padding= 1)
        self.conv4 = nn.Conv2d(128, 128, 3, padding= 1)
        self.conv5 = nn.Conv2d(128, 256, 3,padding= 1)
        self.conv6 = nn.Conv2d(256,512, 3, padding=1)
        self.conv7 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv8 = nn.Conv2d(512, 512, 3,padding= 1)
        self.fc1 = nn.Linear(512, 10)
        # Projects the 128-channel features to 512 channels for the skip into conv7
        self.temp = nn.Conv2d(128, 512, 3, padding= 1)

    def forward(self, x):
        """Return unnormalized class scores (logits) of shape (N, 10).

        No softmax is applied here: ``nn.CrossEntropyLoss`` applies
        log-softmax itself, and feeding it probabilities instead of logits
        flattens the gradients. The arg-max prediction is the same for logits
        and probabilities; call ``F.softmax(logits, dim=1)`` when
        probabilities are needed.
        """
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        y = x                                    # first skip branch (128 channels)
        x = F.relu(self.bn2(self.conv3(x)))
        x = F.relu(self.bn2(self.conv4(x)))
        z = self.temp(x)                         # 128 -> 512 channels for the later skip
        x = F.relu(self.bn3(self.conv5(x + y)))  # add y before conv5
        x = F.relu(self.bn4(self.conv6(x)))
        y = x                                    # second skip branch (512 channels)
        x = F.relu(self.bn4(self.conv7(x + z)))  # add z before conv7
        x = F.relu(self.bn4(self.conv8(x)))
        x = F.adaptive_avg_pool2d(x + y, (1, 1))  # add y before global average pooling
        x = torch.flatten(x, 1)
        return self.fc1(x)
# Build the network and move its parameters to the selected device.
net = Net().to(device)

In [None]:
import torch.optim as optim

# Cross-entropy between the network outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()
# SGD with momentum over all parameters of `net`.
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

In [None]:
def validator(testloader=None,net=None):
    """Compute and print the classification accuracy of ``net`` on ``testloader``.

    Args:
        testloader: Iterable yielding ``(images, labels)`` batches.
        net: Model called as ``net(images)``; it returns one score per class
            along dimension 1.

    Returns:
        Fraction of correctly classified samples as a float in [0, 1];
        ``0.0`` when the loader yields no samples.

    NOTE(review): the model's train/eval mode is left untouched, as before.
    ``Net`` reuses its BatchNorm layers across several convolutions, so the
    effect of switching to ``eval()`` on accuracy should be checked before
    changing it here.
    """
    correct = 0
    total = 0
    # Send batches to wherever the model lives; fall back to the notebook-level
    # `device` when the model exposes no parameters.
    params = net.parameters() if isinstance(net, torch.nn.Module) else iter(())
    first_param = next(params, None)
    target = first_param.device if first_param is not None else device

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for images, labels in testloader:
            images = images.to(target)
            labels = labels.to(target)

            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest score is the prediction;
            # max is taken along dimension 1, since dimension 0 is the batch dimension
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # nothing to evaluate: avoid dividing by zero below
        print('Accuracy of the network on the 0 test images: n/a (empty loader)')
        return 0.0

    print(f'Accuracy of the network on the {total} test images: {100 * correct // total} %')
    return correct/total

In [None]:
# Epoch bounds for the training loop below, which iterates range(start_epoch, end_epoch).
start_epoch=0
end_epoch=10

In [None]:
# Best test accuracy so far; starting below zero makes the first epoch count as an improvement.
best_accuracy = -1.0
for epoch in range(start_epoch, end_epoch):  # one full pass over the training data per epoch
    running_loss = 0.0
    for i, data in enumerate(trainloader):
        # a batch arrives as [inputs, labels]; move both onto the training device
        inputs, labels = (part.to(device) for part in data)

        # clear old gradients, then forward pass, loss, backward pass, weight update
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # accumulate the loss and report its mean after every 2000th mini-batch
        running_loss += loss.item()
        if (i + 1) % 2000 == 0:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

    # score the model on the test loader once the epoch is done
    current_accuracy = validator(testloader=testloader, net=net)

    # everything needed to resume from, or reuse, the model as of this epoch
    snapshot = {
        'epoch': epoch,
        'model_state_dict': net.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }
    if current_accuracy > best_accuracy:
        # new best: remember the score and keep a separate copy of the model
        best_accuracy = current_accuracy
        torch.save(snapshot, your_google_drive_path + 'best_model.pth')
    # the rolling checkpoint is overwritten after every epoch
    torch.save(snapshot, your_google_drive_path + 'checkpoint.pth')
print('Finished Training')

In [None]:
# Take the first batch of the test loader
dataiter = iter(testloader)
images, labels = next(dataiter)
plt.subplots(figsize=(2,4))
# Tile the first eight images two per row. The tensors were normalized with
# mean 0.5 / std 0.5, so map them back with x / 2 + 0.5 before plotting;
# otherwise matplotlib clips the out-of-range values.
plt.imshow((torchvision.utils.make_grid(images[:8],nrow=2) / 2 + 0.5).permute(1,2,0))
plt.show()
print('GroundTruth: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(8)))

In [None]:
# Rebuild the architecture and restore the weights saved for the best MNIST epoch.
net_best = Net().to(device)
# map_location places the stored tensors on the current device regardless of
# the device they were saved from.
checkpoint = torch.load(your_google_drive_path+'best_model.pth', map_location=device)
net_best.load_state_dict(checkpoint['model_state_dict'])


images = images.to(device)
labels = labels.to(device)

# Prediction only: skip building the autograd graph so no activations are kept in memory.
# NOTE(review): the model stays in training mode, as in the rest of the notebook;
# Net reuses bn2/bn4 across convolutions, so verify accuracy before adding eval().
with torch.no_grad():
    outputs = net_best(images)

In [None]:
# Predicted class = index of the largest score along dimension 1
predicted = torch.max(outputs, dim=1).indices
# Print true and predicted class names for the eight images
for title, picks in (('GroundTruth: ', labels), ('Predicted:   ', predicted)):
    print(title, ' '.join(f'{classes[picks[j]]:5s}' for j in range(8)))


Native Dataset

In [None]:
from PIL import Image
from torch.utils.data import Dataset
class CustomDataset(Dataset):
    """Dataset of images stored as ``data_folder/<class name>/<image file>``.

    Attributes:
        classes: Sorted list of class (sub-directory) names; a sample's label
            is the index of its class in this list.
        data: List of ``(image_path, label)`` pairs.
    """

    def __init__(self, data_folder, transform=None):
        """
        Args:
            data_folder: Root directory containing one sub-directory per class.
            transform: Optional callable applied to each loaded PIL image.
        """
        self.data_folder = data_folder
        self.transform = transform
        # os.listdir returns entries in arbitrary order, so sort to get the
        # same class -> index mapping on every run. Only visible directories
        # are classes: stray files and hidden folders such as
        # .ipynb_checkpoints are skipped.
        self.classes = sorted(
            name for name in os.listdir(data_folder)
            if not name.startswith('.')
            and os.path.isdir(os.path.join(data_folder, name))
        )
        self.data = self._load_data()

    def _load_data(self):
        """Return ``(path, label)`` for every visible entry of every class folder."""
        data = []
        for i, class_folder in enumerate(self.classes):
            class_path = os.path.join(self.data_folder, class_folder)
            # sorted for a reproducible sample order
            for img_name in sorted(os.listdir(class_path)):
                if img_name.startswith('.'):
                    continue  # hidden files (e.g. .DS_Store) are not images
                data.append((os.path.join(class_path, img_name), i))
        return data

    def __len__(self):
        """Number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``."""
        img_path, label = self.data[idx]
        # NOTE(review): the image keeps the mode it was stored in; Net's first
        # convolution takes 1 channel, so the files are presumably greyscale - confirm.
        img = Image.open(img_path)

        if self.transform:
            img = self.transform(img)

        return img, label

In [None]:
# Tensor conversion followed by normalization with mean 0.5 / std 0.5.
# Normalize documents mean/std as per-channel sequences, hence the 1-tuples
# (the former `(0.5)` was only a parenthesised float).
transform_1 = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])
# Hindi numerals stored on Drive, one folder per class
data=CustomDataset(data_folder="/content/drive/MyDrive/HINDI_NUMERALS/",transform=transform_1)

NameError: name 'transforms' is not defined

In [None]:
# 80 % of the samples for training, the remainder for testing
train_size = int(0.8 * len(data))
test_size = len(data) - train_size
batch_size=8
# Randomly split the dataset into train and test sets. The generator is seeded
# so every run gets the same split and accuracies stay comparable across runs.
train_dataset, test_dataset = torch.utils.data.random_split(
    data, [train_size, test_size], generator=torch.Generator().manual_seed(0))
trainloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)
# Class names come from the dataset's folder names
classes = data.classes
dataiter = iter(trainloader)
images, labels = next(dataiter) # one batch of at most batch_size images, laid out as [batch, channel, height, width]
# show images
imshow(torchvision.utils.make_grid(images))
# print one label per image in the batch (the batch may hold fewer than 8 images)
print(' '.join(f'{classes[labels[j]]:5s}' for j in range(len(labels))))

In [None]:
# Epoch bounds for the training loop below, which iterates range(start_epoch, end_epoch).
start_epoch=0
end_epoch=10

In [None]:
# Drop references left over from the previous run so their memory can be
# reclaimed. pop() is used instead of `del` so the cell also works when some
# of these names were never created (e.g. an earlier training cell was skipped).
for _stale in ('inputs', 'images', 'labels', 'loss', 'optimizer'):
    globals().pop(_stale, None)
del _stale

In [None]:
# Freshly initialised network, trained from scratch on the custom dataset.
net1=Net().to(device)

In [None]:
# Cross-entropy between the network outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()
# SGD with momentum over all parameters of `net1`.
optimizer = optim.SGD(net1.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Best test accuracy so far; starting below zero makes the first epoch count as an improvement.
best_accuracy = -1.0
for epoch in range(start_epoch,end_epoch):  # loop over the dataset multiple times
    running_loss = 0.0
    # The loop variable is deliberately not named `data`: that name holds the
    # CustomDataset, and rebinding it to a batch would break a re-run of the
    # train/test split cell.
    for i, batch in enumerate(trainloader, 0):
        # get the inputs; batch is a list of [inputs, labels]
        inputs, labels = batch

        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net1(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

    # evaluate on the held-out split after every epoch
    current_accuracy = validator(testloader=testloader,net=net1)

    # everything needed to resume from, or reuse, the model as of this epoch
    snapshot = {
        'epoch': epoch,
        'model_state_dict': net1.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }
    if current_accuracy>best_accuracy:
        # new best: remember the score and keep a separate copy of the model
        best_accuracy = current_accuracy
        torch.save(snapshot, your_google_drive_path+'hindi_best_model.pth')

    #Save model as checkpoint (overwritten after every epoch)
    torch.save(snapshot, your_google_drive_path+'hindi_checkpoint.pth')
print('Finished Training')

In [None]:
# Take one batch from the test loader
dataiter = iter(testloader)
images, labels = next(dataiter)
# print images
imshow(torchvision.utils.make_grid(images))
# No model has been run on this batch yet, so show its true labels. The
# previous line printed `predicted`, which still held results from an earlier
# cell, under a "Predicted" heading.
print('GroundTruth: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(len(labels))))

In [None]:
# Rebuild the architecture and restore the best weights of the from-scratch run.
net_best = Net().to(device)
# map_location places the stored tensors on the current device regardless of
# the device they were saved from.
checkpoint = torch.load(your_google_drive_path+'hindi_best_model.pth', map_location=device)
net_best.load_state_dict(checkpoint['model_state_dict'])


images = images.to(device)
labels = labels.to(device)

# Prediction only: skip building the autograd graph so no activations are kept in memory.
# NOTE(review): the model stays in training mode, as in the rest of the notebook;
# Net reuses bn2/bn4 across convolutions, so verify accuracy before adding eval().
with torch.no_grad():
    outputs = net_best(images)

In [None]:
# Predicted class = index of the largest score along dimension 1
predicted = torch.max(outputs, dim=1).indices
# Print true and predicted class names for the eight images
for title, picks in (('GroundTruth: ', labels), ('Predicted:   ', predicted)):
    print(title, ' '.join(f'{classes[picks[j]]:5s}' for j in range(8)))

# Training the model using fine-tuning
The initial weights are taken from the MNIST classification model.

In [None]:
# Fine-tuning starts from the weights of the best MNIST model.
net2=Net().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net2.parameters(), lr=0.001, momentum=0.9)
# map_location places the stored tensors on the current device regardless of
# the device they were saved from.
checkpoint = torch.load(your_google_drive_path+'best_model.pth', map_location=device)
net2.load_state_dict(checkpoint['model_state_dict'])
# Only the model weights are reused. The MNIST optimizer state is not
# restored: loading it would also bring back the MNIST learning rate and
# momentum buffers. The epoch counter restarts at 0 because this is a new
# training run on a different dataset.
start_epoch = 0

Set `requires_grad` to `False` for every layer except the final fully connected layer (`fc1`).

In [None]:
# Single pass over all parameters: only those of the final fully connected
# layer (names starting with "fc1.") stay trainable, everything else is frozen.
for name, param in net2.named_parameters():
    param.requires_grad = name.startswith('fc1.')

In [None]:
# Fresh SGD optimizer for the fine-tuning run. Every parameter of net2 is
# handed over; parameters that receive no gradient are skipped by the optimizer step.
optimizer = optim.SGD(net2.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Drop the loss tensor left over from the previous training run. pop() is used
# instead of `del` so the cell also works when `loss` was never created.
globals().pop('loss', None)

In [None]:
# Fine-tuning is a new run, so epochs are counted from zero.
start_epoch=0
# Best test accuracy so far; starting below zero makes the first epoch count as an improvement.
best_accuracy = -1.0
for epoch in range(start_epoch,end_epoch):  # loop over the dataset multiple times
    running_loss = 0.0
    # The loop variable is deliberately not named `data`: that name holds the
    # CustomDataset, and rebinding it to a batch would break a re-run of the
    # train/test split cell.
    for i, batch in enumerate(trainloader, 0):
        # get the inputs; batch is a list of [inputs, labels]
        inputs, labels = batch

        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net2(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

    # evaluate on the held-out split after every epoch
    current_accuracy = validator(testloader=testloader,net=net2)

    # everything needed to resume from, or reuse, the model as of this epoch
    snapshot = {
        'epoch': epoch,
        'model_state_dict': net2.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }
    if current_accuracy>best_accuracy:
        # new best: remember the score and keep a separate copy of the model
        best_accuracy = current_accuracy
        torch.save(snapshot, your_google_drive_path+'hindi_ft_best_model.pth')

    #Save model as checkpoint (overwritten after every epoch)
    torch.save(snapshot, your_google_drive_path+'hindi_finetuned_checkpoint.pth')
print('Finished Training')

Ground Truth

In [None]:
# Take one batch from the test loader
dataiter = iter(testloader)
images, labels = next(dataiter)
# print images
imshow(torchvision.utils.make_grid(images))
# one label per displayed image (previously only the first four were printed)
print('GroundTruth: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(len(labels))))

Predicting the above images using the fine-tuned model

In [None]:
# Rebuild the architecture and restore the best weights of the fine-tuning run.
net_best = Net().to(device)
# map_location places the stored tensors on the current device regardless of
# the device they were saved from.
checkpoint = torch.load(your_google_drive_path+'hindi_ft_best_model.pth', map_location=device)
net_best.load_state_dict(checkpoint['model_state_dict'])


images = images.to(device)
labels = labels.to(device)

# Prediction only: skip building the autograd graph so no activations are kept in memory.
# NOTE(review): the model stays in training mode, as in the rest of the notebook;
# Net reuses bn2/bn4 across convolutions, so verify accuracy before adding eval().
with torch.no_grad():
    outputs = net_best(images)

In [None]:
# Predicted class = index of the largest score along dimension 1
_, predicted = torch.max(outputs, 1)
# Print true and predicted names for every image in the batch
# (previously only the first four of the displayed images were covered).
print('GroundTruth: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(len(labels))))
print('Predicted: ', ' '.join(f'{classes[predicted[j]]:5s}' for j in range(len(predicted))))