In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file in it.
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

ALL IMPORTS

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as T
from torchvision import models
from torchvision.transforms import Compose, Resize, InterpolationMode, ToTensor, Normalize
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from concurrent.futures import ThreadPoolExecutor

import matplotlib.pyplot as plt
from torchsummary import summary
from PIL import Image

import os, copy, zipfile, time
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

DATA MANIPULATION & PREPROCESSING

In [None]:
# Writable working directory: archives are extracted here and outputs are saved here.
baseDir = '/kaggle/working/'
# Competition input directory holding train.zip, test.zip and trainLabels.csv.
# Both paths end with '/' because later cells build file paths by string concatenation.
inDir = '/kaggle/input/street-view-getting-started-with-julia/'

In [None]:
def dataExtraction(extractionDir, filePath):
    """Extract a ZIP archive into a directory, creating the directory if needed.

    Problems with the archive are reported on stdout instead of being raised,
    so a notebook run keeps going; the return value lets callers detect them.

    Args:
        extractionDir: Destination directory; created (with parents) if missing.
        filePath: Path of the ZIP archive to extract.

    Returns:
        True when the archive was extracted, False when the archive does not
        exist or is not a valid ZIP file.
    """
    # exist_ok=True avoids the race between a separate existence check and the creation.
    os.makedirs(extractionDir, exist_ok=True)

    try:
        with zipfile.ZipFile(filePath, "r") as zfile:
            zfile.extractall(extractionDir)

    except zipfile.BadZipFile:
        print(f"Error: '{filePath}' is not a valid ZIP file or may be corrupted.")
        return False

    except FileNotFoundError:
        print(f"Error: The file '{filePath}' does not exist.")
        return False

    print(f"Extraction complete. Files have been extracted to: {extractionDir}")
    return True

In [None]:
# Unpack both competition archives into the working directory.
for archiveName in ('train.zip', 'test.zip'):
    dataExtraction(baseDir, inDir + archiveName)

In [None]:
# Sanity check: the working directory exists (the boolean is shown as the cell output).
os.path.isdir(baseDir)

In [None]:
class ImageDataset(Dataset):
    """In-memory dataset of labelled ``.Bmp`` images described by a CSV file.

    The CSV must contain a ``Class`` column. Each row's first column is used as
    the image file stem (``<stem>.Bmp`` inside ``imgDir``) and its second column
    is returned as the label, so the expected layout is ``<id>, Class``.

    Attributes:
        toLabel: dict mapping each original class value to an integer index,
            assigned in sorted order of the unique class values.
        labels: list of rows ``[id, class_index]``.
        images: list of loaded (and transformed) images, aligned with ``labels``.
    """

    def __init__(self, csvFile, imgDir, transform=None):
        """Read the label file and eagerly load every image into memory.

        Args:
            csvFile: Path to the labels CSV.
            imgDir: Directory containing the ``<id>.Bmp`` files.
            transform: Optional callable applied to each RGB PIL image.

        Raises:
            Any exception raised while loading an image (e.g. a missing file)
            is propagated instead of leaving a ``None`` entry in ``images``.
        """
        self.imgDir = imgDir
        self.transform = transform

        frame = pd.read_csv(csvFile)
        unq = sorted(frame['Class'].unique())
        self.toLabel = {char: i for i, char in enumerate(unq)}
        frame['Class'] = frame['Class'].map(self.toLabel)
        self.labels = frame.values.tolist()

        # Load all images into memory using parallel processing.
        self.images = [None] * len(self.labels)
        with ThreadPoolExecutor() as executor:
            # executor.map only re-raises worker exceptions when its results are
            # iterated; consuming them here makes a failed load fail loudly.
            list(executor.map(self.loadImage, range(len(self.labels))))

    def loadImage(self, idx):
        """Load image ``idx`` from disk, transform it and store it in ``self.images``."""
        imgPath = os.path.join(self.imgDir, str(self.labels[idx][0]) + ".Bmp")

        # The context manager closes the underlying file once the pixels are converted.
        with Image.open(imgPath) as handle:
            image = handle.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        self.images[idx] = image

    def __len__(self):
        """Number of labelled samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, class_index)`` for sample ``idx``."""
        image = self.images[idx]
        label = self.labels[idx][1]
        return image, label



In [None]:
# Side length in pixels that every image is resized to (imgSize x imgSize) by the transform.
imgSize = 224

In [None]:
# Per-channel (R, G, B) mean and standard deviation passed to T.Normalize and
# used again by deNormalize when plotting.
# NOTE(review): presumably computed from the training images — confirm the source of these values.
mean_rgb = [0.51633593, 0.47992162, 0.45069369]
std_rgb = [0.257224, 0.246612,  0.253914]

In [None]:
# Preprocessing pipeline: resize to a square, convert to a tensor, standardise per channel.
transform = T.Compose([
    T.Resize((imgSize, imgSize)),
    T.ToTensor(),
    T.Normalize(mean_rgb, std_rgb),
])

# Build the fully in-memory training dataset and pickle it into the working directory.
dataset = ImageDataset(inDir + 'trainLabels.csv', baseDir + 'train', transform)
torch.save(dataset, baseDir + 'charDatasetTrain.pt')

# To reload the pickled dataset instead of rebuilding it:
# torch.serialization.add_safe_globals([ImageDataset, Compose, Resize, ToTensor, Normalize, InterpolationMode])
# dataset = torch.load(baseDir + 'charDatasetTrain.pt')

# Seeded 90/10 split so the held-out subset is the same on every run.
trainSize = int(0.9 * len(dataset))
testSize = len(dataset) - trainSize
splitGenerator = torch.Generator().manual_seed(123)
trainData, testData = torch.utils.data.random_split(
    dataset, [trainSize, testSize], generator=splitGenerator
)

In [None]:
# Mini-batch size shared by the training and held-out loaders.
batchSize = 32
# Training batches are reshuffled every epoch and the final partial batch is dropped.
trainLoader = DataLoader(
    trainData,
    batch_size=batchSize,
    shuffle=True,
    drop_last=True,
)
# The held-out loader keeps a fixed order and every sample.
testLoader = DataLoader(
    testData,
    batch_size=batchSize,
    shuffle=False,
)

In [None]:
# Number of samples in the training split.
print(len(trainData))

In [None]:
def deNormalize(image, mean, std):
    """Undo a per-channel ``(x - mean) / std`` normalisation.

    Args:
        image: Tensor whose last three dimensions are (channels, height, width);
            a leading batch dimension is also accepted through broadcasting.
        mean: Per-channel means (one value per channel).
        std: Per-channel standard deviations (one value per channel).

    Returns:
        Tensor ``image * std + mean`` computed on the same device as ``image``.
    """
    # Build the statistics on the image's device so CUDA tensors work too; keep
    # a floating dtype even for integer images so the means are not truncated.
    dtype = image.dtype if image.is_floating_point() else torch.get_default_dtype()
    # view(-1, 1, 1) shapes the statistics as (channels, 1, 1) for any channel count.
    mean = torch.as_tensor(mean, dtype=dtype, device=image.device).view(-1, 1, 1)
    std = torch.as_tensor(std, dtype=dtype, device=image.device).view(-1, 1, 1)
    return image * std + mean

In [None]:
import random

# 4x4 grid showing 16 training samples chosen with a fixed seed.
fig, axes = plt.subplots(4, 4, figsize=(12, 6))
axes = axes.flatten()
random.seed(124)
random_indices = random.sample(range(len(trainData)), 16)

for ax, index in zip(axes, random_indices):
    image, label = trainData[index]
    # Undo the normalisation, clip to the displayable range, and move channels last for imshow.
    image = torch.clamp(deNormalize(image, mean_rgb, std_rgb), 0, 1).permute(1, 2, 0)

    ax.imshow(image)
    ax.set_title(f"Label: {label}\nShape: {image.shape}")
    ax.axis("off")

plt.tight_layout()
plt.show()

In [None]:
# Load the CSV file
df = pd.read_csv(inDir + 'trainLabels.csv')
labels = df['Class']

# Count the samples per class, ordered by class value. A bar chart is used
# rather than plt.hist because the labels are categorical: histogram bin centres
# do not line up with the per-category tick positions, so bars drift away from
# their labels.
classCounts = labels.value_counts().sort_index()

# Plot the frequencies
plt.figure(figsize=(20, 6))
plt.bar(classCounts.index.astype(str), classCounts.values, color='navy', edgecolor='black')
plt.xlabel('Class Labels')
plt.ylabel('Frequency')
plt.title('Class Frequency Distribution')
plt.xticks(rotation=45, ha='right')  # Rotate labels for better readability
plt.show()

In [None]:
def createNet(numClasses=62, lr=0.003, pretrained=True):
    """Build a ResNet-50 classifier together with its loss and optimizer.

    The backbone's final fully connected layer is replaced by a two-layer head
    (``in_features -> 256 -> numClasses``). All parameters, backbone included,
    are handed to the optimizer, so the whole network is trained.

    Args:
        numClasses: Number of output classes (default 62).
        lr: Learning rate for the Adam optimizer (default 0.003).
        pretrained: Whether to start from ImageNet weights (default True).

    Returns:
        Tuple ``(net, lossFun, optimizer)``.
    """
    # `pretrained=` is deprecated in recent torchvision in favour of `weights=`;
    # fall back to the old keyword on versions that lack the weights enum.
    weightsEnum = getattr(models, 'ResNet50_Weights', None)
    if weightsEnum is not None:
        net = models.resnet50(weights=weightsEnum.IMAGENET1K_V1 if pretrained else None)
    else:
        net = models.resnet50(pretrained=pretrained)

    in_features = net.fc.in_features
    net.fc = nn.Sequential(
               nn.Linear(in_features, 256),
               nn.ReLU(inplace=True),
               nn.Linear(256, numClasses))
    lossFun = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    return net, lossFun, optimizer

In [None]:
# Create the model, loss and optimizer, then move the model to the selected device.
net, lossFun, optimizer = createNet()
net = net.to(device)
# summary(net, (3, imgSize, imgSize))
# Print the names of the network's top-level submodules.
for childName, _childModule in net.named_children():
    print(childName)
# net

In [None]:
def testNet(net, lossFun, optimizer, testLoader):
    net.eval()
    matches = 0
    for data, labels in testLoader:
        # print(labels)
        if device != 'cpu':
            data = data.to(device)
            labels = labels.to(device)
        with torch.no_grad():
            pred = net(data)
        if device != 'cpu':
            labels = labels.to(device)
            pred = pred.to(device)
        # print(pred)
        pred = torch.argmax(pred, axis=1)
        matches += (pred == labels).sum().item()
    # print(matches)
    acc = 100 * matches / len(testLoader.dataset)

    return acc

In [None]:
# Accuracy on the held-out split before trainNet has been called; shown as the cell output.
testNet(net, lossFun, optimizer, testLoader)

In [None]:
def trainNet(net, lossFun, optimizer, trainLoader, epochs, evalLoader=None):
    """Train ``net`` and evaluate it after every epoch.

    Args:
        net: Model to train; batches are moved to the device of its parameters.
        lossFun: Loss called as ``lossFun(pred, labels)``.
        optimizer: Optimizer over the model's parameters.
        trainLoader: Iterable yielding ``(data, labels)`` training batches.
        epochs: Number of passes over ``trainLoader``.
        evalLoader: Loader passed to ``testNet`` after each epoch. When omitted,
            the notebook-level ``testLoader`` is used, as before.

    Returns:
        Tuple ``(trainAcc, testAcc, trainLoss)`` of per-epoch lists: mean batch
        accuracy in percent, evaluation accuracy in percent, mean batch loss.
    """
    if evalLoader is None:
        evalLoader = testLoader

    # Run on whatever device the model lives on. Comparing a torch.device with
    # the string "cpu" is always unequal, so no such check is attempted.
    firstParam = next(net.parameters(), None)
    netDevice = firstParam.device if firstParam is not None else torch.device('cpu')

    # perf_counter measures wall-clock time; process_time counts CPU time only
    # and misreports epochs that mostly wait on the GPU.
    timeStart = time.perf_counter()
    timeNow = timeStart

    trainAcc = []
    trainLoss = []
    testAcc = []

    for epoch in range(epochs):

        net.train()

        batchAcc = []
        batchLoss = []

        for data, labels in trainLoader:
            data = data.to(netDevice)
            labels = labels.to(netDevice)

            ## Calculate forward pass and loss
            pred = net(data)
            loss = lossFun(pred, labels)

            ## backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            ## accuracy and loss (stored as plain floats, not tensors)
            matches = (torch.argmax(pred.detach(), dim=1) == labels).float()
            batchAcc.append(100 * matches.mean().item())
            batchLoss.append(loss.item())

        # An empty loader (e.g. drop_last with too few samples) yields NaN, not a warning.
        trainAcc.append(np.mean(batchAcc) if batchAcc else float('nan'))
        trainLoss.append(np.mean(batchLoss) if batchLoss else float('nan'))

        testAcc.append(testNet(net, lossFun, optimizer, evalLoader))

        now = time.perf_counter()
        timeTaken = now - timeNow
        elapsedTime = now - timeStart
        timeNow = now

        print(f"Epoch {epoch+1}/{epochs}, time taken: {timeTaken:.2f}, elapsed time: {elapsedTime:.2f} sec, test accuracy {testAcc[-1]:.4f}%")

    return trainAcc, testAcc, trainLoss

In [None]:
# Train for 10 epochs and keep the per-epoch accuracy and loss curves for plotting.
trainAcc, testAcc, trainLoss = trainNet(net, lossFun, optimizer, trainLoader, 10)

In [None]:
# Left panel: mean training loss per epoch. Right panel: train / held-out accuracy per epoch.
fig, aa = plt.subplots(1, 2, figsize = (16, 5))

aa[0].plot(trainLoss)
# Let the upper limit autoscale: a fixed 0-100 range flattens the loss curve.
aa[0].set_ylim(bottom=0)
aa[0].set_title('Training loss')
aa[0].set_xlabel('Epoch')
aa[0].set_ylabel('Loss')

aa[1].plot(trainAcc, label='train')
aa[1].plot(testAcc, label='test')
# Accuracies are percentages, so the fixed 0-100 range belongs on this axis.
aa[1].set_ylim(0, 100)
aa[1].set_title('Accuracy')
aa[1].set_xlabel('Epoch')
aa[1].set_ylabel('Accuracy (%)')
aa[1].legend()

plt.show()

In [None]:
# Persist only the model's weights (its state_dict) to the working directory.
torch.save(net.state_dict(), baseDir + 'characterRecognition.pt')

In [None]:
# Re-evaluate the trained model on the held-out split (Module.to returns the module itself).
acc = testNet(net.to(device), lossFun, optimizer, testLoader)
print(f'test accuracy {acc:.4f}%')

In [None]:
class ImageDatasetTest(Dataset):
    """In-memory dataset of unlabelled ``.Bmp`` images from a directory.

    Files are ordered by the integer value of their file stem, so every
    ``.Bmp`` file in the directory must have a purely numeric name.

    Attributes:
        image_files: File names in numeric order.
        images: Loaded (and transformed) images, aligned with ``image_files``.
    """

    def __init__(self, imgDir, transform=None):
        """List the image files and eagerly load all of them into memory.

        Args:
            imgDir: Directory containing ``<number>.Bmp`` files.
            transform: Optional callable applied to each RGB PIL image.

        Raises:
            ValueError: If a ``.Bmp`` file stem is not an integer.
            Any exception raised while loading an image is propagated instead
            of leaving a ``None`` entry in ``images``.
        """
        self.imgDir = imgDir
        self.transform = transform
        # Get a list of all image files in the dir, sorted numerically (2 before 10).
        self.image_files = sorted(
            (f for f in os.listdir(imgDir) if f.endswith(".Bmp")),
            key=lambda x: int(os.path.splitext(x)[0])
        )
        # Load all images into memory using parallel processing.
        self.images = [None] * len(self.image_files)
        with ThreadPoolExecutor() as executor:
            # executor.map only re-raises worker exceptions when its results are
            # iterated; consuming them here makes a failed load fail loudly.
            list(executor.map(self.loadImage, range(len(self.image_files))))

    def loadImage(self, idx):
        """Load image ``idx`` from disk, transform it and store it in ``self.images``."""
        imgPath = os.path.join(self.imgDir, self.image_files[idx])
        # The context manager closes the underlying file once the pixels are converted.
        with Image.open(imgPath) as handle:
            image = handle.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        self.images[idx] = image

    def __len__(self):
        """Number of images."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return the (transformed) image at position ``idx``."""
        image = self.images[idx]
        return image

In [None]:
# Load the competition test images with the same preprocessing as training, then pickle them.
datasetTest = ImageDatasetTest(baseDir + 'test', transform)
torch.save(datasetTest, baseDir + 'charDatasetTest.pt')

# To reload the pickled dataset instead of rebuilding it:
# torch.serialization.add_safe_globals([ImageDatasetTest])
# datasetTest = torch.load(baseDir + 'charDatasetTest.pt')

# Fixed order so predictions stay aligned with datasetTest.image_files.
testLoader1 = DataLoader(
    datasetTest,
    batch_size=1024,
    shuffle=False,
)

In [None]:
# Show the dataset object's repr as the cell output.
datasetTest

In [None]:
# Run the trained network over the test images, collecting one score tensor per batch on the CPU.
net.eval()
pred = []
with torch.no_grad():
    for batch in testLoader1:
        scores = net(batch.to(device))
        pred.append(scores.cpu())
# Keep a second reference to the raw scores so the next cell can start from them.
pred1 = pred

In [None]:
# Start again from the raw per-batch scores so this cell can be re-run safely.
pred = pred1
# Pick the highest-scoring class per sample, then join all batches into one 1-D tensor.
pred = torch.cat([scores.argmax(dim=1) for scores in pred])
pred.shape

In [None]:
# Characters in index order: position i is the character emitted for predicted class i.
# NOTE(review): this must match ImageDataset.toLabel, which numbers the sorted unique
# training classes; the two agree only if all 62 characters occur in the training CSV — confirm.
alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
# Replaces the tensor `pred` with a list of characters, so this cell cannot be re-run
# without first re-running the cell that rebuilds `pred` as a tensor.
pred = [alphabet[val] for val in pred.tolist()]

In [None]:
# One row per test image, holding the predicted character.
df = pd.DataFrame(pred, columns=['Class'])
# Take each ID from the file name that produced the prediction rather than from a
# hard-coded start offset: predictions follow datasetTest.image_files order, so
# this stays aligned even if the IDs are not one contiguous range.
df.index = [int(os.path.splitext(fileName)[0]) for fileName in datasetTest.image_files]
df.index.name = 'ID'
df.head()

In [None]:
# Write the submission file; index=True emits the index (named 'ID') as the first CSV column.
df.to_csv(baseDir + 'submission.csv', index = True)

In [None]:
# 4x4 grid showing 16 test images, chosen with a fixed seed, with their predicted characters.
fig, axes = plt.subplots(4, 4, figsize=(12, 6))
axes = axes.flatten()
random.seed(3423)
random_indices = random.sample(range(len(datasetTest)), 16)

for ax, index in zip(axes, random_indices):
    # Row `index` of df holds the prediction for test image `index`.
    label = df.iloc[index, 0]
    # Undo the normalisation, clip to the displayable range, and move channels last for imshow.
    image = deNormalize(datasetTest[index], mean_rgb, std_rgb)
    image = torch.clamp(image, 0, 1).permute(1, 2, 0)

    ax.imshow(image)
    ax.set_title(f"Label: {label}\nShape: {image.shape}")
    ax.axis("off")

plt.tight_layout()
plt.show()