In [21]:
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence
from torchvision.transforms.functional import to_tensor
from torch.nn import CTCLoss
import string
from tqdm import tqdm

In [34]:
BATCH_SIZE = 16
IMG_HEIGHT = 32
IMG_WIDTH = 128
EPOCHS = 30
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CHARS = string.ascii_uppercase + string.digits+string.ascii_lowercase+"-_' "

In [35]:
class LabelConverter:
    def __init__(self, characters):
        self.chars = characters
        self.char2idx = {char: i+1 for i, char in enumerate(characters)}  # +1 for CTC blank
        self.idx2char = {i+1: char for i, char in enumerate(characters)}
        self.blank = 0

    def encode(self, text):
        return torch.tensor([self.char2idx[c] for c in text], dtype=torch.long)

    def decode(self, preds):
        preds = preds.argmax(2).squeeze(1).detach().cpu().numpy()
        texts = []
        for pred in preds:
            text = ''
            prev = -1
            for p in pred:
                if p != prev and p != self.blank:
                    text += self.idx2char[p]
                prev = p
            texts.append(text)
        return texts

converter = LabelConverter(CHARS)

In [36]:
class CRNNDataset(Dataset):
    def __init__(self, csv_path, transform=None):
        df = pd.read_csv(csv_path)
        self.paths = df['FILENAME'].values
        self.labels = df['IDENTITY'].values
        self.transform = transform or transforms.Compose([
            transforms.Grayscale(),
            transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,))
        ])

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        img = Image.open(self.paths[idx]).convert("RGB")
        img = self.transform(img)
        label_str = str(self.labels[idx])
        label = converter.encode(label_str)
        return {'image': img, 'label': label}

In [37]:
def collate_fn(batch):
    images = [item['image'] for item in batch]
    labels = [item['label'] for item in batch]
    label_lengths = torch.tensor([len(label) for label in labels], dtype=torch.long)
    images = torch.stack(images)
    labels_concat = torch.cat(labels)
    return images, labels_concat, label_lengths

In [38]:
class CRNN(nn.Module):
    def __init__(self, num_classes):
        super(CRNN, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 64, 3, 1, 1), nn.ReLU(), nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, 3, 1, 1), nn.ReLU(), nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, 3, 1, 1), nn.ReLU(),
            nn.Conv2d(256, 256, 3, 1, 1), nn.ReLU(), nn.MaxPool2d((2, 1)),
            nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512), nn.ReLU(),
            nn.Conv2d(512, 512, 3, 1, 1), nn.BatchNorm2d(512), nn.ReLU(), nn.MaxPool2d((2, 1)),
            nn.Conv2d(512, 512, 2, 1, 0), nn.ReLU(),
        )
        self.rnn1 = nn.LSTM(512, 256, bidirectional=True, batch_first=True)
        self.rnn2 = nn.LSTM(512, 256, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(512, num_classes + 1)  # +1 for CTC blank

    def forward(self, x):
        x = self.cnn(x)  # [B, 512, 1, W]
        x = x.squeeze(2).permute(0, 2, 1)  # [B, W, 512]
        x, _ = self.rnn1(x)  # [B, W, 512]
        x, _ = self.rnn2(x)  # [B, W, 512]
        x = self.fc(x)  # [B, W, C]
        return x.permute(1, 0, 2)  # [W, B, C] for CTC

In [45]:
dataset = CRNNDataset(r'C:\Users\Raihan\OneDrive\Desktop\DPIIT HACKATHON\cvsi_fullpath.csv')
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
print(len(dataset))

330961


In [47]:
def train():
    dataset = CRNNDataset(r'C:\Users\Raihan\OneDrive\Desktop\DPIIT HACKATHON\cvsi_10k_fullpath.csv')
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)

    model = CRNN(num_classes=len(CHARS)).to(DEVICE)
    criterion = CTCLoss(blank=0, zero_infinity=True)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    for epoch in (range(EPOCHS)):
        model.train()
        total_loss = 0
        for images, labels, label_lengths in tqdm(loader):
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            preds = model(images)
            input_lengths = torch.full(size=(images.size(0),), fill_value=preds.size(0), dtype=torch.long)

            loss = criterion(preds, labels, input_lengths, label_lengths)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"[Epoch {epoch+1}] Loss: {total_loss:.4f}")

In [48]:
train()

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\Raihan\\OneDrive\\Desktop\\DPIIT HACKATHON\\cvsi_10k_fullpath.csv'

In [46]:
full_csv_path = r'C:\Users\Raihan\OneDrive\Desktop\DPIIT HACKATHON\cvsi_fullpath.csv'
df = pd.read_csv(full_csv_path)

# Shuffle and take first 10,000 entries
df_subset = df.sample(n=10000, random_state=42).reset_index(drop=True)

# Save to new CSV
subset_path = r'C:\Users\Raihan\OneDrive\Desktop\DPIIT HACKATHON\cvsi_10k_subset.csv'
df_subset.to_csv(subset_path, index=False)

print("✅ Saved 10k subset CSV at:", subset_path)

✅ Saved 10k subset CSV at: C:\Users\Raihan\OneDrive\Desktop\DPIIT HACKATHON\cvsi_10k_subset.csv
