In [1]:
from model.LPRNet import build_lprnet
from torchsummary import summary

model = build_lprnet()
summary(model, input_size=(1, 24, 94))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 3, 24, 94]               6
              ReLU-2            [-1, 3, 24, 94]               0
            Conv2d-3            [-1, 3, 24, 94]              12
              ReLU-4            [-1, 3, 24, 94]               0
            Conv2d-5            [-1, 3, 24, 94]              12
              ReLU-6            [-1, 3, 24, 94]               0
            Conv2d-7           [-1, 64, 22, 92]           1,792
       BatchNorm2d-8           [-1, 64, 22, 92]             128
              ReLU-9           [-1, 64, 22, 92]               0
        MaxPool3d-10           [-1, 64, 20, 90]               0
           Conv2d-11           [-1, 32, 20, 90]           2,080
             ReLU-12           [-1, 32, 20, 90]               0
           Conv2d-13           [-1, 32, 20, 90]           3,104
             ReLU-14           [-1, 32,

In [2]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms
import string

class ImageFolderCTCDataset(Dataset):
    def __init__(self, folder_path, image_shape=(1, 24, 94), augment=False):
        self.folder_path = folder_path
        self.image_files = os.listdir(folder_path)
        _, height, width = image_shape

        if augment:
            self.transform = transforms.Compose([
                transforms.Resize((height, width)),
                transforms.RandomAffine(
                    degrees=5,
                    translate=(0.05, 0.05),
                    scale=(0.9, 1.1),
                    shear=0
                ),
                transforms.Grayscale(num_output_channels=1),
                transforms.ToTensor()   # grayscale tensor in [0,1]
            ])
        else:
            self.transform = transforms.Compose([
                transforms.Resize((height, width)),
                transforms.Grayscale(num_output_channels=1),
                transforms.ToTensor()   # grayscale tensor in [0,1]
            ])

        # dictionary build
        self.chars = list(string.digits + string.ascii_uppercase)
        self.chars.append('-')
        self.char_to_idx = {ch: i for i, ch in enumerate(self.chars)}
        self.idx_to_char = {i: ch for i, ch in enumerate(self.chars)}

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.folder_path, img_name)

        image = Image.open(img_path).convert("L")  # grayscale
        image = self.transform(image)

        # Extract label
        label_str = img_name.split('.')[0].split('_')[0]
        label_encoded = [self.char_to_idx[ch] for ch in label_str]

        return image, torch.tensor(label_encoded, dtype=torch.long), len(label_encoded)


In [3]:
IMAGE_SHAPE = (3, 24, 94)
train_ds = ImageFolderCTCDataset(r"lprds\train", image_shape=IMAGE_SHAPE, augment=True)
val_ds = ImageFolderCTCDataset(r"lprds\val", image_shape=IMAGE_SHAPE)
test_ds = ImageFolderCTCDataset(r"lprds\test", image_shape=IMAGE_SHAPE)

img, label_encoded, label_length = train_ds[0]

print(img.shape)           # torch.Size([3, 24, 94])
print(label_encoded)       # tensor([ 0,  0, 21, 17,  2,  8,  7,  7])
print(label_length)        # 8



torch.Size([1, 24, 94])
tensor([ 0,  0, 21, 17,  2,  8,  7,  7])
8


In [4]:
from torch.utils.data import DataLoader


def ctc_collate_fn(batch):
    images, labels, lengths = zip(*batch)  # unzip list of tuples

    # Stack images [B, C, H, W]
    images = torch.stack(images, dim=0)

    # Concatenate all labels into one flat tensor
    labels = torch.cat(labels)

    # Convert lengths to tensor
    lengths = torch.tensor(lengths, dtype=torch.long)

    return images, labels, lengths


BATCH_SIZE = 32
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, collate_fn=ctc_collate_fn)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, collate_fn=ctc_collate_fn)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, collate_fn=ctc_collate_fn)


In [5]:

# check a batch
images, labels, lengths = next(iter(train_loader))
print(images.shape)   # [32, 3, 24, 94]
print(labels.shape)   # flat 1D tensor, e.g. torch.Size([180])
print(lengths.shape)  # [32], lengths of each label


torch.Size([32, 1, 24, 94])
torch.Size([223])
torch.Size([32])


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# -------- Config ----------
CLASS_NUM = 37              # number of classes (0-9, A-Z, plus blank)
MAX_LABEL_LEN = 10          # max characters per sample (fake, for model design)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LR = 1e-3
EPOCHS = 100
BATCH_SIZE = 32
# --------------------------

# ===== Dataset & DataLoader =====
def ctc_collate_fn(batch):
    images, labels, lengths = zip(*batch)
    images = torch.stack(images, dim=0)
    labels = torch.cat(labels)
    lengths = torch.tensor(lengths, dtype=torch.long)
    return images, labels, lengths

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, collate_fn=ctc_collate_fn)
val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, collate_fn=ctc_collate_fn)

# ===== Model, Loss, Optimizer =====
model = build_lprnet(MAX_LABEL_LEN, CLASS_NUM).to(DEVICE)
model.load_state_dict(torch.load("lprnet_best.pth", map_location=DEVICE))

criterion = nn.CTCLoss(blank=train_ds.char_to_idx['-'], reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=LR)


# ===== Training & Validation =====
def train_one_epoch(epoch):
    model.train()
    running_loss = 0.0

    for batch_idx, (images, labels, target_lengths) in enumerate(train_loader):
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)
        target_lengths = target_lengths.to(DEVICE)

        # Forward
        logits = model(images)                     # [N, C, T]
        logits = logits.permute(2, 0, 1)           # [T, N, C]
        log_probs = logits.log_softmax(2)

        # Input lengths = all T
        input_lengths = torch.full(size=(images.size(0),), 
                                   fill_value=logits.size(0), 
                                   dtype=torch.long).to(DEVICE)

        # Loss
        loss = criterion(log_probs, labels, input_lengths, target_lengths)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if (batch_idx+1) % 10 == 0:
            print(f"Epoch [{epoch+1}], Step [{batch_idx+1}/{len(train_loader)}], "
                  f"Loss: {running_loss/10:.4f}")
            running_loss = 0.0


def validate(epoch):
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for images, labels, target_lengths in val_loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            target_lengths = target_lengths.to(DEVICE)

            logits = model(images)
            logits = logits.permute(2, 0, 1)
            log_probs = logits.log_softmax(2)

            input_lengths = torch.full(size=(images.size(0),), 
                                       fill_value=logits.size(0), 
                                       dtype=torch.long).to(DEVICE)

            loss = criterion(log_probs, labels, input_lengths, target_lengths)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    print(f"Epoch [{epoch+1}] Validation Loss: {val_loss:.4f}")
    return val_loss


# ===== Main Loop =====
best_val_loss = float("inf")

for epoch in range(EPOCHS):
    train_one_epoch(epoch)
    val_loss = validate(epoch)

    # Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "lprnet_best.pth")
        print(f"✅ Saved best model at epoch {epoch+1} with val_loss={val_loss:.4f}")


Epoch [1], Step [10/19], Loss: 1.0673
Epoch [1] Validation Loss: 5.0664
✅ Saved best model at epoch 1 with val_loss=5.0664
Epoch [2], Step [10/19], Loss: 0.8283
Epoch [2] Validation Loss: 3.1402
✅ Saved best model at epoch 2 with val_loss=3.1402
Epoch [3], Step [10/19], Loss: 0.7835
Epoch [3] Validation Loss: 1.0642
✅ Saved best model at epoch 3 with val_loss=1.0642
Epoch [4], Step [10/19], Loss: 0.6801
Epoch [4] Validation Loss: 1.2981
Epoch [5], Step [10/19], Loss: 0.6279
Epoch [5] Validation Loss: 3.2551
Epoch [6], Step [10/19], Loss: 0.5921
Epoch [6] Validation Loss: 3.8942
Epoch [7], Step [10/19], Loss: 0.5908
Epoch [7] Validation Loss: 1.8545
Epoch [8], Step [10/19], Loss: 0.5524
Epoch [8] Validation Loss: 1.8384
Epoch [9], Step [10/19], Loss: 0.6680
Epoch [9] Validation Loss: 3.0841
Epoch [10], Step [10/19], Loss: 0.5515
Epoch [10] Validation Loss: 3.5757
Epoch [11], Step [10/19], Loss: 0.5237
Epoch [11] Validation Loss: 1.5007
Epoch [12], Step [10/19], Loss: 0.4835
Epoch [12] V

In [7]:
def validate_test():
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for images, labels, target_lengths in test_loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            target_lengths = target_lengths.to(DEVICE)

            logits = model(images)
            logits = logits.permute(2, 0, 1)
            log_probs = logits.log_softmax(2)

            input_lengths = torch.full(size=(images.size(0),), 
                                       fill_value=logits.size(0), 
                                       dtype=torch.long).to(DEVICE)

            loss = criterion(log_probs, labels, input_lengths, target_lengths)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    return val_loss

print(f"Test Loss: {validate_test():.4f}") 

Test Loss: 0.6055


In [8]:
# Load best model
model.load_state_dict(torch.load("lprnet_best.pth", map_location=DEVICE))
model.eval()

test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, collate_fn=ctc_collate_fn)

with torch.no_grad():
    for images, labels, target_lengths in test_loader:
        images = images.to(DEVICE)
        logits = model(images)                     # [N, C, T]
        preds = logits.argmax(1)                   # simple greedy decode (still needs CTC decoding)
        print(preds.shape)  # torch.Size([32, 18])
        break


torch.Size([32, 18])


In [9]:
def greedy_decode(logits, idx_to_char, blank_idx):
    """
    logits: [T, N, C] tensor (log probs or raw logits)
    idx_to_char: dictionary mapping int -> char
    blank_idx: index of blank symbol
    """
    preds = logits.argmax(2).permute(1, 0)   # [N, T]

    results = []
    for pred in preds:
        string = ""
        prev = None
        for p in pred.cpu().numpy():
            if p != prev and p != blank_idx:   # collapse + remove blank
                string += idx_to_char[p]
            prev = p
        results.append(string)
    return results


In [10]:
def predict_image(model, image, dataset):
    """
    model   : trained model
    image   : tensor [3, 24, 94]
    dataset : dataset object (for idx_to_char, blank index)
    """
    model.eval()
    with torch.no_grad():
        image = image.unsqueeze(0).to(DEVICE)        # add batch dim [1, 3, 24, 94]
        logits = model(image)                        # [N, C, T]
        logits = logits.permute(2, 0, 1)             # [T, N, C]

        preds = greedy_decode(logits, dataset.idx_to_char, dataset.char_to_idx['-'])
        return preds[0]


In [11]:
# Take a sample from your test set

i = 9
img, _, _ = test_ds[i]
print("Ground truth:", test_ds.image_files[i])

pred = predict_image(model, img, test_ds)
print("Prediction  :", pred)


Ground truth: 8427XX29.jpg
Prediction  : 8427XX29


In [12]:
def evaluate_accuracy(model, dataset):
    model.eval()
    correct = 0
    total = len(dataset)

    with torch.no_grad():
        for i in range(total):
            img, label_encoded, _ = dataset[i]

            # Ground truth string
            label_str = "".join(dataset.idx_to_char[idx.item()] for idx in label_encoded)

            # Prediction
            pred_str = predict_image(model, img, dataset)

            if pred_str == label_str:
                correct += 1
            else:
                print(f"GT: {label_str} | Pred: {pred_str}")
                

    accuracy = correct / total
    return accuracy

# train_acc = evaluate_accuracy(model, train_ds)
# print(f"Train Accuracy: {train_acc:.2%}")

# val_acc = evaluate_accuracy(model, val_ds)
# print(f"Val Accuracy: {val_acc:.2%}")

test_acc = evaluate_accuracy(model, test_ds)
print(f"Test Accuracy: {test_acc:.2%}")


GT: 1033IR | Pred: 1033HR
GT: 2348XR25 | Pred: 349Y92
GT: 55SG53 | Pred: 56SG53
GT: 5B40001 | Pred: 5840001
GT: AC508V | Pred: C508V
GT: AL193VP | Pred: AL193WP
GT: B225RTK | Pred: B275RTK
GT: B961TAG | Pred: B961TA
GT: CM101LW | Pred: 4133119
GT: CV194VA | Pred: CY194VA
GT: EH577PH | Pred: EMH577PH
GT: EJQ588 | Pred: E10588
GT: EQ725QJ | Pred: CQ7250J
GT: FC882MC | Pred: FC882WC
GT: GB1ECB | Pred: GB1CB
GT: H864JGM | Pred: H864JHGM
GT: I008646 | Pred: T008646
GT: IICE3083 | Pred: CE3683
GT: KZ746AR | Pred: KZ7A6AR
GT: LBHT71 | Pred: 1931331
GT: PH20CNP | Pred: P20CN
GT: PI36K0G | Pred: PI36KOG
GT: SG49711 | Pred: S49711
GT: SLF9995 | Pred: S139993
GT: TR95XMI | Pred: TR95XM
GT: VI496AN | Pred: V496AN
GT: VS236891 | Pred: S236891
Test Accuracy: 63.01%
