In [29]:
from model.LPRNet import build_lprnet
from torchsummary import summary

model = build_lprnet()
summary(model, input_size=(3, 24, 94))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 22, 92]           1,792
       BatchNorm2d-2           [-1, 64, 22, 92]             128
              ReLU-3           [-1, 64, 22, 92]               0
         MaxPool3d-4           [-1, 64, 20, 90]               0
            Conv2d-5           [-1, 32, 20, 90]           2,080
              ReLU-6           [-1, 32, 20, 90]               0
            Conv2d-7           [-1, 32, 20, 90]           3,104
              ReLU-8           [-1, 32, 20, 90]               0
            Conv2d-9           [-1, 32, 20, 90]           3,104
             ReLU-10           [-1, 32, 20, 90]               0
           Conv2d-11          [-1, 128, 20, 90]           4,224
small_basic_block-12          [-1, 128, 20, 90]               0
      BatchNorm2d-13          [-1, 128, 20, 90]             256
             ReLU-14          [-1, 128,

In [30]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms
import string

class ImageFolderCTCDataset(Dataset):
    def __init__(self, folder_path, image_shape=(3, 24, 94), augment=False):
        self.folder_path = folder_path
        self.image_files = os.listdir(folder_path)
        _, height, width = image_shape

        if augment:
            self.transform = transforms.Compose([
                transforms.Resize((height, width)),
                transforms.RandomAffine(
                    degrees=5,              # small rotation (±5°)
                    translate=(0.05, 0.05), # shift up to 5% horizontally/vertically
                    scale=(0.9, 1.1),       # zoom in/out 10%
                    shear=0                 # keep shear = 0 (optional)
                ),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
            ])
        else:
            self.transform = transforms.Compose([
                transforms.Resize((height, width)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
            ])

        # dictionary build
        self.chars = list(string.digits + string.ascii_uppercase)
        self.chars.append('-')
        self.char_to_idx = {ch: i for i, ch in enumerate(self.chars)}
        self.idx_to_char = {i: ch for i, ch in enumerate(self.chars)}

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.folder_path, img_name)

        image = Image.open(img_path).convert("RGB")
        image = self.transform(image)

        # Extract label
        label_str = img_name.split('.')[0].split('_')[0]
        label_encoded = [self.char_to_idx[ch] for ch in label_str]

        return image, torch.tensor(label_encoded, dtype=torch.long), len(label_encoded)



In [31]:
IMAGE_SHAPE = (3, 24, 94)
train_ds = ImageFolderCTCDataset(r"lprds\train", image_shape=IMAGE_SHAPE, augment=True)
val_ds = ImageFolderCTCDataset(r"lprds\val", image_shape=IMAGE_SHAPE)
test_ds = ImageFolderCTCDataset(r"lprds\test", image_shape=IMAGE_SHAPE)

img, label_encoded, label_length = train_ds[0]

print(img.shape)           # torch.Size([3, 24, 94])
print(label_encoded)       # tensor([ 0,  0, 21, 17,  2,  8,  7,  7])
print(label_length)        # 8



torch.Size([3, 24, 94])
tensor([ 0,  0, 21, 17,  2,  8,  7,  7])
8


In [32]:
from torch.utils.data import DataLoader


def ctc_collate_fn(batch):
    images, labels, lengths = zip(*batch)  # unzip list of tuples

    # Stack images [B, C, H, W]
    images = torch.stack(images, dim=0)

    # Concatenate all labels into one flat tensor
    labels = torch.cat(labels)

    # Convert lengths to tensor
    lengths = torch.tensor(lengths, dtype=torch.long)

    return images, labels, lengths


BATCH_SIZE = 32
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, collate_fn=ctc_collate_fn)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, collate_fn=ctc_collate_fn)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, collate_fn=ctc_collate_fn)


In [33]:
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, collate_fn=ctc_collate_fn)
val_loader   = DataLoader(val_ds, batch_size=32, shuffle=False, collate_fn=ctc_collate_fn)
test_loader  = DataLoader(test_ds, batch_size=32, shuffle=False, collate_fn=ctc_collate_fn)

# check a batch
images, labels, lengths = next(iter(train_loader))
print(images.shape)   # [32, 3, 24, 94]
print(labels.shape)   # flat 1D tensor, e.g. torch.Size([180])
print(lengths.shape)  # [32], lengths of each label


torch.Size([32, 3, 24, 94])
torch.Size([219])
torch.Size([32])


In [34]:
def make_input_lengths_from_logits(logits):
    # logits: (N, class_num, T)
    T = logits.size(2)
    batch_size = logits.size(0)
    # CTC expects input_lengths per sample (length of T for each sample)
    return torch.full((batch_size,), T, dtype=torch.long)

In [36]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# -------- Config ----------
CLASS_NUM = 37              # number of classes (0-9, A-Z, plus blank)
MAX_LABEL_LEN = 10          # max characters per sample (fake, for model design)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LR = 1e-3
EPOCHS = 100
BATCH_SIZE = 32
# --------------------------

# ===== Dataset & DataLoader =====
def ctc_collate_fn(batch):
    images, labels, lengths = zip(*batch)
    images = torch.stack(images, dim=0)
    labels = torch.cat(labels)
    lengths = torch.tensor(lengths, dtype=torch.long)
    return images, labels, lengths

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, collate_fn=ctc_collate_fn)
val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, collate_fn=ctc_collate_fn)

# ===== Model, Loss, Optimizer =====
model = build_lprnet(MAX_LABEL_LEN, CLASS_NUM).to(DEVICE)
model.load_state_dict(torch.load("Final_LPRNet_model.pth", map_location=DEVICE))

criterion = nn.CTCLoss(blank=train_ds.char_to_idx['-'], reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=LR)


# ===== Training & Validation =====
def train_one_epoch(epoch):
    model.train()
    running_loss = 0.0

    for batch_idx, (images, labels, target_lengths) in enumerate(train_loader):
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)
        target_lengths = target_lengths.to(DEVICE)

        # Forward
        logits = model(images)                     # [N, C, T]
        logits = logits.permute(2, 0, 1)           # [T, N, C]
        log_probs = logits.log_softmax(2)

        # Input lengths = all T
        input_lengths = torch.full(size=(images.size(0),), 
                                   fill_value=logits.size(0), 
                                   dtype=torch.long).to(DEVICE)

        # Loss
        loss = criterion(log_probs, labels, input_lengths, target_lengths)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if (batch_idx+1) % 10 == 0:
            print(f"Epoch [{epoch+1}], Step [{batch_idx+1}/{len(train_loader)}], "
                  f"Loss: {running_loss/10:.4f}")
            running_loss = 0.0


def validate(epoch):
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for images, labels, target_lengths in val_loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            target_lengths = target_lengths.to(DEVICE)

            logits = model(images)
            logits = logits.permute(2, 0, 1)
            log_probs = logits.log_softmax(2)

            input_lengths = torch.full(size=(images.size(0),), 
                                       fill_value=logits.size(0), 
                                       dtype=torch.long).to(DEVICE)

            loss = criterion(log_probs, labels, input_lengths, target_lengths)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    print(f"Epoch [{epoch+1}] Validation Loss: {val_loss:.4f}")
    return val_loss


# ===== Main Loop =====
best_val_loss = float("inf")

for epoch in range(EPOCHS):
    train_one_epoch(epoch)
    val_loss = validate(epoch)

    # Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "lprnet_best2.pth")
        print(f"✅ Saved best model at epoch {epoch+1} with val_loss={val_loss:.4f}")


Epoch [1], Step [10/19], Loss: 0.4647
Epoch [1] Validation Loss: 0.5779
✅ Saved best model at epoch 1 with val_loss=0.5779
Epoch [2], Step [10/19], Loss: 0.3850
Epoch [2] Validation Loss: 0.3969
✅ Saved best model at epoch 2 with val_loss=0.3969
Epoch [3], Step [10/19], Loss: 0.3545
Epoch [3] Validation Loss: 0.4347
Epoch [4], Step [10/19], Loss: 0.3231
Epoch [4] Validation Loss: 0.4081
Epoch [5], Step [10/19], Loss: 0.2885
Epoch [5] Validation Loss: 0.4109
Epoch [6], Step [10/19], Loss: 0.3288
Epoch [6] Validation Loss: 0.4906
Epoch [7], Step [10/19], Loss: 0.3437
Epoch [7] Validation Loss: 0.3639
✅ Saved best model at epoch 7 with val_loss=0.3639
Epoch [8], Step [10/19], Loss: 0.3210
Epoch [8] Validation Loss: 0.5676
Epoch [9], Step [10/19], Loss: 0.3426
Epoch [9] Validation Loss: 0.5703
Epoch [10], Step [10/19], Loss: 0.3144
Epoch [10] Validation Loss: 0.4219
Epoch [11], Step [10/19], Loss: 0.3038
Epoch [11] Validation Loss: 0.4258
Epoch [12], Step [10/19], Loss: 0.3096
Epoch [12] V

In [37]:
def validate_test():
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for images, labels, target_lengths in test_loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            target_lengths = target_lengths.to(DEVICE)

            logits = model(images)
            logits = logits.permute(2, 0, 1)
            log_probs = logits.log_softmax(2)

            input_lengths = torch.full(size=(images.size(0),), 
                                       fill_value=logits.size(0), 
                                       dtype=torch.long).to(DEVICE)

            loss = criterion(log_probs, labels, input_lengths, target_lengths)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    print(f"Test Loss: {val_loss:.4f}")
    return val_loss
validate_test()

Test Loss: 0.4064


0.406448428829511

In [38]:
# Load best model
model.load_state_dict(torch.load("lprnet_best.pth", map_location=DEVICE))
model.eval()

test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, collate_fn=ctc_collate_fn)

with torch.no_grad():
    for images, labels, target_lengths in test_loader:
        images = images.to(DEVICE)
        logits = model(images)                     # [N, C, T]
        preds = logits.argmax(1)                   # simple greedy decode (still needs CTC decoding)
        print(preds.shape)  # torch.Size([32, 18])
        break


torch.Size([32, 18])


In [39]:
def greedy_decode(logits, idx_to_char, blank_idx):
    """
    logits: [T, N, C] tensor (log probs or raw logits)
    idx_to_char: dictionary mapping int -> char
    blank_idx: index of blank symbol
    """
    preds = logits.argmax(2).permute(1, 0)   # [N, T]

    results = []
    for pred in preds:
        string = ""
        prev = None
        for p in pred.cpu().numpy():
            if p != prev and p != blank_idx:   # collapse + remove blank
                string += idx_to_char[p]
            prev = p
        results.append(string)
    return results


In [40]:
def predict_image(model, image, dataset):
    """
    model   : trained model
    image   : tensor [3, 24, 94]
    dataset : dataset object (for idx_to_char, blank index)
    """
    model.eval()
    with torch.no_grad():
        image = image.unsqueeze(0).to(DEVICE)        # add batch dim [1, 3, 24, 94]
        logits = model(image)                        # [N, C, T]
        logits = logits.permute(2, 0, 1)             # [T, N, C]

        preds = greedy_decode(logits, dataset.idx_to_char, dataset.char_to_idx['-'])
        return preds[0]


In [41]:
# Take a sample from your test set

i = 9
img, _, _ = test_ds[i]
print("Ground truth:", test_ds.image_files[i])

pred = predict_image(model, img, test_ds)
print("Prediction  :", pred)


Ground truth: 75N1960G.png
Prediction  : 75N1960G


In [42]:
def evaluate_accuracy(model, dataset):
    model.eval()
    correct = 0
    total = len(dataset)

    with torch.no_grad():
        for i in range(total):
            img, label_encoded, _ = dataset[i]

            # Ground truth string
            label_str = "".join(dataset.idx_to_char[idx.item()] for idx in label_encoded)

            # Prediction
            pred_str = predict_image(model, img, dataset)

            if pred_str == label_str:
                correct += 1
            else:
                print(f"GT: {label_str} | Pred: {pred_str}")
                

    accuracy = correct / total
    return accuracy

# train_acc = evaluate_accuracy(model, train_ds)
# print(f"Train Accuracy: {train_acc:.2%}")

# val_acc = evaluate_accuracy(model, val_ds)
# print(f"Val Accuracy: {val_acc:.2%}")

test_acc = evaluate_accuracy(model, test_ds)
print(f"Test Accuracy: {test_acc:.2%}")


GT: 1033IR | Pred: 1033R
GT: 381ATK83 | Pred: 381ATK63
GT: 381ATK83 | Pred: 381ATK63
GT: 8427XX29 | Pred: B427XX29
GT: AC508V | Pred: C508V
GT: AL193VP | Pred: AL193HP
GT: AX128FT | Pred: AX12BFT
GT: B21GSB | Pred: B1GSB
GT: B225RTK | Pred: B115RTK
GT: B96SXV | Pred: B9SXV
GT: B96SXV | Pred: B9SXV
GT: BA999ZZ | Pred: BA999ZI
GT: BZ310SW | Pred: BZ310SN
GT: CM101LW | Pred: CM101L
GT: CV194VA | Pred: CQ194VA
GT: EJQ588 | Pred: E10588
GT: ES484LJ | Pred: ES464LJ
GT: FC882MC | Pred: FC82MC
GT: FF788NK | Pred: FFT0BNK
GT: FL986ZT | Pred: FL9B6ZT
GT: FR697HF | Pred: FR697H
GT: H864JGM | Pred: H864JHGM
GT: I008646 | Pred: 1008646
GT: IICE3083 | Pred: ICE30683
GT: KZ746AR | Pred: KZ7AGAR
GT: LBHT71 | Pred: LB
GT: MWWK908 | Pred: MWWK900
GT: PH20CNP | Pred: T6H61
GT: PI36KOG | Pred: PI36K0
GT: SG49711 | Pred: G49711
GT: SLF9995 | Pred: SL995
GT: TR95XMI | Pred: TR95XMH
GT: TR95XMI | Pred: TR95XMH
GT: VS236891 | Pred: S236891
GT: VS236891 | Pred: S236891
GT: WW921MX | Pred: WW921MK
GT: YB75711 |