In [2]:
from model.LPRNet import build_lprnet
from torchsummary import summary

# Instantiate the network with build_lprnet's default arguments and print a
# layer-by-layer summary (output shapes and parameter counts) for one
# 3-channel 24x94 input.
model = build_lprnet()
summary(model, input_size=(3, 24, 94))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 22, 92]           1,792
       BatchNorm2d-2           [-1, 64, 22, 92]             128
              ReLU-3           [-1, 64, 22, 92]               0
         MaxPool3d-4           [-1, 64, 20, 90]               0
            Conv2d-5           [-1, 32, 20, 90]           2,080
              ReLU-6           [-1, 32, 20, 90]               0
            Conv2d-7           [-1, 32, 20, 90]           3,104
              ReLU-8           [-1, 32, 20, 90]               0
            Conv2d-9           [-1, 32, 20, 90]           3,104
             ReLU-10           [-1, 32, 20, 90]               0
           Conv2d-11          [-1, 128, 20, 90]           4,224
small_basic_block-12          [-1, 128, 20, 90]               0
      BatchNorm2d-13          [-1, 128, 20, 90]             256
             ReLU-14          [-1, 128,

In [3]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms
import string

class ImageFolderCTCDataset(Dataset):
    """Folder-backed image dataset whose labels are read from the file names.

    Each sample is ``(image, label, label_length)`` where ``image`` is the
    resized RGB image as a float tensor, ``label`` is a 1-D ``torch.long``
    tensor of character indices and ``label_length`` is its length.

    The label text is the part of the file name before the first ``.`` and
    before the first ``_`` (``"75N1960G_2.png"`` -> ``"75N1960G"``).

    Args:
        folder_path: Directory containing the image files.
        image_shape: ``(channels, height, width)``; only height and width are
            used, as the resize target.
    """

    # Lower-case file extensions accepted when scanning the folder. Anything
    # else (e.g. Thumbs.db, .DS_Store, notes) is skipped instead of crashing
    # later inside Image.open.
    IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tif", ".tiff", ".webp")

    def __init__(self, folder_path, image_shape=(3, 24, 94)):
        self.folder_path = folder_path
        self.image_files = self._list_image_files(folder_path)
        _, height, width = image_shape
        self.transform = transforms.Compose([
            transforms.Resize((height, width)),
            transforms.ToTensor(),
        ])

        # Vocabulary: digits, upper-case letters, then '-' as the last entry,
        # which is reserved as the CTC blank symbol.
        self.chars = list(string.digits + string.ascii_uppercase)
        self.chars.append('-')  # blank for CTC
        self.char_to_idx = {ch: i for i, ch in enumerate(self.chars)}
        self.idx_to_char = {i: ch for i, ch in enumerate(self.chars)}

    @classmethod
    def _list_image_files(cls, folder_path):
        """Return the image file names in ``folder_path`` in sorted order.

        ``os.listdir`` returns entries in arbitrary order, so sorting makes
        ``dataset[i]`` refer to the same file on every run and platform.
        """
        return sorted(
            name for name in os.listdir(folder_path)
            if name.lower().endswith(cls.IMAGE_EXTENSIONS)
        )

    @staticmethod
    def _label_from_filename(img_name):
        """Extract the label text: drop everything from the first '.' and the first '_'."""
        return img_name.split('.')[0].split('_')[0]

    def _encode_label(self, label_str, img_name=None):
        """Map each character of ``label_str`` to its vocabulary index.

        Raises:
            KeyError: If a character is not in the vocabulary. The message
                names the character, the label and (when given) the file.
        """
        # NOTE(review): a '-' inside a label is encoded as the blank index;
        # confirm that file names never contain it before the first '_'.
        try:
            return [self.char_to_idx[ch] for ch in label_str]
        except KeyError as err:
            origin = f" (file {img_name!r})" if img_name else ""
            raise KeyError(
                f"Unsupported character {err.args[0]!r} in label {label_str!r}{origin}"
            ) from None

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.folder_path, img_name)

        # Load image; the context manager releases the file handle promptly.
        with Image.open(img_path) as raw_image:
            image = self.transform(raw_image.convert("RGB"))

        # Extract label from filename (remove ext, cut at "_")
        label_str = self._label_from_filename(img_name)

        # Encode string -> list of indices
        label_encoded = self._encode_label(label_str, img_name)

        return image, torch.tensor(label_encoded, dtype=torch.long), len(label_encoded)


In [4]:
# (channels, height, width) every image is resized to.
IMAGE_SHAPE = (3, 24, 94)

# Root folder holding the train/val/test splits. Paths are assembled with
# os.path.join so the notebook runs on POSIX systems as well as Windows
# (a literal "lprds\train" is a single file name on Linux/macOS).
DATA_DIR = "lprds"
train_ds = ImageFolderCTCDataset(os.path.join(DATA_DIR, "train"), image_shape=IMAGE_SHAPE)
val_ds = ImageFolderCTCDataset(os.path.join(DATA_DIR, "val"), image_shape=IMAGE_SHAPE)
test_ds = ImageFolderCTCDataset(os.path.join(DATA_DIR, "test"), image_shape=IMAGE_SHAPE)

# Inspect one sample to confirm the tensor shape and the label encoding.
img, label_encoded, label_length = train_ds[0]

print(img.shape)           # e.g. torch.Size([3, 24, 94])
print(label_encoded)       # e.g. tensor([ 0,  0, 21, 17,  2,  8,  7,  7])
print(label_length)        # e.g. 8



torch.Size([3, 24, 94])
tensor([ 0,  0, 21, 17,  2,  8,  7,  7])
8


In [5]:
from torch.utils.data import DataLoader


def ctc_collate_fn(batch):
    """Collate ``(image, label, length)`` samples into CTC-ready batch tensors.

    Args:
        batch: Sequence of ``(image_tensor, label_tensor, label_length)`` tuples.

    Returns:
        A tuple ``(images, labels, lengths)``: the images stacked along a new
        leading batch dimension, all label tensors concatenated into one flat
        tensor, and the per-sample label lengths as a ``torch.long`` tensor.
    """
    sample_images, sample_labels, sample_lengths = zip(*batch)
    return (
        torch.stack(sample_images, dim=0),                # [B, C, H, W]
        torch.cat(sample_labels),                         # flat targets
        torch.tensor(sample_lengths, dtype=torch.long),   # [B]
    )


# Number of samples per mini-batch, shared by all three loaders.
BATCH_SIZE = 32

# Every split uses the same batch size and CTC collate function; only the
# training split is shuffled.
_common_loader_args = {"batch_size": BATCH_SIZE, "collate_fn": ctc_collate_fn}
train_loader = DataLoader(train_ds, shuffle=True, **_common_loader_args)
val_loader = DataLoader(val_ds, shuffle=False, **_common_loader_args)
test_loader = DataLoader(test_ds, shuffle=False, **_common_loader_args)


In [6]:
# Sanity-check one collated batch. The loaders created in the previous cell
# are reused here: rebuilding them with a literal batch size of 32 duplicated
# that cell and could silently drift away from BATCH_SIZE.
images, labels, lengths = next(iter(train_loader))
print(images.shape)   # [BATCH_SIZE, 3, 24, 94]
print(labels.shape)   # flat 1D tensor, e.g. torch.Size([180])
print(lengths.shape)  # [BATCH_SIZE], lengths of each label


torch.Size([32, 3, 24, 94])
torch.Size([227])
torch.Size([32])


In [7]:
def make_input_lengths_from_logits(logits):
    """Build the per-sample input-length vector that CTC loss expects.

    Args:
        logits: Tensor laid out as ``(N, class_num, T)``.

    Returns:
        A ``torch.long`` tensor of shape ``(N,)`` in which every entry is ``T``,
        i.e. every sample uses the full time axis.
    """
    time_steps = logits.shape[2]
    num_samples = logits.shape[0]
    return torch.full(size=(num_samples,), fill_value=time_steps, dtype=torch.long)

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# ======== Training configuration ========
# Optimisation settings
EPOCHS = 50                 # full passes over the training set
BATCH_SIZE = 32             # samples per mini-batch
LR = 1e-3                   # learning rate handed to the optimizer

# Model settings
CLASS_NUM = 37              # output classes: digits 0-9, letters A-Z, plus the CTC blank
MAX_LABEL_LEN = 10          # nominal max characters per sample (placeholder, only used for model design)

# Run on the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# ========================================

# ===== Dataset & DataLoader =====
def ctc_collate_fn(batch):
    """Turn a list of ``(image, label, length)`` samples into batch tensors for CTC.

    Returns ``(images, labels, lengths)``: images stacked on a new batch
    dimension, labels concatenated into one flat tensor, and the label
    lengths as a ``torch.long`` tensor.
    """
    # NOTE(review): a function with this name is already defined in an earlier
    # cell; this definition shadows it. Consider keeping a single definition.
    image_seq, label_seq, length_seq = zip(*batch)
    stacked_images = torch.stack(image_seq, dim=0)
    flat_labels = torch.cat(label_seq)
    label_lengths = torch.tensor(length_seq, dtype=torch.long)
    return stacked_images, flat_labels, label_lengths

# Training batches are shuffled; validation batches keep a fixed order.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=ctc_collate_fn,
)
val_loader = DataLoader(
    dataset=val_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=ctc_collate_fn,
)

# ===== Model, Loss, Optimizer =====
# Build the network and move it to the selected device.
model = build_lprnet(MAX_LABEL_LEN, CLASS_NUM)
model = model.to(DEVICE)
# CTC loss averaged over the batch; the blank symbol is the dataset's '-' entry.
criterion = nn.CTCLoss(reduction='mean', blank=train_ds.char_to_idx['-'])
# Adam over all model parameters with the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=LR)


# ===== Training & Validation =====
def train_one_epoch(epoch, log_interval=10):
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``criterion``, ``optimizer`` and ``DEVICE``.

    Args:
        epoch: Zero-based epoch index; only used in the progress messages.
        log_interval: Print the mean loss of the last ``log_interval`` batches
            every ``log_interval`` steps. Defaults to 10.

    Returns:
        Mean training loss over all batches of the epoch (``nan`` when the
        loader yields no batches). Batches after the last full logging window
        are never printed, so this value is the only place they are reflected.
    """
    model.train()
    window_loss = 0.0   # loss summed since the last progress print
    epoch_loss = 0.0    # loss summed over the whole epoch
    num_batches = 0

    for batch_idx, (images, labels, target_lengths) in enumerate(train_loader):
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)
        target_lengths = target_lengths.to(DEVICE)

        # Forward: model output [N, C, T] -> [T, N, C], the layout CTCLoss takes.
        logits = model(images).permute(2, 0, 1)
        log_probs = logits.log_softmax(2)

        # Every sample uses the full time axis; the tensor is created directly
        # on the target device instead of on the CPU followed by a copy.
        input_lengths = torch.full(
            size=(images.size(0),),
            fill_value=log_probs.size(0),
            dtype=torch.long,
            device=DEVICE,
        )

        # Loss
        loss = criterion(log_probs, labels, input_lengths, target_lengths)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        window_loss += batch_loss
        epoch_loss += batch_loss
        num_batches += 1

        if (batch_idx + 1) % log_interval == 0:
            print(f"Epoch [{epoch+1}], Step [{batch_idx+1}/{len(train_loader)}], "
                  f"Loss: {window_loss/log_interval:.4f}")
            window_loss = 0.0

    return epoch_loss / num_batches if num_batches else float("nan")


def validate(epoch):
    """Compute and print the mean CTC loss over ``val_loader``.

    Uses the module-level ``model``, ``criterion`` and ``DEVICE``; gradients
    are disabled for the whole pass.

    Args:
        epoch: Zero-based epoch index; only used in the printed message.

    Returns:
        The summed batch losses divided by the number of validation batches.
    """
    model.eval()
    batch_losses = []

    with torch.no_grad():
        for images, labels, target_lengths in val_loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            target_lengths = target_lengths.to(DEVICE)

            # [N, C, T] -> [T, N, C], then log-probabilities over classes.
            time_major = model(images).permute(2, 0, 1)
            log_probs = time_major.log_softmax(2)

            # All samples use the full time axis.
            num_samples = images.size(0)
            num_steps = time_major.size(0)
            input_lengths = torch.full(size=(num_samples,),
                                       fill_value=num_steps,
                                       dtype=torch.long).to(DEVICE)

            batch_loss = criterion(log_probs, labels, input_lengths, target_lengths)
            batch_losses.append(batch_loss.item())

    val_loss = sum(batch_losses, 0.0) / len(val_loader)
    print(f"Epoch [{epoch+1}] Validation Loss: {val_loss:.4f}")
    return val_loss


# ===== Main Loop =====
# Lowest validation loss seen so far; starts at +inf so the first epoch always saves.
best_val_loss = float("inf")

for epoch in range(EPOCHS):
    train_one_epoch(epoch)
    val_loss = validate(epoch)

    # Only a strict improvement triggers a checkpoint.
    if not (val_loss < best_val_loss):
        continue

    # Save best model
    best_val_loss = val_loss
    torch.save(model.state_dict(), "lprnet_best.pth")
    print(f"✅ Saved best model at epoch {epoch+1} with val_loss={val_loss:.4f}")


Epoch [1], Step [10/19], Loss: 5.6086
Epoch [1] Validation Loss: 52.4369
✅ Saved best model at epoch 1 with val_loss=52.4369
Epoch [2], Step [10/19], Loss: 3.7520
Epoch [2] Validation Loss: 13.6140
✅ Saved best model at epoch 2 with val_loss=13.6140
Epoch [3], Step [10/19], Loss: 3.6394
Epoch [3] Validation Loss: 7.6765
✅ Saved best model at epoch 3 with val_loss=7.6765
Epoch [4], Step [10/19], Loss: 3.5888
Epoch [4] Validation Loss: 5.5754
✅ Saved best model at epoch 4 with val_loss=5.5754
Epoch [5], Step [10/19], Loss: 3.5460
Epoch [5] Validation Loss: 3.6545
✅ Saved best model at epoch 5 with val_loss=3.6545
Epoch [6], Step [10/19], Loss: 3.4927
Epoch [6] Validation Loss: 3.8135
Epoch [7], Step [10/19], Loss: 3.3886
Epoch [7] Validation Loss: 3.7435
Epoch [8], Step [10/19], Loss: 3.3040
Epoch [8] Validation Loss: 3.5553
✅ Saved best model at epoch 8 with val_loss=3.5553
Epoch [9], Step [10/19], Loss: 3.0602
Epoch [9] Validation Loss: 3.2087
✅ Saved best model at epoch 9 with val_los

In [14]:
def validate_test():
    """Compute and print the mean CTC loss over ``test_loader``.

    Uses the module-level ``model``, ``criterion`` and ``DEVICE``; gradients
    are disabled for the whole pass.

    Returns:
        The summed batch losses divided by the number of test batches.
    """
    model.eval()
    test_loss = 0.0

    with torch.no_grad():
        for images, labels, target_lengths in test_loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            target_lengths = target_lengths.to(DEVICE)

            # [N, C, T] -> [T, N, C], then log-probabilities over classes.
            logits = model(images)
            logits = logits.permute(2, 0, 1)
            log_probs = logits.log_softmax(2)

            # All samples use the full time axis.
            input_lengths = torch.full(size=(images.size(0),),
                                       fill_value=logits.size(0),
                                       dtype=torch.long,
                                       device=DEVICE)

            loss = criterion(log_probs, labels, input_lengths, target_lengths)
            test_loss += loss.item()

    # Normalise by the number of *test* batches. Dividing by len(val_loader)
    # mis-scales the result whenever the two splits differ in batch count.
    test_loss /= len(test_loader)
    print(f"Test Loss: {test_loss:.4f}")
    return test_loss
validate_test()

Test Loss: 0.6498


0.649839868148168

In [None]:
# Load best model: restore the weights saved to "lprnet_best.pth", mapping
# them onto the current device, and switch the model to evaluation mode.
model.load_state_dict(torch.load("lprnet_best.pth", map_location=DEVICE))
model.eval()

# Test loader in fixed (unshuffled) order.
# NOTE(review): an identical test_loader is already built in an earlier cell.
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, collate_fn=ctc_collate_fn)

# Probe the output shape on the first test batch only (the loop breaks after
# one iteration); labels and target_lengths are unpacked but not used here.
with torch.no_grad():
    for images, labels, target_lengths in test_loader:
        images = images.to(DEVICE)
        logits = model(images)                     # [N, C, T]
        preds = logits.argmax(1)                   # per-step best class index, [N, T]; repeats/blanks not yet collapsed
        print(preds.shape)  # torch.Size([32, 18])
        break


torch.Size([32, 18])


In [15]:
def greedy_decode(logits, idx_to_char, blank_idx):
    """Best-path CTC decoding of a batch of per-step class scores.

    Args:
        logits: ``[T, N, C]`` tensor (log probs or raw logits).
        idx_to_char: Dictionary mapping class index (int) -> character.
        blank_idx: Index of the CTC blank symbol.

    Returns:
        A list of ``N`` strings. For each sample the arg-max class is taken at
        every time step, consecutive repeats are merged, and blanks are dropped.
    """
    best_paths = logits.argmax(dim=2).transpose(0, 1)   # [N, T]

    decoded = []
    for path in best_paths.tolist():
        kept_chars = []
        last_idx = None
        for class_idx in path:
            # Emit only when the class changes and is not the blank symbol.
            if class_idx != blank_idx and class_idx != last_idx:
                kept_chars.append(idx_to_char[class_idx])
            last_idx = class_idx
        decoded.append("".join(kept_chars))
    return decoded


In [16]:
def predict_image(model, image, dataset):
    """Decode the text predicted for a single image.

    Args:
        model: Trained model; it is switched to evaluation mode.
        image: Image tensor ``[3, 24, 94]`` (no batch dimension).
        dataset: Dataset object supplying ``idx_to_char`` and the index of
            the blank symbol ``'-'`` via ``char_to_idx``.

    Returns:
        The greedily decoded string for the image.
    """
    model.eval()
    with torch.no_grad():
        single_batch = image.unsqueeze(0).to(DEVICE)     # [1, 3, 24, 94]
        time_major = model(single_batch).permute(2, 0, 1)  # [N, C, T] -> [T, N, C]
        decoded = greedy_decode(
            time_major,
            dataset.idx_to_char,
            dataset.char_to_idx['-'],
        )
        return decoded[0]


In [35]:
# Take a sample from your test set
# Position of the test sample to inspect.
i = 9
img = test_ds[i][0]   # keep only the image; the encoded label and its length are not needed here
# The file name (including its extension) is printed as the reference value.
print("Ground truth:", test_ds.image_files[i])

pred = predict_image(model, img, test_ds)
print("Prediction  :", pred)


Ground truth: 75N1960G.png
Prediction  : 75N19600
