In [None]:
from model.LPRNet import build_lprnet
from torchsummary import summary

model = build_lprnet()
summary(model, input_size=(1, 24, 94))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 18, 88]             400
         MaxPool2d-2             [-1, 8, 9, 44]               0
              ReLU-3             [-1, 8, 9, 44]               0
            Conv2d-4            [-1, 10, 5, 40]           2,010
         MaxPool2d-5            [-1, 10, 2, 20]               0
              ReLU-6            [-1, 10, 2, 20]               0
            Linear-7                   [-1, 32]          12,832
              ReLU-8                   [-1, 32]               0
            Linear-9                    [-1, 6]             198
           Conv2d-10            [-1, 3, 24, 94]               6
             ReLU-11            [-1, 3, 24, 94]               0
           Conv2d-12           [-1, 64, 22, 92]           1,792
      BatchNorm2d-13           [-1, 64, 22, 92]             128
             ReLU-14           [-1, 64,

In [3]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms
import string

class ImageFolderCTCDataset(Dataset):
    def __init__(self, folder_path, image_shape=(1, 24, 94), augment=False):
        self.folder_path = folder_path
        self.image_files = os.listdir(folder_path)
        _, height, width = image_shape

        if augment:
            self.transform = transforms.Compose([
                transforms.Resize((height, width)),
                transforms.RandomAffine(
                    degrees=5,
                    translate=(0.05, 0.05),
                    scale=(0.9, 1.1),
                    shear=0
                ),
                transforms.Grayscale(num_output_channels=1),
                transforms.ToTensor()   # grayscale tensor in [0,1]
            ])
        else:
            self.transform = transforms.Compose([
                transforms.Resize((height, width)),
                transforms.Grayscale(num_output_channels=1),
                transforms.ToTensor()   # grayscale tensor in [0,1]
            ])

        # dictionary build
        self.chars = list(string.digits + string.ascii_uppercase)
        self.chars.append('-')
        self.char_to_idx = {ch: i for i, ch in enumerate(self.chars)}
        self.idx_to_char = {i: ch for i, ch in enumerate(self.chars)}

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.folder_path, img_name)

        image = Image.open(img_path).convert("L")  # grayscale
        image = self.transform(image)

        # Extract label
        label_str = img_name.split('.')[0].split('_')[0]
        label_encoded = [self.char_to_idx[ch] for ch in label_str]

        return image, torch.tensor(label_encoded, dtype=torch.long), len(label_encoded)


In [None]:
IMAGE_SHAPE = (3, 24, 94)
data_set_folder = r"LPRNet_Pytorch/data/split/"
train_ds = ImageFolderCTCDataset(data_set_folder + "/train", image_shape=IMAGE_SHAPE, augment=True)
val_ds = ImageFolderCTCDataset(data_set_folder + "/val", image_shape=IMAGE_SHAPE)
test_ds = ImageFolderCTCDataset(data_set_folder + "/test", image_shape=IMAGE_SHAPE)

img, label_encoded, label_length = train_ds[0]

print(img.shape)           # torch.Size([3, 24, 94])
print(label_encoded)       # tensor([ 0,  0, 21, 17,  2,  8,  7,  7])
print(label_length)        # 8



torch.Size([1, 24, 94])
tensor([ 0,  0, 21, 17,  2,  8,  7,  7])
8


In [5]:
from torch.utils.data import DataLoader


def ctc_collate_fn(batch):
    images, labels, lengths = zip(*batch)  # unzip list of tuples

    # Stack images [B, C, H, W]
    images = torch.stack(images, dim=0)

    # Concatenate all labels into one flat tensor
    labels = torch.cat(labels)

    # Convert lengths to tensor
    lengths = torch.tensor(lengths, dtype=torch.long)

    return images, labels, lengths


BATCH_SIZE = 32
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, collate_fn=ctc_collate_fn)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, collate_fn=ctc_collate_fn)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, collate_fn=ctc_collate_fn)


In [6]:

# check a batch
images, labels, lengths = next(iter(train_loader))
print(images.shape)   # [32, 3, 24, 94]
print(labels.shape)   # flat 1D tensor, e.g. torch.Size([180])
print(lengths.shape)  # [32], lengths of each label


torch.Size([32, 1, 24, 94])
torch.Size([219])
torch.Size([32])


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# -------- Config ----------
CLASS_NUM = 37              # number of classes (0-9, A-Z, plus blank)
MAX_LABEL_LEN = 10          # max characters per sample (fake, for model design)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LR = 1e-3
EPOCHS = 100
BATCH_SIZE = 32
# --------------------------

# ===== Dataset & DataLoader =====
def ctc_collate_fn(batch):
    images, labels, lengths = zip(*batch)
    images = torch.stack(images, dim=0)
    labels = torch.cat(labels)
    lengths = torch.tensor(lengths, dtype=torch.long)
    return images, labels, lengths

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, collate_fn=ctc_collate_fn)
val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, collate_fn=ctc_collate_fn)

# ===== Model, Loss, Optimizer =====
model = build_lprnet(MAX_LABEL_LEN, CLASS_NUM).to(DEVICE)
model.load_state_dict(torch.load("lprnet_best.pth", map_location=DEVICE))

criterion = nn.CTCLoss(blank=train_ds.char_to_idx['-'], reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=LR)


# ===== Training & Validation =====
def train_one_epoch(epoch):
    model.train()
    running_loss = 0.0

    for batch_idx, (images, labels, target_lengths) in enumerate(train_loader):
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)
        target_lengths = target_lengths.to(DEVICE)

        # Forward
        logits = model(images)                     # [N, C, T]
        logits = logits.permute(2, 0, 1)           # [T, N, C]
        log_probs = logits.log_softmax(2)

        # Input lengths = all T
        input_lengths = torch.full(size=(images.size(0),), 
                                   fill_value=logits.size(0), 
                                   dtype=torch.long).to(DEVICE)

        # Loss
        loss = criterion(log_probs, labels, input_lengths, target_lengths)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if (batch_idx+1) % 10 == 0:
            print(f"Epoch [{epoch+1}], Step [{batch_idx+1}/{len(train_loader)}], "
                  f"Loss: {running_loss/10:.4f}")
            running_loss = 0.0


def validate(epoch):
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for images, labels, target_lengths in val_loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            target_lengths = target_lengths.to(DEVICE)

            logits = model(images)
            logits = logits.permute(2, 0, 1)
            log_probs = logits.log_softmax(2)

            input_lengths = torch.full(size=(images.size(0),), 
                                       fill_value=logits.size(0), 
                                       dtype=torch.long).to(DEVICE)

            loss = criterion(log_probs, labels, input_lengths, target_lengths)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    print(f"Epoch [{epoch+1}] Validation Loss: {val_loss:.4f}")
    return val_loss

In [None]:
# ===== Main Loop =====
best_val_loss = float("inf")

for epoch in range(EPOCHS):
    train_one_epoch(epoch)
    val_loss = validate(epoch)

    # Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "chinese_lprnet_best.pth")
        print(f"✅ Saved best model at epoch {epoch+1} with val_loss={val_loss:.4f}")


Epoch [1], Step [10/19], Loss: 0.3170
Epoch [1] Validation Loss: 0.5419
✅ Saved best model at epoch 1 with val_loss=0.5419
Epoch [2], Step [10/19], Loss: 0.3155
Epoch [2] Validation Loss: 0.3242
✅ Saved best model at epoch 2 with val_loss=0.3242
Epoch [3], Step [10/19], Loss: 0.2842
Epoch [3] Validation Loss: 0.3150
✅ Saved best model at epoch 3 with val_loss=0.3150
Epoch [4], Step [10/19], Loss: 0.2652
Epoch [4] Validation Loss: 0.3646
Epoch [5], Step [10/19], Loss: 0.2988
Epoch [5] Validation Loss: 0.4781
Epoch [6], Step [10/19], Loss: 0.3177
Epoch [6] Validation Loss: 0.3450
Epoch [7], Step [10/19], Loss: 0.2559
Epoch [7] Validation Loss: 0.5174
Epoch [8], Step [10/19], Loss: 0.3038
Epoch [8] Validation Loss: 0.4544
Epoch [9], Step [10/19], Loss: 0.2997
Epoch [9] Validation Loss: 0.3179
Epoch [10], Step [10/19], Loss: 0.2734
Epoch [10] Validation Loss: 0.4233
Epoch [11], Step [10/19], Loss: 0.2697
Epoch [11] Validation Loss: 0.3415
Epoch [12], Step [10/19], Loss: 0.2870
Epoch [12] V

In [9]:
def validate_test():
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for images, labels, target_lengths in test_loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            target_lengths = target_lengths.to(DEVICE)

            logits = model(images)
            logits = logits.permute(2, 0, 1)
            log_probs = logits.log_softmax(2)

            input_lengths = torch.full(size=(images.size(0),), 
                                       fill_value=logits.size(0), 
                                       dtype=torch.long).to(DEVICE)

            loss = criterion(log_probs, labels, input_lengths, target_lengths)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    return val_loss

print(f"Test Loss: {validate_test():.4f}") 

Test Loss: 0.1634


In [10]:
# Load best model
model.load_state_dict(torch.load("lprnet_best.pth", map_location=DEVICE))
model.eval()

test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, collate_fn=ctc_collate_fn)

with torch.no_grad():
    for images, labels, target_lengths in test_loader:
        images = images.to(DEVICE)
        logits = model(images)                     # [N, C, T]
        preds = logits.argmax(1)                   # simple greedy decode (still needs CTC decoding)
        print(preds.shape)  # torch.Size([32, 18])
        break


torch.Size([32, 18])


In [11]:
def greedy_decode(logits, idx_to_char, blank_idx):
    """
    logits: [T, N, C] tensor (log probs or raw logits)
    idx_to_char: dictionary mapping int -> char
    blank_idx: index of blank symbol
    """
    preds = logits.argmax(2).permute(1, 0)   # [N, T]

    results = []
    for pred in preds:
        string = ""
        prev = None
        for p in pred.cpu().numpy():
            if p != prev and p != blank_idx:   # collapse + remove blank
                string += idx_to_char[p]
            prev = p
        results.append(string)
    return results


In [12]:
def predict_image(model, image, dataset):
    """
    model   : trained model
    image   : tensor [3, 24, 94]
    dataset : dataset object (for idx_to_char, blank index)
    """
    model.eval()
    with torch.no_grad():
        image = image.unsqueeze(0).to(DEVICE)        # add batch dim [1, 3, 24, 94]
        logits = model(image)                        # [N, C, T]
        logits = logits.permute(2, 0, 1)             # [T, N, C]

        preds = greedy_decode(logits, dataset.idx_to_char, dataset.char_to_idx['-'])
        return preds[0]


In [None]:
# Take a sample from your test set
i = 9
img, _, _ = test_ds[i]
print("Ground truth:", test_ds.image_files[i])

pred = predict_image(model, img, test_ds)
print("Prediction  :", pred)


Ground truth: A09N61.jpg
Prediction  : 7M1


In [None]:
import os
import cv2
import torch

def evaluate_and_save(model, dataset, output_dir="results"):
    model.eval()
    correct = 0
    total = len(dataset)

    # Prepare output folders
    right_dir = os.path.join(output_dir, "right")
    wrong_dir = os.path.join(output_dir, "wrong")
    os.makedirs(right_dir, exist_ok=True)
    os.makedirs(wrong_dir, exist_ok=True)

    with torch.no_grad():
        for i in range(total):
            img, label_encoded, _ = dataset[i]

            # Ground truth string
            label_str = "".join(dataset.idx_to_char[idx.item()] for idx in label_encoded)

            # Prediction
            pred_str = predict_image(model, img, dataset)

            # Convert tensor -> numpy for saving
            if isinstance(img, torch.Tensor):
                np_img = img.squeeze().cpu().numpy() * 255.0
                np_img = np_img.astype("uint8")
            else:
                np_img = img

            # Build filename: prediction_GT_index.png
            filename = f"{pred_str}_GT-{label_str}_{i}.png"

            if pred_str == label_str:
                correct += 1
                save_path = os.path.join(right_dir, filename)
            else:
                save_path = os.path.join(wrong_dir, filename)

            # Save with OpenCV
            cv2.imwrite(save_path, np_img)

    accuracy = correct / total
    return accuracy


# Run

test_acc = evaluate_and_save(model, test_ds, output_dir="predictions")
print(f"Test Accuracy: {test_acc:.2%}")


Test Accuracy: 0.00%
