# Reading characters on license plate

### Initialization

In [38]:
# Import packages
import torch
import torch.nn as nn
import torchvision.models as models
from PIL import Image
from torchvision import transforms
import os
from torch.utils.data import Dataset, DataLoader

In [39]:
# Define image size and possible characters
IMAGE_HEIGHT = 263 
IMAGE_WIDTH = 800  
TRAIN_DIR = 'Cars\Dataset Cars\Train'
TEST_DIR = 'Cars\Dataset Cars\Validation'
CHARACTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
NUM_CHARACTERS = 7 # License plates have 7 characters

# Define the transformations
transform = transforms.Compose([
    transforms.Resize((IMAGE_HEIGHT, IMAGE_WIDTH)),  # Resize image
    transforms.ToTensor(),  # Convert image to a tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet mean and std
])

### Load data

In [40]:
# Create a dataset class for our challenge
class LicenseData(Dataset):
    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.transform = transform        
        # Read images from folder
        self.image_files = [f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.jpeg', '.png'))]        

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        # Get the image file path
        img_name = self.image_files[idx]
        img_path = os.path.join(self.image_dir, img_name)

        # Load the image
        image = Image.open(img_path).convert('RGB')

        # Apply the image transformations
        if self.transform:
            image = self.transform(image)

        # Extract the license plate from the file name
        license_plate = os.path.splitext(img_name)[0]

        # Convert the license plate string to a list of integer indices
        label_tensor = torch.tensor([CHARACTERS.index(c) for c in license_plate], dtype=torch.long)

        return image, label_tensor

In [41]:
# Create dataset and dataloader
trainset = LicenseData(image_dir=TRAIN_DIR, transform=transform)
train_loader = DataLoader(trainset, batch_size=32, shuffle=True)

testset = LicenseData(image_dir=TEST_DIR, transform=transform)
test_loader = DataLoader(testset, batch_size=32, shuffle=False)

### Implement model

In [42]:
# Load the pre-trained VGG16 model and freeze its earlier layers
vgg16 = models.vgg16(weights='VGG16_Weights.DEFAULT')
for param in vgg16.features.parameters():
    param.requires_grad = False

# Helper function to compute the flattened output size after passing through VGG16
def get_flattened_size(input_height, input_width):    
    dummy_input = torch.rand(1, 3, input_height, input_width)
    with torch.no_grad():
        features = vgg16.features(dummy_input)
    flattened_size = features.view(1, -1).size(1)
    return flattened_size

# Get the flattened size for the given image dimensions
flattened_size = get_flattened_size(IMAGE_HEIGHT, IMAGE_WIDTH)

In [43]:
# Model
class ReadPlate(nn.Module):
    def __init__(self, flattened, num_characters):
        super(ReadPlate, self).__init__()
        self.num_characters = num_characters        
        
        # Use the pretrained VGG16 model up to the feature extractor        
        self.vgg16_features = nn.Sequential(*list(vgg16.features.children()))
        
        # Fully connected layers after the VGG16 feature extraction
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flattened, 512),
            nn.ReLU(),
            nn.Linear(512, num_characters * len(CHARACTERS)),  # Predict all characters at once            
        )        

    def forward(self, x):
        # Pass through VGG16 feature extractor
        x = self.vgg16_features(x)
        x = x.view(x.size(0), -1)  # Flatten
        
        # Pass through fully connected layers
        x = self.fc(x)
        
        # Reshape output to (batch_size, num_characters, num_classes)
        x = x.view(-1, self.num_characters, len(CHARACTERS))        
        return x

Source: https://github.com/ramyh08/Licence-Plate-recognition-CNN/tree/main

### Training

In [44]:
# Instantiate a model
model = ReadPlate(flattened=flattened_size, num_characters=NUM_CHARACTERS)

# Loss function: CrossEntropyLoss (expects raw logits, before applying softmax)
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam optimizer for fine-tuning the final layers
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [45]:
def save_checkpoint(model, optimizer, epoch, loss, checkpoint_path='checkpoint.pth'):
    """Saves a model checkpoint with model state, optimizer state, epoch, and loss."""
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epoch': epoch,
        'loss': loss
    }, checkpoint_path)
    print(f"Checkpoint saved at epoch {epoch}")

In [46]:
# Training loop
def train(model, dataloader, criterion, optimizer, num_epochs=10, checkpoint='', save_every=10, device='cpu'):
    model = model.to(device)
    model.train()  # Set model to training mode

    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)  # Shape: [batch_size, num_characters]

            print('here')

            # Forward pass
            optimizer.zero_grad()
            outputs = model(images)  # Shape: [batch_size, num_characters, num_classes]

            # Reshape the output and labels
            outputs = outputs.view(-1, len(CHARACTERS)) # Shape: [batch_size * num_characters, num_classes]
            labels = labels.view(-1) # Shape: [batch_size * num_characters]

            # Compute the loss
            loss = criterion(outputs, labels)

            # Backpropagation and optimization
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        avg_loss = running_loss / len(dataloader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

        # Save checkpoint at specified intervals
        if (epoch + 1) % save_every == 0:
            save_checkpoint(model, optimizer, epoch + 1, avg_loss, checkpoint+str((epoch+1))+'.pth')

### Testing

In [47]:
def validate(model, dataloader, criterion, device='cpu'):
    model = model.to(device)
    model.eval()  # Set model to evaluation mode
    validation_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)

            # Reshape the output and labels
            outputs = outputs.view(-1, len(CHARACTERS))
            labels = labels.view(-1)

            # Compute the loss
            loss = criterion(outputs, labels)
            validation_loss += loss.item()

            # Compute accuracy
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Validation Loss: {validation_loss/len(dataloader):.4f}, Accuracy: {accuracy:.2f}%')
    return accuracy

### Inference

In [48]:
def load_checkpoint(model, optimizer=None, checkpoint_path='checkpoint.pth'):
    """Loads a model and optionally an optimizer from a checkpoint file."""
    # Load the checkpoint from file
    checkpoint = torch.load(checkpoint_path)

    # Load model state dictionary
    model.load_state_dict(checkpoint['model_state_dict'])
    
    # If optimizer is provided, load the optimizer state dictionary (used when resuming training)
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    
    # Get the last epoch and loss
    epoch = checkpoint.get('epoch', 0)
    loss = checkpoint.get('loss', None)
    
    print(f"Checkpoint loaded: Resuming from epoch {epoch}, loss: {loss:.4f}")
    
    return model, optimizer, epoch, loss

In [49]:
def process_img(img_path):
    # Load and preprocess the image
    image = Image.open(img_path).convert('RGB') 
    image_tensor = transform(image)
    image_tensor = image_tensor.unsqueeze(0)  # Add a batch dimension
    return image_tensor

def predict_license_plate(model, image_tensor):
    # Set the model to evaluation mode
    model.eval()
    
    # Perform inference
    with torch.no_grad():
        outputs = model(image_tensor)  # Output shape: [1, num_characters, num_classes]    
    
    outputs = outputs.view(-1, len(CHARACTERS))  # Shape: [num_characters, num_classes]
    
    # Get predictions by taking argmax across the class dimension
    _, predicted_indices = torch.max(outputs, dim=1)
    
    # Map the indices to characters    
    license_plate = ''.join([CHARACTERS[idx.item()] for idx in predicted_indices])
    
    return license_plate

## Main

In [50]:
model = ReadPlate(flattened=flattened_size, num_characters=NUM_CHARACTERS)
results_name = 'Cars/Dataset Cars/Models/readlicense'

In [51]:
# Start training
train(model, train_loader, criterion, optimizer, checkpoint=results_name, save_every=10, num_epochs=10)

here
here
here
here
here
here
here
here
here
here
Epoch [1/10], Loss: 3.5862
here
here
here
here
here
here
here
here
here
here
Epoch [2/10], Loss: 3.5880
here
here
here
here
here
here
here
here
here
here
Epoch [3/10], Loss: 3.5932
here
here
here
here
here
here
here
here
here
here
Epoch [4/10], Loss: 3.5927
here
here
here
here
here
here
here
here
here
here
Epoch [5/10], Loss: 3.5908
here
here
here
here
here
here
here
here
here
here
Epoch [6/10], Loss: 3.5872
here
here
here
here
here
here
here
here
here
here
Epoch [7/10], Loss: 3.5955
here
here
here
here
here
here
here
here
here
here
Epoch [8/10], Loss: 3.5917
here
here


KeyboardInterrupt: 

In [None]:
# Start testing
accuracy = validate(model, test_loader, criterion)

In [45]:
# Start inference
img_path = 'C:/Users/User/Desktop/Assignatures/Vision and Learning/License Plate/vision-and-learning/P_G_12492604_01539.jpeg'

# Load the model weights if you have saved them
#checkpoint_path = ''
#model, optimizer, epoch, loss = load_checkpoint(model, optimizer, checkpoint_path)

image_tensor = process_img(img_path)
license_plate = predict_license_plate(model, image_tensor)

print(f"Predicted License Plate: {license_plate}")

Predicted License Plate: 4H1SMCI
