# Reading characters on a license plate

### Initialization

In [2]:
# Import packages
import torch
import torch.nn as nn
import torchvision.models as models
from PIL import Image
from torchvision import transforms

In [3]:
# Image preparation pipeline applied before an image is fed to the model

# Target size (in pixels) that every input image is resized to
IMAGE_HEIGHT, IMAGE_WIDTH = 263, 800

# ImageNet per-channel mean and standard deviation used for normalization
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Resize -> tensor conversion -> channel-wise normalization, applied in that order
preprocess = transforms.Compose([
    transforms.Resize((IMAGE_HEIGHT, IMAGE_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

def process_img(img):
    """Load an image file and convert it into a normalized, batched tensor.

    Args:
        img: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        Tensor of shape [1, 3, IMAGE_HEIGHT, IMAGE_WIDTH] produced by the
        module-level ``preprocess`` pipeline, with a leading batch dimension
        of size 1.
    """
    # The context manager releases the underlying file handle deterministically;
    # convert() returns an independent in-memory copy, so it is safe to use the
    # result after the file has been closed.
    with Image.open(img) as raw_image:
        image = raw_image.convert('RGB')  # Ensure it's RGB (3 channels)

    # Apply transformations (resize, tensor conversion, normalization)
    image_tensor = preprocess(image)

    # Add a batch dimension (for a single image)
    return image_tensor.unsqueeze(0)

### Implement model

In [4]:
# Pre-trained VGG16 backbone and label-space constants

# Plates are read as a fixed number of character slots, each classified
# over the alphanumeric alphabet
NUM_CHARACTERS = 7  # License plates have 7 characters
NUM_CLASSES = 36    # 26 letters + 10 digits

# Load VGG16 with its default pre-trained weights, then freeze every parameter
# of its convolutional feature extractor so those weights receive no gradients
vgg16 = models.vgg16(weights='VGG16_Weights.DEFAULT')
vgg16.features.requires_grad_(False)

# Helper: number of features VGG16's convolutional stack yields for a given input size
def get_flattened_size(input_height, input_width):
    """Return the flattened feature count of ``vgg16.features`` for one image.

    A random single-image batch of shape (1, 3, input_height, input_width) is
    pushed through the module-level ``vgg16`` feature extractor without
    tracking gradients, and the number of elements of the resulting feature
    map is returned as an int.
    """
    probe = torch.rand(1, 3, input_height, input_width)
    with torch.no_grad():
        feature_map = vgg16.features(probe)
    # The batch holds exactly one sample, so its element count is the flattened size
    return feature_map[0].numel()

# Input width of the first fully connected layer for the configured image size
flattened_size = get_flattened_size(
    input_height=IMAGE_HEIGHT,
    input_width=IMAGE_WIDTH,
)

In [6]:
# Model

class ReadPlate(nn.Module):
    """VGG16-based classifier that predicts every character of a plate in one pass.

    The convolutional layers are taken from the module-level ``vgg16`` model.
    A two-layer fully connected head maps the flattened feature map to
    ``num_characters * num_classes`` scores, which ``forward`` reshapes to one
    row of class scores per character position and normalizes with a
    log-softmax over the class axis.

    Args:
        flattened: Number of features produced by the VGG16 feature extractor
            once flattened (input size of the first linear layer).
        num_characters: Number of character positions predicted per plate.
        num_classes: Number of candidate classes per character position.
    """

    def __init__(self, flattened, num_characters, num_classes):
        super().__init__()
        self.num_characters = num_characters
        self.num_classes = num_classes

        # Use the pretrained VGG16 model up to the feature extractor
        self.vgg16_features = nn.Sequential(*list(vgg16.features.children()))

        # Fully connected head. It emits raw scores for all characters at once;
        # the log-softmax is applied per character in forward(), because
        # normalizing the flat vector would couple the character slots together.
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flattened, 512),
            nn.ReLU(),
            nn.Linear(512, num_characters * num_classes),
        )

    def forward(self, x):
        """Return log-probabilities of shape (batch_size, num_characters, num_classes)."""
        # Pass through VGG16 feature extractor
        x = self.vgg16_features(x)

        # Fully connected head (nn.Flatten inside it flattens the feature map)
        x = self.fc(x)

        # One row of class scores per character position
        x = x.view(-1, self.num_characters, self.num_classes)

        # Normalize over the class axis so each character gets its own distribution
        return torch.log_softmax(x, dim=2)
    

# Build the network and switch it to evaluation mode: this notebook only runs inference.
# NOTE(review): no trained checkpoint is loaded in the visible cells, so the fully
# connected head would still hold its initial weights — confirm the trained
# state_dict is loaded before trusting the predictions.
model = ReadPlate(flattened=flattened_size, num_characters=NUM_CHARACTERS, num_classes=NUM_CLASSES).eval()

Source: https://github.com/ramyh08/Licence-Plate-recognition-CNN/tree/main

### Inference

In [11]:
# Perform inference over an image

# NOTE: machine-specific absolute path — point this at a local copy of the image before running
img = 'C:/Users/User/Desktop/Assignatures/Vision and Learning/License Plate/vision-and-learning/P_G_12492604_01539.jpeg'
img_tensor = process_img(img)

# Inference only: disable gradient tracking so no autograd graph is built
# for the forward pass
with torch.no_grad():
    model_output = model(img_tensor)

# Class index -> character lookup (letters first, then digits)
characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
licenses = []

for element in model_output:
    # Get the index of the highest-scoring class for each character position
    pred = torch.argmax(element, dim=1)

    # Map each predicted class index to its character and join them into the plate string
    license_plate = ''.join(characters[index] for index in pred.tolist())
    licenses.append(license_plate)

print(f"Predicted License Plate: {licenses}")

Predicted License Plate: ['WVXMKA7']
