# Reading characters on a license plate

### Initialization

In [17]:
# Import packages
import torch
import torch.nn as nn
import torchvision.models as models
from PIL import Image
from torchvision import transforms
import os
from torch.utils.data import Dataset, DataLoader

In [23]:
# Input geometry, data locations and label alphabet
IMAGE_HEIGHT, IMAGE_WIDTH = 263, 800  # (height, width) every image is resized to
TRAIN_DIR = ''  # folder holding the training images
TEST_DIR = ''  # folder holding the test images
CHARACTERS = (
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'  # letters -> class indices 0-25
    '0123456789'  # digits -> class indices 26-35
)
NUM_CHARACTERS = 7  # License plates have 7 characters

# Preprocessing pipeline applied to every image before it reaches the network
transform = transforms.Compose(
    [
        # Bring every image to the fixed (height, width) defined above
        transforms.Resize(size=(IMAGE_HEIGHT, IMAGE_WIDTH)),
        # Convert the image to a tensor
        transforms.ToTensor(),
        # Normalize with the ImageNet mean and std
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

### Load data

In [19]:
# Create a dataset class for our challenge
class LicenseData(Dataset):
    """Dataset of license-plate images whose file name is the plate text.

    Every sample is a pair ``(image, label_tensor)``. ``label_tensor`` is a
    ``torch.long`` tensor with one entry per character of the file name
    (extension removed): the index of that character in ``CHARACTERS``.

    Args:
        image_dir: Folder containing the images. Files ending in ``.jpg``,
            ``.jpeg`` or ``.png`` (any letter case) are used.
        transform: Optional callable applied to the RGB PIL image.

    Raises:
        ValueError: From ``__getitem__`` when a file name contains a
            character that is not in ``CHARACTERS``.
    """

    # Accepted file extensions, compared against the lower-cased file name
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order, so sort to keep the
        # index -> file mapping stable between runs. Lower-casing the name
        # keeps files such as 'ABC1234.JPG' from being silently skipped.
        self.image_files = sorted(
            f for f in os.listdir(image_dir)
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Return the number of image files found in ``image_dir``."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx`` and encode its file name as class indices."""
        # Get the image file path
        img_name = self.image_files[idx]
        img_path = os.path.join(self.image_dir, img_name)

        # Load the image; the context manager closes the underlying file,
        # convert() returns an independent RGB copy
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        # Apply the image transformations
        if self.transform:
            image = self.transform(image)

        # Extract the license plate from the file name
        license_plate = os.path.splitext(img_name)[0]

        # Convert the license plate string to a list of integer indices
        try:
            indices = [CHARACTERS.index(c) for c in license_plate]
        except ValueError as err:
            # Same exception type as before, but naming the offending file
            raise ValueError(
                f"File name {img_name!r} contains a character that is not in CHARACTERS"
            ) from err
        label_tensor = torch.tensor(indices, dtype=torch.long)

        return image, label_tensor

In [20]:
# Create dataset and dataloader
# On Windows, DataLoader workers are separate spawned processes that must
# re-import the dataset class; a class defined in an interactive notebook
# session generally cannot be re-imported there, so load in the main process
# on Windows and keep 4 workers everywhere else.
NUM_WORKERS = 0 if os.name == 'nt' else 4

trainset = LicenseData(image_dir=TRAIN_DIR, transform=transform)
train_loader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=NUM_WORKERS)

testset = LicenseData(image_dir=TEST_DIR, transform=transform)
test_loader = DataLoader(testset, batch_size=32, shuffle=False, num_workers=NUM_WORKERS)

FileNotFoundError: [WinError 3] The system cannot find the path specified: ''

### Implement model

In [24]:
# Pre-trained VGG16 backbone: the weights of its convolutional feature
# extractor are frozen so that they are not updated during training
vgg16 = models.vgg16(weights='VGG16_Weights.DEFAULT')
for param in vgg16.features.parameters():
    param.requires_grad = False

# Helper: how many values does the VGG16 feature extractor emit per image?
def get_flattened_size(input_height, input_width):
    """Return the flattened length of the ``vgg16.features`` output.

    A single random 3-channel image of the requested size is pushed through
    the feature extractor with gradient tracking disabled, and the number of
    elements in the result is returned.
    """
    probe = torch.rand(1, 3, input_height, input_width)
    with torch.no_grad():
        feature_maps = vgg16.features(probe)
    return feature_maps.view(1, -1).size(1)


# Input width of the first fully connected layer for our image dimensions
flattened_size = get_flattened_size(IMAGE_HEIGHT, IMAGE_WIDTH)

In [25]:
# Model
class ReadPlate(nn.Module):
    """VGG16 feature extractor followed by a small fully connected head.

    The head predicts all character positions at once; ``forward`` returns
    a tensor reshaped to ``(batch_size, num_characters, len(CHARACTERS))``.

    Args:
        flattened: Number of input features of the first linear layer.
        num_characters: Number of character positions to predict.
    """

    def __init__(self, flattened, num_characters):
        super().__init__()
        self.num_characters = num_characters

        # Reuse the layers of the module-level pretrained ``vgg16`` extractor
        self.vgg16_features = nn.Sequential(*vgg16.features.children())

        # Head on top of the extracted features: one output value per
        # (character position, character class) pair
        head_layers = [
            nn.Flatten(),
            nn.Linear(flattened, 512),
            nn.ReLU(),
            nn.Linear(512, num_characters * len(CHARACTERS)),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map a batch of images to per-position character scores."""
        # Feature extraction, then one flat vector per sample
        feature_maps = self.vgg16_features(x)
        flat = feature_maps.view(feature_maps.size(0), -1)

        # Fully connected head
        scores = self.fc(flat)

        # (batch_size, num_characters, num_classes)
        return scores.view(-1, self.num_characters, len(CHARACTERS))

Source: https://github.com/ramyh08/Licence-Plate-recognition-CNN/tree/main

### Training

In [None]:
# Build the network for our image size and plate length
model = ReadPlate(flattened_size, NUM_CHARACTERS)

# CrossEntropyLoss is fed the raw logits (no softmax applied beforehand)
criterion = nn.CrossEntropyLoss()

# Adam over the model parameters with a learning rate of 1e-3
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Training loop
def train(model, dataloader, criterion, optimizer, num_epochs=10, device='cpu'):
    """Train ``model`` for ``num_epochs`` passes over ``dataloader``.

    Args:
        model: Network returning logits of shape
            ``[batch_size, num_characters, num_classes]``.
        dataloader: Iterable of ``(images, labels)`` batches; labels have
            shape ``[batch_size, num_characters]``.
        criterion: Loss called as ``criterion(logits, targets)`` with the
            logits flattened to ``[batch_size * num_characters, num_classes]``
            and the targets flattened to ``[batch_size * num_characters]``.
        optimizer: Optimizer that updates the model parameters.
        num_epochs: Number of passes over ``dataloader``.
        device: Device the model and every batch are moved to.

    After each epoch the mean batch loss is printed (``nan`` when the
    dataloader produced no batches). Nothing is returned.
    """
    model = model.to(device)
    model.train()  # Set model to training mode

    for epoch in range(num_epochs):
        running_loss = 0.0
        # Counted while iterating so that loaders without __len__ work and an
        # empty loader cannot cause a division by zero
        num_batches = 0
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)  # Shape: [batch_size, num_characters]

            # Forward pass
            optimizer.zero_grad()
            outputs = model(images)  # Shape: [batch_size, num_characters, num_classes]

            # Flatten positions into the batch dimension; the class count is
            # read from the logits themselves instead of a module-level global
            outputs = outputs.view(-1, outputs.size(-1))  # [batch_size * num_characters, num_classes]
            labels = labels.view(-1)  # [batch_size * num_characters]

            # Compute the loss
            loss = criterion(outputs, labels)

            # Backpropagation and optimization
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        mean_loss = running_loss / num_batches if num_batches else float('nan')
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}')

### Testing

In [None]:
def validate(model, dataloader, criterion, device='cpu'):
    """Evaluate ``model`` on ``dataloader`` and return its accuracy in percent.

    Accuracy is counted per character: every position of every plate is one
    prediction that is either right or wrong.

    Args:
        model: Network returning logits of shape
            ``[batch_size, num_characters, num_classes]``.
        dataloader: Iterable of ``(images, labels)`` batches; labels have
            shape ``[batch_size, num_characters]``.
        criterion: Loss called as ``criterion(logits, targets)`` on the
            flattened logits and labels.
        device: Device the model and every batch are moved to.

    Returns:
        Percentage of correctly predicted characters, or ``0.0`` when the
        dataloader produced no labels. The mean batch loss and the accuracy
        are also printed (loss is ``nan`` when there were no batches).
    """
    model = model.to(device)
    model.eval()  # Set model to evaluation mode
    validation_loss = 0.0
    correct = 0
    total = 0
    # Counted while iterating so that loaders without __len__ work and an
    # empty loader cannot cause a division by zero
    num_batches = 0

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)

            # Flatten positions into the batch dimension; the class count is
            # read from the logits themselves instead of a module-level global
            outputs = outputs.view(-1, outputs.size(-1))
            labels = labels.view(-1)

            # Compute the loss
            loss = criterion(outputs, labels)
            validation_loss += loss.item()
            num_batches += 1

            # Compute accuracy
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total if total else 0.0
    mean_loss = validation_loss / num_batches if num_batches else float('nan')
    print(f'Validation Loss: {mean_loss:.4f}, Accuracy: {accuracy:.2f}%')
    return accuracy

### Inference

In [11]:
# Perform inference over an image

img = 'C:/Users/User/Desktop/Assignatures/Vision and Learning/License Plate/vision-and-learning/P_G_12492604_01539.jpeg'
# NOTE(review): process_img is not defined in the visible part of this
# notebook; presumably it loads the image, applies `transform` and adds a
# batch dimension -- confirm before running this cell.
img_tensor = process_img(img)

# Run the forward pass in evaluation mode and without gradient tracking,
# matching what validate() does
model.eval()
with torch.no_grad():
    model_output = model(img_tensor)

# Reuse the alphabet the labels were encoded with so that class indices and
# characters cannot drift apart
characters = CHARACTERS
licenses = []

for element in model_output:
    # Get the index of the highest-scoring class for each character position
    pred = torch.argmax(element, dim=1)
    # Map every predicted class index to its character and join them
    license_plate = ''.join(characters[character.item()] for character in pred)
    licenses.append(license_plate)

print(f"Predicted License Plate: {licenses}")

Predicted License Plate: ['WVXMKA7']
