<a href="https://colab.research.google.com/github/sujhaan/Licence_plate_recognition/blob/main/Liecense_plate_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from concurrent.futures import ThreadPoolExecutor, as_completed

# Dataset directories and annotation CSVs; everything lives under one
# License_plate folder, so the shared prefix is spelled out once.
drive_root = '/content/drive/MyDrive/License_plate'
train_set1_dir = f'{drive_root}/Train_dataset_1/license_plates_recognition_train'
train_set2_dir = f'{drive_root}/Train_dataset_2/license_plates_detection_train'
test_set_dir = f'{drive_root}/test/test'
bbox_csv_path = f'{drive_root}/Bounding_box.csv.csv'
text_csv_path = f'{drive_root}/Text_annotation.csv.csv'

# Read both annotation tables up front; the lookup helpers below query
# them by the 'img_id' column.
bbox_df = pd.read_csv(filepath_or_buffer=bbox_csv_path)
text_df = pd.read_csv(filepath_or_buffer=text_csv_path)

def load_annotations_for_image(image_file):
    """Look up the bounding box recorded for ``image_file`` in ``bbox_df``.

    Args:
        image_file: Value to match against the ``img_id`` column.

    Returns:
        ``(ymin, xmin, ymax, xmax)`` taken from the first matching row, or
        ``None`` (after printing a warning) when no row matches.
    """
    matches = bbox_df.loc[bbox_df['img_id'] == image_file]
    if not matches.empty:
        # Only the first matching row is used, even if the id is duplicated.
        return tuple(
            matches[column].values[0]
            for column in ('ymin', 'xmin', 'ymax', 'xmax')
        )

    print(f"Warning: No bounding box annotations found for image file: {image_file}")
    return None

def load_label_for_image(image_file):
    """Return the plate text recorded for ``image_file`` in ``text_df``.

    Args:
        image_file: Value to match against the ``img_id`` column.

    Returns:
        The ``text`` value of the first matching row. An empty string is
        returned (after printing a warning) when no row matches or when the
        matching row's ``text`` cell is missing (NaN/None), so that a blank
        CSV cell is reported as "no label" instead of being passed on as a
        NaN value that the ``!= ''`` filter downstream would not catch.
    """
    matches = text_df.loc[text_df['img_id'] == image_file, 'text']
    if matches.empty or pd.isna(matches.values[0]):
        print(f"Warning: No text annotation found for image file: {image_file}")
        return ''
    return matches.values[0]

def load_and_preprocess_data(img_file, directory, is_test=False):
    """Read one image, resize it to 224x224 and scale pixel values by 1/255.

    Args:
        img_file: File name inside ``directory``; also used as the lookup key
            for the annotation helpers.
        directory: Folder containing the image.
        is_test: When true, no annotations are looked up and placeholder
            values are returned for the box and label.

    Returns:
        ``(image, bbox, label)``. ``(None, None, None)`` is returned when the
        image cannot be read or (for non-test data) has no bounding box.
    """
    nothing = (None, None, None)

    raw = cv2.imread(os.path.join(directory, img_file))
    if raw is None:
        print(f"Warning: Unable to read image file: {img_file}")
        return nothing

    # INTER_AREA is the interpolation chosen for this resize.
    resized = cv2.resize(raw, (224, 224), interpolation=cv2.INTER_AREA)
    normalized = resized / 255.0

    if is_test:
        # Dummy values for bounding boxes and labels
        return normalized, [0, 0, 0, 0], ''

    # NOTE(review): the box is returned exactly as stored in the CSV; it is
    # not rescaled to the 224x224 image. Presumably the CSV coordinates refer
    # to the original image size -- confirm whether they should be scaled.
    box = load_annotations_for_image(img_file)
    if box is None:
        return nothing
    return normalized, box, load_label_for_image(img_file)

def load_data_in_parallel(directory, is_test=False):
    """Load every file in ``directory`` through a thread pool.

    Each file name is passed to ``load_and_preprocess_data``; samples whose
    image comes back as ``None`` are dropped.

    File names are sorted and results are collected in submission order, so
    the returned arrays have the same sample order on every run. (Directory
    listing order is arbitrary and completion order depends on thread
    timing; without a stable order a seeded train/validation split is not
    reproducible.)

    Args:
        directory: Folder whose entries are loaded.
        is_test: Forwarded to ``load_and_preprocess_data``.

    Returns:
        ``(images, bboxes, labels)`` as three NumPy arrays aligned by index.
    """
    img_files = sorted(os.listdir(directory))

    def load_one(img_file):
        return load_and_preprocess_data(img_file, directory, is_test)

    images, bboxes, labels = [], [], []
    with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
        # Executor.map yields results in the order of img_files, regardless
        # of which worker finishes first.
        for img, bbox, label in executor.map(load_one, img_files):
            if img is not None:
                images.append(img)
                bboxes.append(bbox)
                labels.append(label)
    return np.array(images), np.array(bboxes), np.array(labels)

# Load the recognition training set (images, boxes and plate texts)
train_images1, train_bboxes, train_labels = load_data_in_parallel(train_set1_dir)

# Keep only samples whose box has no NaN coordinate and whose label is
# non-empty; the same index selection is applied to all three arrays so
# they stay aligned.
bbox_has_nan = np.isnan(train_bboxes).any(axis=1)
label_present = train_labels != ''
indices = np.where(~bbox_has_nan & label_present)
train_images1 = train_images1[indices]
train_bboxes = train_bboxes[indices]
train_labels = train_labels[indices]

# Print lengths for debugging
for array_name, array in (
    ("train_images1", train_images1),
    ("train_bboxes", train_bboxes),
    ("train_labels", train_labels),
):
    print(f"Length of {array_name}: {len(array)}")

# All three arrays must describe the same number of samples before splitting
sample_counts = {len(train_images1), len(train_bboxes), len(train_labels)}
assert len(sample_counts) == 1, "Inconsistent sample sizes before splitting."

# Create a character to integer mapping.
# Index 0 is reserved for padding (pad_sequences pads with 0 by default and
# encode_labels maps unknown characters to 0), so real characters are
# numbered from 1. Starting at 0 would make the first character
# indistinguishable from padding and leave the last class index unused.
characters = {char for label in train_labels for char in label}
char_to_int = {char: i for i, char in enumerate(sorted(characters), start=1)}
num_classes = len(char_to_int) + 1  # +1 for the padding class at index 0

# Convert labels to integer sequences
def encode_labels(labels):
    """Translate each label into a list of integer codes via ``char_to_int``.

    Characters that are not present in ``char_to_int`` are encoded as 0.
    Returns one list of codes per input label, in input order.
    """
    encoded = []
    for text in labels:
        codes = [char_to_int.get(symbol, 0) for symbol in text]
        encoded.append(codes)
    return encoded

def pad_sequences_to_max_length(sequences):
    """Pad every sequence at the end so all match the longest one.

    The target length is the length of the longest entry in ``sequences``;
    padding is appended after the existing values (``padding='post'``).
    """
    longest = max(map(len, sequences))
    return pad_sequences(sequences, padding='post', maxlen=longest)

# Turn the text labels into equal-length integer sequences
train_labels_encoded = encode_labels(train_labels)
train_labels_padded = pad_sequences_to_max_length(train_labels_encoded)

# One-hot encode the labels: for sample i and position j, set the entry at
# class train_labels_padded[i, j] to 1.
# (relies on pad_sequences having returned a 2-D integer array)
n_samples, seq_len = train_labels_padded.shape
train_labels_categorical = np.zeros((n_samples, seq_len, num_classes))
sample_idx = np.arange(n_samples)[:, None]
position_idx = np.arange(seq_len)[None, :]
train_labels_categorical[sample_idx, position_idx, train_labels_padded] = 1

# Hold out 20% of the samples for validation; images, one-hot labels and
# boxes are split together so they remain aligned.
(
    X_train, X_val,
    y_train, y_val,
    bbox_train, bbox_val,
) = train_test_split(
    train_images1,
    train_labels_categorical,
    train_bboxes,
    test_size=0.2,
    random_state=42,
)

# Model: ImageNet-initialised VGG16 without its classifier head, followed by
# a flatten layer and a 4-unit linear layer that regresses the box coordinates.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
x = Flatten()(base_model.output)
bbox_output = Dense(4, activation='linear', name='bbox_output')(x)

model = Model(inputs=base_model.input, outputs=bbox_output)

# Compile with mean squared error on the box output
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss={'bbox_output': 'mean_squared_error'},
)

# Print the layer overview
model.summary()

# Targets are keyed by the output layer's name
train_targets = {'bbox_output': bbox_train}
val_targets = {'bbox_output': bbox_val}

# Train the model
history = model.fit(
    x=X_train,
    y=train_targets,
    validation_data=(X_val, val_targets),
    epochs=10,
    batch_size=32,
    verbose=1,
)

# Evaluate the model on validation set
val_loss = model.evaluate(x=X_val, y=val_targets)
print(f"Validation Loss: {val_loss}")

# Predict bounding boxes on validation data
bbox_pred = model.predict(x=X_val)

# A prediction counts as correct when its IoU with the annotated box
# exceeds this threshold
iou_threshold = 0.5

def compute_iou(box1, box2):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Each box is a 4-sequence ordered ``(ymin, xmin, ymax, xmax)`` -- the
    order in which ``load_annotations_for_image`` returns annotations, and
    therefore the order of the targets the model is fitted on. (Unpacking
    the coordinates as max-before-min makes every intersection clamp to
    zero, so the order here must match the data.)

    Args:
        box1: First box, ``(ymin, xmin, ymax, xmax)``.
        box2: Second box, same layout.

    Returns:
        IoU in ``[0, 1]``; ``0.0`` when the boxes do not overlap or the
        union has no area.
    """
    y1_min, x1_min, y1_max, x1_max = box1
    y2_min, x2_min, y2_max, x2_max = box2

    # Overlap rectangle; a negative extent means the boxes are disjoint
    # along that axis, so clamp it to zero.
    inter_width = max(0, min(x1_max, x2_max) - max(x1_min, x2_min))
    inter_height = max(0, min(y1_max, y2_max) - max(y1_min, y2_min))
    inter_area = inter_width * inter_height

    # A regressed box can come out inverted (max < min). Clamp each side so
    # such a box counts as empty instead of contributing a negative or
    # spuriously positive area.
    box1_area = max(0, x1_max - x1_min) * max(0, y1_max - y1_min)
    box2_area = max(0, x2_max - x2_min) * max(0, y2_max - y2_min)

    union_area = box1_area + box2_area - inter_area
    return inter_area / union_area if union_area > 0 else 0.0

# Pair each predicted box with its annotated box and score the pair by IoU
ious = np.array(list(map(compute_iou, bbox_pred, bbox_val)))

# Accuracy is the fraction of validation samples whose IoU exceeds the threshold
accuracy = (ious > iou_threshold).mean()
print(f"Accuracy based on IoU threshold of {iou_threshold}: {accuracy}")


Length of train_images1: 890
Length of train_bboxes: 890
Length of train_labels: 890


Epoch 1/10
