In [1]:
# Import the required libraries

from google.colab import drive
import os
import cv2
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay

In [2]:
# Mount Google Drive at /content/drive; data_path (defined in the next cell)
# points inside this mount, so this cell must run before any data is read.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Root folder of the assignment data on the mounted Google Drive
data_path = '/content/drive/MyDrive/ProjectOct/DATA SCIENTIST_ASSIGNMENT'

# Annotation CSVs and the folder holding the recognition training images,
# all located directly under data_path
det_csv = "/".join((data_path, "Licplatesdetection_train.csv"))
rec_csv = "/".join((data_path, "Licplatesrecognition_train.csv"))
img_recog = "/".join((data_path, "Licplatesrecognition_train"))

In [4]:
# Load the detection and recognition annotation CSVs into DataFrames.
# rec_data supplies the 'img_id' and 'text' columns consumed by prep_data.
# NOTE(review): det_data is not referenced again in the cells visible here.
det_data = pd.read_csv(det_csv)
rec_data = pd.read_csv(rec_csv)

In [5]:
# Character set: digits 0-9, the Urdu letters, and one space character.
# The literal below separates the Urdu letters with spaces for readability.
# Enumerating it directly would register the space 29 times, inflating the
# number of output classes with indices no label can ever use and making
# num_to_char map many indices to ' '. Duplicates are therefore removed while
# keeping first-occurrence order (dict.fromkeys preserves insertion order).
_raw_chars = "0123456789ا ب پ ت ث ج ح خ د ذ ر ز س ش ص ض ط ظ ع غ ف ق ک گ ل م ن و ہ ی"
chars = "".join(dict.fromkeys(_raw_chars))

# char_to_num maps each character to its class index; num_to_char is the inverse.
# NOTE: index 0 belongs to the digit '0'; prep_data also pads labels with 0.
char_to_num = {char: idx for idx, char in enumerate(chars)}
num_to_char = dict(enumerate(chars))

In [6]:
# Preprocess the recognition data: load and resize each image, and convert its
# plate text into a fixed-length sequence of integer class indices.
def prep_data(rec_data, img_dir, max_len=8):
    """Load plate images and encode their text labels as fixed-length index rows.

    Args:
        rec_data: DataFrame whose rows provide 'img_id' (file name inside
            img_dir) and 'text' (the plate text).
        img_dir: Directory containing the image files.
        max_len: Fixed label length. Shorter labels are right-padded with 0,
            longer labels are truncated to max_len.

    Returns:
        Tuple (imgs, labels). imgs is a float32 array of shape (N, 64, 128, 3)
        scaled by 1/255; cv2.resize takes (width, height), so (128, 64) yields
        64 rows by 128 columns. labels is an integer array of shape (N, max_len).
        When no sample could be loaded, both arrays are empty but keep these
        trailing dimensions.

    Rows are skipped (and reported via print) when the image file does not
    exist, cannot be decoded by OpenCV, or has no label text.
    """
    imgs, labels = [], []
    missing, unreadable, unlabeled = [], [], []

    for _, row in rec_data.iterrows():
        img_id = row['img_id']
        img_path = os.path.join(img_dir, img_id)

        if not os.path.exists(img_path):
            missing.append(img_id)
            continue

        text = row['text']
        if pd.isna(text):
            # An empty CSV cell is read as NaN, which cannot be iterated.
            unlabeled.append(img_id)
            continue

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals a corrupt/unsupported file by returning None.
            unreadable.append(img_id)
            continue

        # Resize to 128 wide x 64 high and rescale pixel values to [0, 1].
        imgs.append(cv2.resize(img, (128, 64)).astype('float32') / 255.0)

        # Encode the label; characters outside char_to_num are dropped.
        # Truncating keeps every row at exactly max_len entries so the label
        # list always converts to a rectangular array.
        label = [char_to_num[ch] for ch in str(text) if ch in char_to_num][:max_len]
        # NOTE: the pad value 0 is also the index of the digit '0'.
        label += [0] * (max_len - len(label))
        labels.append(label)

    if missing:
        print(f"Missing files: {missing}")
    if unreadable:
        print(f"Unreadable files: {unreadable}")
    if unlabeled:
        print(f"Files without label text: {unlabeled}")

    if not imgs:
        # Keep the trailing dimensions so downstream shape lookups still work.
        return (np.zeros((0, 64, 128, 3), dtype='float32'),
                np.zeros((0, max_len), dtype=int))

    return np.array(imgs), np.array(labels, dtype=int)

In [7]:
# One-hot encode integer label sequences (integer matrix -> binary 3-D tensor)
def encode_labels(labels, chars):
    """Return the one-hot encoding of a 2-D array of class indices.

    Args:
        labels: Integer array of shape (num_samples, seq_len).
        chars: The character set; only its length (number of classes) is used.

    Returns:
        Float array of shape (num_samples, seq_len, len(chars)) holding 1 at
        [i, j, labels[i, j]] and 0 everywhere else.
    """
    n_samples, seq_len = len(labels), labels.shape[1]
    one_hot = np.zeros((n_samples, seq_len, len(chars)))

    # Broadcast row/column index grids against the label matrix so that every
    # (sample, position, class) triple is set in a single assignment.
    sample_idx = np.arange(n_samples)[:, None]
    pos_idx = np.arange(seq_len)[None, :]
    one_hot[sample_idx, pos_idx, labels] = 1

    return one_hot

In [8]:
# Preprocess the recognition dataset: X holds the resized, rescaled images and
# y the integer label sequences returned by prep_data.
X, y = prep_data(rec_data, img_recog)
# y is then replaced by its one-hot encoding (one class vector per position),
# which is the target format for the categorical cross-entropy loss used later.
y = encode_labels(y, chars)

In [9]:
# Build a CRNN: convolutional feature extractor -> GRU sequence model -> per-step classifier
def build_crnn(input_shape, num_classes, max_len=8):
    """Assemble the (uncompiled) CRNN used for plate recognition.

    Args:
        input_shape: Shape of one input image, (height, width, channels).
        num_classes: Size of the softmax output at every sequence position.
        max_len: Number of sequence positions (characters) predicted per image.
            The flattened size of the last convolutional feature map must be
            divisible by max_len for the Reshape layer to be valid.

    Returns:
        A keras Sequential model whose output has shape
        (batch, max_len, num_classes).
    """
    return models.Sequential([
        # Explicit Input layer instead of passing input_shape= to the first
        # Conv2D, which Keras warns against for Sequential models.
        layers.Input(shape=input_shape),

        # Convolutional feature extraction: three Conv -> BatchNorm -> MaxPool stages
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.3),  # Regularization after the conv stack

        # Split the flattened feature map into max_len equal chunks, one per
        # sequence position: (batch, max_len, features)
        layers.Reshape((max_len, -1)),

        # Recurrent layers over the max_len positions
        layers.GRU(256, return_sequences=True),
        layers.GRU(256, return_sequences=True),
        layers.Dropout(0.3),  # Regularization after the recurrent stack

        # Independent softmax over the character classes at every position
        layers.TimeDistributed(layers.Dense(num_classes, activation='softmax')),
    ])

In [10]:
# Model parameters
# Keras expects (height, width, channels). The images are produced with
# cv2.resize(img, (128, 64)), whose size argument is (width, height), so each
# array has 64 rows and 128 columns.
input_shape = (64, 128, 3)
# One output class per entry of the character set
num_classes = len(chars)

In [11]:
# Learning-rate schedule: start at 1e-4 and multiply by 0.9 for every 10000
# optimizer steps.
# NOTE(review): the training log below shows 45 steps per epoch, so a full
# 50-epoch run is 2250 steps and the rate stays close to its initial value.
lr_schedule = ExponentialDecay(initial_learning_rate=1e-4, decay_steps=10000, decay_rate=0.9)
optimizer = Adam(learning_rate=lr_schedule)

In [12]:
# Build the CRNN and compile it with the scheduled Adam optimizer.
# Categorical cross-entropy matches the one-hot targets produced by encode_labels.
crnn = build_crnn(input_shape, num_classes)
crnn.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
crnn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
# Hold out 20% of the samples for validation; random_state fixes the shuffle
# so the split is the same on every run.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
# Data augmentation: apply small random transformations (rotation, zoom) during training to improve generalization.
# The generator yields batches of 16 augmented images with their one-hot labels.
train_gen = ImageDataGenerator(rotation_range=5, zoom_range=0.2).flow(X_train, y_train, batch_size=16)
# Despite its name, val_gen is a plain (inputs, targets) tuple, not a generator.
val_gen = (X_val, y_val)  # Validation data is not augmented

In [15]:
# Label smoothing for one-hot targets
def smooth_labels(labels, smoothing=0.1):
    """Blend one-hot labels with a uniform distribution over the classes.

    Args:
        labels: Array whose last axis enumerates the classes.
        smoothing: Fraction of probability mass spread evenly over all classes.

    Returns:
        Array of the same shape: labels * (1 - smoothing) + smoothing / num_classes.
    """
    # The per-class share of the smoothing mass depends on the class-axis size.
    uniform_share = smoothing / labels.shape[-1]
    keep_fraction = 1 - smoothing
    return labels * keep_fraction + uniform_share

# Apply label smoothing to the training and validation targets.
# NOTE(review): y_train_smooth / y_val_smooth are not used by any cell visible
# here -- train_gen and val_gen were built from the unsmoothed y_train / y_val,
# and those are what crnn.fit receives, so smoothing currently has no effect.
y_train_smooth = smooth_labels(y_train)
y_val_smooth = smooth_labels(y_val)

In [16]:
# Early stopping: halt training once val_loss has not improved for 5
# consecutive epochs, and restore the weights from the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [17]:
# Train the CRNN.
# train_gen already yields batches of 16, so batch_size is not passed to fit
# (Keras documents that it must not be specified for generator inputs).
# validation_batch_size keeps the in-memory validation tuple evaluated in
# batches of 16, as before.
history = crnn.fit(
    train_gen,
    validation_data=val_gen,
    validation_batch_size=16,
    epochs=50,
    callbacks=[early_stopping],
)

Epoch 1/50


  self._warn_if_super_not_called()


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 479ms/step - accuracy: 0.1662 - loss: 3.6261 - val_accuracy: 0.0000e+00 - val_loss: 5.7746
Epoch 2/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 458ms/step - accuracy: 0.3170 - loss: 2.3267 - val_accuracy: 0.0000e+00 - val_loss: 6.6259
Epoch 3/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 413ms/step - accuracy: 0.3367 - loss: 2.1123 - val_accuracy: 0.0125 - val_loss: 6.2376
Epoch 4/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 419ms/step - accuracy: 0.3270 - loss: 2.0548 - val_accuracy: 0.1021 - val_loss: 5.0808
Epoch 5/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 428ms/step - accuracy: 0.3345 - loss: 2.0122 - val_accuracy: 0.2896 - val_loss: 3.1664
Epoch 6/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 411ms/step - accuracy: 0.3390 - loss: 1.9639 - val_accuracy: 0.3375 - val_loss: 2.2073
Epoch 7/50
[1m45/45[0m 

In [18]:
# Load and preprocess the test images from a directory
def load_test_imgs(test_dir):
    """Load every JPG in test_dir, resized and rescaled like the training images.

    Args:
        test_dir: Directory containing the test images.

    Returns:
        Tuple (imgs, img_ids). imgs is a float32 array of shape (N, 64, 128, 3)
        scaled by 1/255 (cv2.resize takes (width, height), so (128, 64) yields
        64 rows by 128 columns); img_ids lists the corresponding file names in
        sorted order. If no image could be loaded, imgs has shape
        (0, 64, 128, 3) and img_ids is empty.

    Files whose names do not end in '.jpg' (case-insensitive) are ignored.
    Files that OpenCV cannot decode are skipped and reported via print.
    """
    imgs, img_ids, unreadable = [], [], []

    # Sorted so the output order is deterministic across runs.
    for img_file in sorted(os.listdir(test_dir)):
        if not img_file.lower().endswith('.jpg'):
            continue

        img = cv2.imread(os.path.join(test_dir, img_file))
        if img is None:
            # cv2.imread signals a corrupt/unsupported file by returning None.
            unreadable.append(img_file)
            continue

        imgs.append(cv2.resize(img, (128, 64)).astype('float32') / 255.0)  # Normalize
        img_ids.append(img_file)  # The file name doubles as the image ID

    if unreadable:
        print(f"Unreadable files skipped: {unreadable}")

    if not imgs:
        # Stacking an empty list raises; return a correctly shaped empty batch.
        return np.zeros((0, 64, 128, 3), dtype='float32'), img_ids

    return np.stack(imgs), img_ids

In [19]:
# Decode one prediction (a sequence of per-class score vectors) back to text
def decode_pred(pred, max_len=8):
    """Turn a single model prediction into its text.

    Args:
        pred: Iterable of per-position score vectors; the highest-scoring
            class at each position is looked up in num_to_char.
        max_len: Unused; kept so existing callers keep working.

    Returns:
        The decoded string, one character per position in pred.
    """
    best_indices = map(np.argmax, pred)
    return ''.join(num_to_char[idx] for idx in best_indices)

In [20]:
# Directory holding the test images, located under data_path
test_dir = f"{data_path}/test"

In [21]:
# Load all test images; test_img_ids keeps the file names in the same order
# as the rows of X_test so predictions can be matched back to files.
X_test, test_img_ids = load_test_imgs(test_dir)

In [22]:
# Predict the license-plate text for a batch of test images
def predict_lp(crnn, X_test):
    """Run the model on X_test and decode every prediction to text.

    Args:
        crnn: Model object exposing predict().
        X_test: Batch of preprocessed images passed to crnn.predict.

    Returns:
        List of decoded strings, one per prediction, in input order.
    """
    raw_outputs = crnn.predict(X_test)
    return list(map(decode_pred, raw_outputs))

In [23]:
# Run the trained model on the test set; test_preds[i] is the decoded text
# for the image at X_test[i].
test_preds = predict_lp(crnn, X_test)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 214ms/step


In [24]:
# Save the predicted license plates to a CSV file
def save_preds_to_csv(img_ids, preds, output_csv):
    """Write (image ID, predicted text) pairs to a UTF-8 CSV file.

    Args:
        img_ids: Iterable of image identifiers.
        preds: Iterable of predicted strings, paired positionally with img_ids
            (pairing stops at the shorter of the two).
        output_csv: Destination path; an existing file is overwritten.
    """
    import csv

    header = ['Image ID', 'Predicted License Plate']
    # newline='' lets the csv module control line endings itself.
    with open(output_csv, mode='w', newline='', encoding='utf-8') as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(header)
        csv_out.writerows(zip(img_ids, preds))

In [25]:
# Save the predictions next to the input data on Google Drive, then report
# the destination path.
output_csv = f"{data_path}/test_preds.csv"
save_preds_to_csv(test_img_ids, test_preds, output_csv)
print(f"Predictions saved to {output_csv}")

Predictions saved to /content/drive/MyDrive/ProjectOct/DATA SCIENTIST_ASSIGNMENT/test_preds.csv
