In [2]:
# Importing necessary libraries
# For data visualization
import matplotlib.pyplot as plt
# For numerical operations
import numpy as np
# For computer vision and image processing
import cv2
# For building and training machine learning models
import tensorflow as tf
# For evaluating model performance using F1 score
from sklearn.metrics import f1_score
# For optimization algorithms
from tensorflow.keras import optimizers
# Sequential model type for building neural networks
from tensorflow.keras.models import Sequential
# For image data augmentation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Layers used to build CNNs
from tensorflow.keras.layers import Dense, Flatten, MaxPooling2D, Dropout, Conv2D

In [None]:
# Load the pre-trained Haar cascade used to locate Indian license plates.
# cv2.CascadeClassifier can hand back an empty classifier instead of raising
# when the XML file is missing or unreadable, so check explicitly and fail
# here rather than later inside detectMultiScale.
plate_cascade = cv2.CascadeClassifier('indian_license_plate.xml')
if plate_cascade.empty():
    raise FileNotFoundError(
        "Could not load Haar cascade file 'indian_license_plate.xml'"
    )

In [None]:
def detect_plate(img, text=''):
    """Detect license plates in ``img`` using the module-level ``plate_cascade``.

    Args:
        img: Image array to search. It is copied, never modified.
            The crop below indexes three axes, so a 3-D (colour) array is required.
        text: Optional label. When non-empty and at least one plate was
            detected, it is drawn near the last detected plate.

    Returns:
        tuple ``(plate_img, plate)``:
            * ``plate_img`` - copy of ``img`` with a rectangle drawn around
              every detection (plus the optional label).
            * ``plate`` - crop of the last detected plate, or ``None`` when
              the cascade found nothing.
    """
    # Separate copies: one to draw on, one left clean so the returned crop
    # does not contain the rectangle that is drawn over the plate.
    plate_img = img.copy()
    roi = img.copy()

    plate_rect = plate_cascade.detectMultiScale(
        plate_img,
        scaleFactor=1.2,   # image pyramid step between detection scales
        minNeighbors=7     # neighbouring hits required to keep a candidate
    )

    # Stay defined even when nothing is detected (previously this path raised
    # UnboundLocalError at the return statement / in putText).
    plate = None
    last_box = None

    for (x, y, w, h) in plate_rect:
        # Crop from the clean copy; the last detection wins, as before.
        plate = roi[y:y+h, x:x+w, :]
        last_box = (x, y, w, h)

        # Outline the detection (slightly inset) on the annotated copy.
        cv2.rectangle(
            plate_img,
            (x+2, y),
            (x+w-3, y+h-5),
            (51, 181, 155),   # BGR colour
            3                 # border thickness
        )

    # Label the last detected plate, but only if there is one to label.
    if text != '' and last_box is not None:
        x, y, w, h = last_box
        plate_img = cv2.putText(
            plate_img,
            text,
            (x - w // 2, y - h // 2),
            cv2.FONT_HERSHEY_COMPLEX_SMALL,
            0.5,              # font scale
            (51, 181, 155),   # BGR colour
            1,                # stroke thickness
            cv2.LINE_AA
        )

    return plate_img, plate


In [None]:
def display(img_, title=''):
    """Show an OpenCV image with Matplotlib.

    Args:
        img_: Either a BGR colour image (3-D array) or a single-channel
            grayscale image (2-D array).
        title: Title shown above the image.
    """
    if img_.ndim == 2:
        # Grayscale input: BGR->RGB conversion would fail on a 2-D array,
        # so show it directly with a gray colormap.
        img, cmap = img_, 'gray'
    else:
        # OpenCV stores colour images as BGR; Matplotlib expects RGB.
        img, cmap = cv2.cvtColor(img_, cv2.COLOR_BGR2RGB), None

    fig = plt.figure(figsize=(10, 6))   # 10x6 inch figure
    ax = fig.add_subplot(111)
    ax.imshow(img, cmap=cmap)
    ax.axis('off')                      # hide axis lines and ticks
    ax.set_title(title)
    plt.show()

# Load the sample image (OpenCV loads colour images in BGR order).
# cv2.imread returns None instead of raising when the file is missing or
# unreadable, so check explicitly before using the result.
img = cv2.imread('car.jpg')
if img is None:
    raise FileNotFoundError("Could not read image file 'car.jpg'")
display(img, 'input image')


In [None]:
# Run plate detection on the loaded image. detect_plate returns the annotated
# copy (kept as `output_img`) and the cropped plate region (kept as `plate`).
output_img, plate = detect_plate(img)

In [None]:
# Show the annotated copy returned by detect_plate (detections are outlined with a rectangle)
display(output_img, 'detected license plate in the input image')

In [None]:
# Show the cropped plate region returned by detect_plate
display(plate, 'extracted license plate from the image')

In [None]:
def find_contours(dimensions, img):
    """Extract candidate character images from a binary plate image.

    Args:
        dimensions: Sequence ``[lower_width, upper_width, lower_height,
            upper_height]``. A contour is kept only when its bounding-box
            width and height lie strictly inside these ranges.
        img: Single-channel binary image to search (not modified).
            Assumed to be 8-bit, as produced by ``segment_characters``.

    Returns:
        numpy array of 44x24 float images (a colour-inverted 40x20 character
        centred in a 2-pixel zero border), ordered left to right. Empty array
        when no contour passes the size filter.

    Side effects:
        Plots the input image with the accepted bounding boxes drawn on it.
    """
    # cv2.findContours returns (contours, hierarchy) in OpenCV 4.x and
    # (image, contours, hierarchy) in 3.x; the contours are always the
    # second-to-last element.
    cntrs = cv2.findContours(img.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    lower_width, upper_width, lower_height, upper_height = dimensions[:4]

    # Only the 15 largest contours (by area) are considered.
    cntrs = sorted(cntrs, key=cv2.contourArea, reverse=True)[:15]

    # Build the visualisation canvas from the input itself instead of
    # re-reading 'contour.jpg' from disk: the file may be missing (imread
    # would return None) or left over from another run with a different size.
    canvas = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    candidates = []   # (x coordinate, 44x24 character image)
    for cntr in cntrs:
        intX, intY, intWidth, intHeight = cv2.boundingRect(cntr)

        size_ok = (lower_width < intWidth < upper_width
                   and lower_height < intHeight < upper_height)
        if not size_ok:
            continue

        # Crop the character and bring it to the standard 20x40 (w x h) size.
        char = img[intY:intY+intHeight, intX:intX+intWidth]
        char = cv2.resize(char, (20, 40))

        # Mark the accepted contour on the visualisation canvas.
        cv2.rectangle(canvas, (intX, intY), (intWidth + intX, intY + intHeight), (50, 21, 200), 2)

        # Invert colours (black becomes white and vice versa).
        char = cv2.subtract(255, char)

        # Centre the character in a 44x24 frame; np.zeros already provides
        # the 2-pixel zero padding on every side.
        char_copy = np.zeros((44, 24))
        char_copy[2:42, 2:22] = char

        candidates.append((intX, char_copy))

    # Draw once after the loop rather than once per accepted contour.
    if candidates:
        plt.imshow(canvas, cmap='gray')
    plt.show()

    # Stable sort by x coordinate -> left-to-right reading order.
    candidates.sort(key=lambda item: item[0])
    return np.array([char_img for _, char_img in candidates])

In [None]:
def segment_characters(image):
    """Binarise a cropped plate image and split it into character images.

    Args:
        image: BGR crop of the license plate.

    Returns:
        The array returned by ``find_contours``: one image per accepted
        character contour, ordered left to right.

    Side effects:
        Plots the binarised plate and writes it to 'contour.jpg'.
    """
    # cv2.resize takes (width, height): the result has 75 rows and 333 columns.
    img_lp = cv2.resize(image, (333, 75))
    img_gray_lp = cv2.cvtColor(img_lp, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU the threshold is computed from the image and the
    # explicit threshold argument is ignored, so pass 0 to make that obvious.
    _, img_binary_lp = cv2.threshold(img_gray_lp, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Erode then dilate with a real 3x3 structuring element. Passing the
    # tuple (3, 3) as the kernel does not create a 3x3 kernel: the tuple is
    # converted to a 2-element array and used as a tiny kernel.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    img_binary_lp = cv2.erode(img_binary_lp, kernel)
    img_binary_lp = cv2.dilate(img_binary_lp, kernel)

    # shape is (rows, cols) == (height, width)
    n_rows, n_cols = img_binary_lp.shape[:2]

    # Force a 3-pixel white frame around the plate.
    img_binary_lp[:3, :] = 255     # top
    img_binary_lp[:, :3] = 255     # left
    img_binary_lp[-3:, :] = 255    # bottom
    img_binary_lp[:, -3:] = 255    # right

    # Size filter for character bounding boxes, in the order expected by
    # find_contours. The numeric values are deliberately unchanged: the width
    # limits are derived from the plate HEIGHT (n_rows) and the height limits
    # from the plate WIDTH (n_cols).
    # NOTE(review): the upper height limit exceeds the plate height, so it
    # never rejects anything - confirm whether that is intended.
    dimensions = [
        n_rows / 6,        # minimum character width
        n_rows / 2,        # maximum character width
        n_cols / 10,       # minimum character height
        2 * n_cols / 3     # maximum character height
    ]

    # Show the binarised plate for visual verification.
    plt.imshow(img_binary_lp, cmap='gray')
    plt.show()

    # Persist the binary image to disk.
    cv2.imwrite('contour.jpg', img_binary_lp)

    char_list = find_contours(dimensions, img_binary_lp)
    return char_list

In [None]:
# Segment the cropped plate into per-character images. segment_characters also
# plots the binarised plate and writes it to 'contour.jpg' as a side effect.
# NOTE(review): despite its name, `char` holds the whole array of character
# images returned by segment_characters, not a single character.
char = segment_characters(plate)

In [None]:
# Preview every segmented character in a single row. The number of columns
# follows the number of characters actually found, instead of assuming
# exactly 10 (which raised IndexError for shorter results and silently
# dropped characters for longer ones).
num_chars = len(char)
for i, ch in enumerate(char):
    plt.subplot(1, num_chars, i+1)   # one row, one column per character
    plt.imshow(ch, cmap='gray')
    plt.axis('off')

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training-time generator: rescale to [0, 1] and apply random augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,          # scale pixel values from [0, 255] to [0, 1]
    rotation_range=20,       # random rotation, up to 20 degrees
    width_shift_range=0.2,   # random horizontal shift, up to 20% of width
    height_shift_range=0.2,  # random vertical shift, up to 20% of height
    shear_range=0.2,         # random shear
    zoom_range=0.2,          # random zoom in/out, up to 20%
    # Mirroring is not label-preserving for characters (a flipped glyph is
    # not a valid sample of its class), so it is disabled.
    horizontal_flip=False,
    fill_mode='nearest'      # fill pixels exposed by a transform with the nearest value
)

# Validation-time generator: same rescaling, NO random augmentation, so the
# validation metric is measured on unaltered images and is comparable
# between epochs.
val_datagen = ImageDataGenerator(rescale=1./255)

# Training batches from 'train' (one sub-folder per class).
train_generator = train_datagen.flow_from_directory(
    'train',
    target_size=(28, 28),   # resize every image to 28x28
    batch_size=32,
    class_mode='sparse'     # integer class indices as labels
)

# Validation batches from 'val' (one sub-folder per class).
validation_generator = val_datagen.flow_from_directory(
    'val',
    target_size=(28, 28),
    batch_size=32,
    class_mode='sparse'
)

In [None]:
def f1score(y, y_pred):
    """Micro-averaged F1 between labels ``y`` and the arg-max of ``y_pred``.

    ``y_pred`` is reduced to class indices along axis 1 before being handed
    to sklearn's ``f1_score``.
    """
    predicted_labels = tf.math.argmax(y_pred, axis=1)
    score = f1_score(y, predicted_labels, average='micro')
    return score

def custom_f1score(y, y_pred):
    """Keras-compatible metric wrapping ``f1score``.

    ``f1score`` relies on sklearn, so it is executed through
    ``tf.py_function`` and its result is exposed as a ``tf.double`` tensor.
    """
    metric_inputs = (y, y_pred)
    return tf.py_function(func=f1score, inp=metric_inputs, Tout=tf.double)

In [None]:
import tensorflow.keras.backend as K

# Reset Keras' global state so layers of previously built models do not
# accumulate when this cell is re-run.
K.clear_session()

# CNN character classifier: four 'same'-padded ReLU convolutions with
# shrinking kernels, one 4x4 max-pool, then a dense head.
# Only the first layer needs `input_shape`; it was redundantly repeated on
# the later layers, where it has no effect.
model = Sequential([
    # 28x28x3 input -> 16 feature maps, 22x22 kernels
    Conv2D(16, (22, 22), input_shape=(28, 28, 3), activation='relu', padding='same'),
    # 32 feature maps, 16x16 kernels
    Conv2D(32, (16, 16), activation='relu', padding='same'),
    # 64 feature maps, 8x8 kernels
    Conv2D(64, (8, 8), activation='relu', padding='same'),
    # 64 feature maps, 4x4 kernels
    Conv2D(64, (4, 4), activation='relu', padding='same'),
    # Keep the maximum of every 4x4 region to shrink the spatial size
    MaxPooling2D(pool_size=(4, 4)),
    # Feature maps -> 1-D vector for the dense layers
    Flatten(),
    Dense(128, activation='relu'),
    # One output per class (36), as a probability distribution
    Dense(36, activation='softmax'),
])

model.compile(
    loss='sparse_categorical_crossentropy',   # integer-encoded labels
    # `lr` is the deprecated spelling and is rejected by current Keras
    # releases; `learning_rate` is the supported argument name.
    optimizer=optimizers.Adam(learning_rate=0.0001),
    metrics=[custom_f1score]
)

In [None]:
# Print a summary of the model architecture: the layers, their output shapes,
# and the number of parameters (weights) in each layer.
model.summary()

In [None]:
class stop_training_callback(tf.keras.callbacks.Callback):
    """Stop training once a monitored metric exceeds a threshold.

    Args:
        monitor: Key looked up in the epoch ``logs`` dict.
            Defaults to ``'val_custom_f1score'``.
        threshold: Training stops at the end of the first epoch whose
            monitored value is strictly greater than this. Defaults to 0.99.
    """

    def __init__(self, monitor='val_custom_f1score', threshold=0.99):
        super().__init__()
        self.monitor = monitor
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Called at the end of every epoch; may set ``model.stop_training``."""
        # `logs` may be None, and the metric may be missing from it; comparing
        # None with a float would raise TypeError, so skip the check then.
        score = (logs or {}).get(self.monitor)
        if score is not None and score > self.threshold:
            self.model.stop_training = True

In [None]:
# Take the batch size from the generator so the step count cannot drift out
# of sync with the value passed to flow_from_directory.
batch_size = train_generator.batch_size

# Stop early once the validation F1 score passes the callback's threshold.
callbacks = [stop_training_callback()]

# Train the model and keep the returned History object so the per-epoch
# metrics remain available after training.
history = model.fit(
    train_generator,
    # Whole batches per epoch; at least one step even for tiny datasets.
    steps_per_epoch=max(1, train_generator.samples // batch_size),
    validation_data=validation_generator,   # evaluated at the end of each epoch
    epochs=15,
    verbose=1,                              # progress bar
    callbacks=callbacks
)

In [None]:
def fix_dimension(img):
    """Replicate a single-channel image into three identical channels.

    Args:
        img: 2-D array-like of shape (H, W).

    Returns:
        float64 array of shape (H, W, 3) where every channel equals ``img``.
        For a 28x28 input this is the (28, 28, 3) array the model expects.

    Raises:
        ValueError: if ``img`` is not 2-dimensional.
    """
    gray = np.asarray(img, dtype=np.float64)
    if gray.ndim != 2:
        raise ValueError(
            f"fix_dimension expects a 2-D image, got shape {gray.shape}"
        )
    # Add a trailing channel axis and repeat it three times.
    return np.repeat(gray[:, :, np.newaxis], 3, axis=2)

def show_results(chars=None, classifier=None):
    """Predict the plate number from segmented character images.

    Args:
        chars: Sequence of single-channel character images. Defaults to the
            notebook-level ``char`` array.
        classifier: Object with a Keras-style ``predict`` method. Defaults to
            the notebook-level ``model``.

    Returns:
        str: one predicted character per input image, in input order; the
        empty string when there are no characters.
    """
    # Class index -> character.
    # NOTE(review): assumes the training class indices follow this order
    # (digits, then A-Z) - confirm against train_generator.class_indices.
    characters = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    if chars is None:
        chars = char
    if classifier is None:
        classifier = model

    if len(chars) == 0:
        return ''

    # Resize each character to 28x28 (area interpolation), replicate it to
    # 3 channels, and stack everything into a single (N, 28, 28, 3) batch.
    batch = np.stack([
        fix_dimension(cv2.resize(ch, (28, 28), interpolation=cv2.INTER_AREA))
        for ch in chars
    ])

    # The training generators rescale pixels by 1/255; apply the same scaling
    # here so inference inputs are on the scale the network was trained on.
    batch = batch / 255.0

    # One predict call for the whole plate instead of one per character.
    class_ids = np.argmax(classifier.predict(batch, verbose=0), axis=-1)

    return ''.join(characters[idx] for idx in class_ids)

# Predict the plate number from the segmented characters and print it
print(show_results())

In [None]:
# Show every segmented character together with its predicted label.

# Run the prediction once; calling show_results() inside the loop repeated
# the whole prediction for every subplot.
predicted_plate = show_results()

# 4 columns; at least 3 rows (the original layout), more if needed so that
# plates with more than 12 characters do not overflow the grid.
n_cols = 4
n_rows = max(3, -(-len(char) // n_cols))   # ceil division without importing math

plt.figure(figsize=(10, 6))   # 10x6 inch figure

for i, ch in enumerate(char):
    # Same 28x28 area-interpolated view the classifier receives. A dedicated
    # name is used so the notebook-level `img` (the input photo) is not
    # overwritten.
    char_img = cv2.resize(ch, (28, 28), interpolation=cv2.INTER_AREA)

    plt.subplot(n_rows, n_cols, i + 1)
    plt.imshow(char_img, cmap='gray')
    plt.title(f'predicted: {predicted_plate[i]}')
    plt.axis('off')

plt.show()

In [None]:
# Store the predicted license plate string returned by show_results()
plate_number = show_results()