# Import Libraries

In [1]:
import os

import cv2  # For image processing
import matplotlib.pyplot as plt  # For visualizing images
import numpy as np  # For numerical operations
import pandas as pd  # For handling datasets
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import (
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    Input,
    MaxPooling2D,
    Reshape,
    Softmax,
)
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# Load CSV Files

In [2]:
# Paths are relative to the notebook's working directory.
detection_csv_path = 'Licplatesdetection_train.csv'
recognition_csv_path = 'Licplatesrecognition_train.csv'

# Detection dataset first, then the recognition dataset.
licplate_detection_data = pd.read_csv(detection_csv_path)
licplate_recognition_data = pd.read_csv(recognition_csv_path)

FileNotFoundError: [Errno 2] No such file or directory: 'Licplatesdetection_train.csv'

# Display data

In [None]:
# Preview the top of each dataset (DataFrame.head() returns five rows by default).
for heading, frame in (
    ("Detection Data (First 5 Rows):", licplate_detection_data),
    ("\nRecognition Data (First 5 Rows):", licplate_recognition_data),
):
    print(heading)
    print(frame.head())

# Data Analysis

In [None]:
# Count missing values per column in each dataset.
# isna() is the pandas alias of isnull(), so the counts are the same.
detection_missing = licplate_detection_data.isna().sum()
print("Detection Data:")
print(detection_missing)

recognition_missing = licplate_recognition_data.isna().sum()
print("\nRecognition Data:")
print(recognition_missing)

In [None]:
# Column names, dtypes and non-null counts for both datasets.
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
licplate_detection_data.info()
print()
licplate_recognition_data.info()

In [None]:
# Number of fully duplicated rows: detection dataset first, then recognition.
detection_duplicate_count = licplate_detection_data.duplicated().sum()
recognition_duplicate_count = licplate_recognition_data.duplicated().sum()

print(detection_duplicate_count)
print(recognition_duplicate_count)

# Data Exploration

In [None]:
# Visualize a sample image from the detection dataset
sample_img_path = 'license_plates_detection_train/1.jpg'
sample_img = cv2.imread(sample_img_path)

# cv2.imread returns None for a missing or unreadable file instead of raising.
# Fail here with the path, rather than with an opaque cv2.error inside cvtColor.
if sample_img is None:
    raise FileNotFoundError(f"Could not read image: {sample_img_path}")

# OpenCV loads pixels in BGR order; matplotlib expects RGB.
plt.imshow(cv2.cvtColor(sample_img, cv2.COLOR_BGR2RGB))
plt.title("Sample Image from Detection Dataset")
plt.show()

In [None]:
# Visualize a sample image from the recognition dataset
sample_img_path = 'license_plates_recognition_train/0.jpg'
sample_img = cv2.imread(sample_img_path)

# cv2.imread returns None for a missing or unreadable file instead of raising.
# Fail here with the path, rather than with an opaque cv2.error inside cvtColor.
if sample_img is None:
    raise FileNotFoundError(f"Could not read image: {sample_img_path}")

# OpenCV loads pixels in BGR order; matplotlib expects RGB.
plt.imshow(cv2.cvtColor(sample_img, cv2.COLOR_BGR2RGB))
plt.title("Sample Image from Recognition Dataset")
plt.show()

# Data Preparation

In [None]:
# Prepare data for training the character recognition model
X_train = []  # To store training images
y_train = []  # To store training labels

In [None]:
# Convert text to numerical labels (alphanumeric encoding)
def encode_text(text):
    """Encode a plate string as a list of integer class ids in the range 0-35.

    Digits '0'-'9' map to 0-9 and letters 'A'-'Z' map to 10-35. ASCII letters
    are matched case-insensitively, so 'a' and 'A' both map to 10.

    Args:
        text: The plate text to encode, one class id per character.

    Returns:
        A list of ints, one per character of ``text`` (empty for an empty string).

    Raises:
        ValueError: If ``text`` contains a character outside 0-9 / A-Z / a-z.
            Such a character would otherwise yield an id outside 0-35, which is
            not a valid index for a 36-class one-hot encoding.
    """
    alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    encoded = []
    for char in text:
        # Only ASCII is accepted: str.isdigit() is also true for non-ASCII digits,
        # and str.upper() can fold non-ASCII letters onto ASCII ones.
        class_id = alphabet.find(char.upper()) if char.isascii() else -1
        if class_id < 0:
            raise ValueError(
                f"Unsupported character {char!r} in label {text!r}; expected 0-9 or A-Z"
            )
        encoded.append(class_id)
    return encoded

In [None]:
# Load every recognition image listed in the CSV together with its encoded label.
for _, row in licplate_recognition_data.iterrows():
    img_path = f"license_plates_recognition_train/{row['img_id']}"
    img = cv2.imread(img_path)

    # cv2.imread returns None for a missing or unreadable file; report the path
    # instead of letting cv2.resize fail with an opaque error.
    if img is None:
        raise FileNotFoundError(f"Could not read training image: {img_path}")

    # Encode the label before touching either list, so a failure here cannot
    # leave X_train one element longer than y_train.
    label = encode_text(row['text'])

    # cv2.resize takes (width, height), so the array is 64 rows x 128 columns.
    img = cv2.resize(img, (128, 64))

    X_train.append(img)
    y_train.append(label)

In [None]:
# Stack the images into one array and scale pixel values to [0, 1].
# The array is built as float32 so the division does not promote it to float64,
# which would double the memory used by the image tensor.
X_train = np.array(X_train, dtype=np.float32) / 255.0

In [None]:
# Right-pad every encoded label to the length of the longest one.
# NOTE(review): pad_sequences pads with 0 by default, which is also the class id
# encode_text assigns to the digit '0'. Padding is therefore indistinguishable
# from a real '0' in the targets; consider a dedicated blank class.
max_label_length = len(max(y_train, key=len))
y_train = pad_sequences(y_train, maxlen=max_label_length, padding='post')

In [None]:
# Turn each padded label into a matrix of 36-way one-hot rows, one per character,
# then stack the matrices into a single array.
one_hot_labels = []
for label_ids in y_train:
    one_hot_labels.append(to_categorical(label_ids, num_classes=36))
y_train = np.array(one_hot_labels)

In [None]:
# Hold out 20% of the samples for validation; the fixed seed keeps the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Model Building

In [None]:
# Define the model input: 64 rows x 128 columns x 3 channels.
# The network below is a plain CNN with a dense head; it has no recurrent layers.
inputs = Input(shape=(64, 128, 3))

# Convolutional feature extractor: three conv -> max-pool -> batch-norm stages
# with 64, 128 and 256 filters.
x = inputs
for n_filters in (64, 128, 256):
    x = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = BatchNormalization()(x)

x = Flatten()(x)

# Fully connected layer for classification
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)

# Produce one 36-way score vector per character position. The Dense layer is
# linear and the softmax is applied AFTER the reshape, on the last axis, so each
# position gets its own probability distribution. A softmax on the flat
# (max_label_length * 36) vector would make all positions compete for one
# shared unit of probability mass.
logits = Dense(max_label_length * 36)(x)
logits = Reshape((max_label_length, 36))(logits)
outputs = Softmax(axis=-1)(logits)

# Define the model
model = Model(inputs=inputs, outputs=outputs)

# Compile the model; categorical cross-entropy matches the one-hot targets.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Display model summary
model.summary()

In [None]:
# Fit for 20 epochs in mini-batches of 32, scoring the held-out split after each epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=32,
)

# Accuracy of Character Recognition

In [None]:
# Report the validation accuracy recorded for the final training epoch.
val_accuracy_per_epoch = history.history['val_accuracy']
accuracy = val_accuracy_per_epoch[-1]
print(f"Model Validation Accuracy: {accuracy * 100:.2f}%")

# Save Final Output

In [None]:
# Collect the prediction inputs: every entry of the 'test' directory, in sorted
# (lexicographic) order, prefixed with the directory name.
output_data = []
test_img_paths = []
for file_name in sorted(os.listdir('test')):
    test_img_paths.append('test/' + file_name)

In [None]:
# Predict the plate text for every test image and collect one record per image.
for img_path in test_img_paths:
    img = cv2.imread(img_path)

    # cv2.imread returns None for anything it cannot decode. The paths come from
    # a raw directory listing, which may contain non-image entries, so report
    # and skip those instead of crashing inside cv2.resize.
    if img is None:
        print(f"Skipping unreadable file: {img_path}")
        continue

    # Same preprocessing as training: resize to 128x64 (width x height),
    # scale to [0, 1], and add a leading batch axis.
    img = cv2.resize(img, (128, 64))
    img = img / 255.0
    img = np.expand_dims(img, axis=0)

    # verbose=0 suppresses the per-call progress bar, which would otherwise be
    # printed once per image.
    prediction = model.predict(img, verbose=0)

    # One argmax per character position, computed once for the whole plate.
    class_ids = prediction.reshape(max_label_length, 36).argmax(axis=1)

    # Ids 0-9 are the digits '0'-'9'; ids 10-35 are the letters 'A'-'Z'.
    predicted_text = ''.join(
        chr(int(class_id) + 48) if class_id < 10 else chr(int(class_id) + 55)
        for class_id in class_ids
    )
    output_data.append({'id': os.path.basename(img_path), 'text': predicted_text})

In [None]:
# Build a table from the collected prediction records and write it to disk
# without the index column.
output_df = pd.DataFrame(data=output_data)
output_df.to_csv(path_or_buf='output.csv', index=False)
print("Output saved to output.csv")

# Display the Final Output

In [None]:
# Show only the first rows as a sample; the complete table is in output.csv.
print("Sample Output:")
print(output_df.head())