In [None]:
# Install easyocr library
!pip install easyocr



In [None]:
#Load required libraries
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import warnings
import cv2
import urllib.request
import zipfile
import easyocr
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.preprocessing import LabelEncoder

warnings.filterwarnings("ignore")

In [None]:
# Download and extract the training and testing data
train_zip_url = 'https://github.com/rovianiameliaa/SatriaData/raw/main/Data_Train.zip'
test_zip_url = 'https://github.com/rovianiameliaa/SatriaData/raw/main/Data_Test.zip'
urllib.request.urlretrieve(train_zip_url, 'Data_Train.zip')
urllib.request.urlretrieve(test_zip_url, 'Data_Test.zip')

# Open each archive in a `with` block so its file handle is always released,
# even if extraction raises part-way through.
with zipfile.ZipFile('Data_Train.zip', 'r') as train_zip_ref:
    train_zip_ref.extractall('data_train')

with zipfile.ZipFile('Data_Test.zip', 'r') as test_zip_ref:
    test_zip_ref.extractall('data_test')

In [None]:
# Load train and test data from Excel files
def _download_excel(url, local_name):
    """Save the workbook at `url` as `local_name` and return it as a DataFrame."""
    urllib.request.urlretrieve(url, local_name)
    return pd.read_excel(local_name)


# Training sheet: the plate text and the image file name for each sample
train_excel_url = 'https://github.com/rovianiameliaa/SatriaData/raw/main/DataTrain.xlsx'
train_df = _download_excel(train_excel_url, 'DataTrain.xlsx')
print(train_df)

# Test sheet: image file names only
test_excel_url = 'https://github.com/rovianiameliaa/SatriaData/raw/main/DataTest.xlsx'
test_df = _download_excel(test_excel_url, 'DataTest.xlsx')
print(test_df)

     Unnamed: 0 Vehicleregistrationplate        NameofFile
0             0                    A7814    DataTrain1.png
1             1                  B1074QO    DataTrain2.png
2             2                  B1031QO    DataTrain3.png
3             3                  B187EDA    DataTrain4.png
4             4                  B1089VD    DataTrain5.png
..          ...                      ...               ...
795         795                 B1677EJC  DataTrain796.png
796         796                  B1743VO  DataTrain797.png
797         797                 AD1416YD  DataTrain798.png
798         798                 AB5419TN  DataTrain799.png
799         799                 AB6315SE  DataTrain800.png

[800 rows x 3 columns]
    Unnamed: 0     Name of File
0            0    DataTest1.png
1            1    DataTest2.png
2            2    DataTest3.png
3            3    DataTest4.png
4            4    DataTest5.png
..         ...              ...
95          95   DataTest96.png
96          

In [None]:
# Prepare the training data
train_images = []
train_labels = []
train_image_dir = 'data_train'

# File names are derived from the row position (index 0 -> DataTrain1.png),
# which relies on train_df keeping its default 0..N-1 index.
for index, row in train_df.iterrows():
    image_index = index + 1
    image_path = os.path.join(train_image_dir, f'DataTrain{image_index}.png')
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals a missing/corrupt file by returning None rather
        # than raising. Skip the sample (image AND label) so the two lists
        # stay aligned and later cv2 calls never receive None.
        print(f"Skipping unreadable training image: {image_path}")
        continue
    train_images.append(image)
    train_labels.append(row['Vehicleregistrationplate'])

# Prepare the testing data
test_images = []
test_image_dir = 'data_test'
test_filenames = []

for index, row in test_df.iterrows():
    image_index = index + 1
    test_filename = f"DataTest{image_index}.png"
    image_path = os.path.join(test_image_dir, test_filename)
    image = cv2.imread(image_path)
    if image is None:
        print(f"Could not read test image: {image_path}")
    # Unreadable test images are kept as None placeholders so that
    # test_images[i] always corresponds to test_filenames[i]; the prediction
    # cell checks for None before processing.
    test_images.append(image)
    test_filenames.append(test_filename)

In [None]:
# Perform label encoding
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_labels)

# Calculate the number of plate classes
num_classes = len(label_encoder.classes_)

# Define a function to preprocess and perform character segmentation on images
def perform_character_segmentation(image, threshold=127, min_area=100, connectivity=8):
    """Split a plate image into crops of its connected dark regions.

    The image is converted to grayscale and inverse-thresholded, so pixels at
    or below `threshold` become foreground. Each connected foreground component
    whose pixel area exceeds `min_area` is cropped from the original image by
    its bounding box.

    Args:
        image: BGR image array as returned by ``cv2.imread``.
        threshold: Gray level used for the inverse binary threshold.
        min_area: Components with an area of this many pixels or fewer are
            discarded as noise.
        connectivity: Pixel connectivity (4 or 8) for component labelling.

    Returns:
        A list of crops (NumPy slices of `image`, not copies), in the order the
        components were labelled. The list is empty when no component is
        large enough.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY_INV)

    # connectedComponentsWithStats requires an 8-bit single-channel image
    binary = np.uint8(binary)

    # Apply connected component analysis
    num_labels, _, stats, _ = cv2.connectedComponentsWithStats(binary, connectivity, cv2.CV_32S)

    # Extract individual character images; label 0 is the background component
    segmented_characters = []
    for i in range(1, num_labels):
        x, y, w, h, area = stats[i]
        if area > min_area:  # Filter out small components
            segmented_characters.append(image[y:y+h, x:x+w])

    return segmented_characters

In [None]:
# Build the Model Architecture: ImageNet-pretrained ResNet50 backbone followed by
# global average pooling, two dense layers and a softmax over the plate classes.
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = Dense(256, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Preprocess the training images: every segmented crop inherits the label of
# the plate image it was cut from.
train_characters = []
train_labels_extended = []
for image, label in zip(train_images, train_labels):
    if image is None:
        # Same guard as the test-time preprocessing: an unreadable image
        # would otherwise crash inside cv2.cvtColor.
        print("Empty image found!")
        continue
    for character in perform_character_segmentation(image):
        # Resize character image to the fixed input shape of the network
        train_characters.append(cv2.resize(character, (224, 224)))
        train_labels_extended.append(label)

if not train_characters:
    raise ValueError("No character crops were extracted from the training images.")

# ImageDataGenerator.flow expects NumPy arrays, so no tensor conversion is needed
train_characters = np.array(train_characters)
train_labels_extended = np.array(train_labels_extended)

# Create an image data generator with augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    # Mirrored letters/digits are not valid plate glyphs, so flipping would
    # feed the network samples that cannot occur at test time.
    horizontal_flip=False,
    fill_mode='nearest'
)

# Generate augmented training data batches; the batch size is set here
train_generator = datagen.flow(train_characters, train_labels_extended, batch_size=128)

# Compile the model (integer class ids -> sparse categorical cross-entropy)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model. `batch_size` must not be passed to fit() when the input is
# a generator: the generator already yields batches (128 samples, see above).
model.fit(train_generator, epochs=50)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Preprocess the testing images, remembering which test image every crop came
# from so that per-crop predictions can be mapped back to the right file.
test_characters = []
character_image_index = []  # character_image_index[k] = index into test_images of crop k
for image_index, image in enumerate(test_images):
    if image is None:
        print("Empty image found!")
        continue
    for character in perform_character_segmentation(image):
        test_characters.append(cv2.resize(character, (224, 224)))
        character_image_index.append(image_index)

test_characters = np.array(test_characters)
character_image_index = np.array(character_image_index, dtype=int)

# Keep only files for which at least one crop was extracted and predicted
valid_predictions = []
valid_filenames = []

if len(test_characters) > 0:
    # Make predictions on the testing data: one probability row per crop
    predictions = model.predict(test_characters)

    for image_index, filename in enumerate(test_filenames):
        crop_mask = character_image_index == image_index
        if not crop_mask.any():
            # Unreadable image or no component passed the area filter
            continue
        # Combine the crops of one image by averaging their class
        # probabilities, then translate the winning class id into its label.
        mean_probabilities = predictions[crop_mask].mean(axis=0)
        predicted_class = int(np.argmax(mean_probabilities))
        valid_predictions.append(label_encoder.inverse_transform([predicted_class])[0])
        valid_filenames.append(filename)
else:
    print("No character crops were extracted from the test images.")

# One predicted label per successfully processed test image
predicted_labels = valid_predictions

# Create a DataFrame with the valid filenames and predicted labels
result_df = pd.DataFrame({'Name of File': valid_filenames, 'Vehicleregistrationplate': valid_predictions})

# Save the predictions to a CSV file
result_df.to_csv('predictions.csv', index=False)