# Imports

In [1]:
from PIL import Image
import os
import glob
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical


# Making the images to the same size with padding for the training set

In [2]:
target_size = (224, 224)

def pad_image(image_path, target_size):
    img = Image.open(image_path)
    width, height = img.size

    pad_width = max(0, target_size[0] - width)
    pad_height = max(0, target_size[1] - height)
    
    left_padding = pad_width // 2
    top_padding = pad_height // 2
    #right_padding = pad_width - left_padding
    #bottom_padding = pad_height - top_padding

    padded_img = Image.new(img.mode, target_size, (225, 225, 225))
    padded_img.paste(img, (left_padding, top_padding))

    return padded_img


input_directory = 'Data/validation_v2/validation/'
output_directory = 'Data/validation_v2/validation_resize/'

os.makedirs(output_directory, exist_ok=True)

for image_file in glob.glob(os.path.join(input_directory, '*.jpg')):  # Adjust the file extension as needed
    padded_image = pad_image(image_file, target_size)
    filename = os.path.splitext(os.path.basename(image_file))[0]
    output_path = os.path.join(output_directory, f'{filename}.jpg')
    padded_image.save(output_path)

print("Padding completed for all images in the directory.")


Padding completed for all images in the directory.


In [7]:
train_csv_path = 'Data\\written_name_train_v2.csv'
validation_csv_path = 'Data\\written_name_validation_v2.csv'
test_csv_path = 'Data\\written_name_test_v2.csv'

train_image_dir = 'Data\\train_v2\\train_resize'
validation_image_dir = 'Data\\validation_v2\\validation_resize'
test_image_dir = 'Data\\test_v2\\test_resize'

train_data = pd.read_csv(train_csv_path)
validation_data = pd.read_csv(validation_csv_path)
test_data = pd.read_csv(test_csv_path)

train_data['IDENTITY'] = train_data['IDENTITY'].astype(str)
validation_data['IDENTITY'] = validation_data['IDENTITY'].astype(str)
test_data['IDENTITY'] = test_data['IDENTITY'].astype(str)

num_classes = len(train_data['IDENTITY'].unique())
batch_num = 32

train_datagen = ImageDataGenerator(
    rescale=1.0/255.0,
)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory=train_image_dir,
    x_col='FILENAME',
    y_col='IDENTITY',
    target_size=(224, 224),
    batch_size=batch_num,
    class_mode='categorical',
    shuffle=True
)

validation_datagen = ImageDataGenerator(rescale=1.0/255.0)

validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validation_data,
    directory=validation_image_dir,
    x_col='FILENAME',
    y_col='IDENTITY',
    target_size=(224, 224),
    batch_size=batch_num,
    class_mode='categorical',
    shuffle=False 
)

test_datagen = ImageDataGenerator(rescale=1.0/255.0)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_data,
    directory=test_image_dir,
    x_col='FILENAME',
    y_col='IDENTITY',
    target_size=(224, 224),
    batch_size=batch_num,
    class_mode='categorical',
    shuffle=False
)

model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=10,
    validation_data=validation_generator,
    validation_steps=len(validation_generator)
)

test_loss, test_accuracy = model.evaluate(test_generator, steps=len(test_generator))
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

Found 330961 validated image filenames belonging to 100540 classes.
Found 41370 validated image filenames belonging to 20228 classes.
Found 41370 validated image filenames belonging to 20280 classes.
Epoch 1/10
   74/10343 [..............................] - ETA: 1:53:01 - loss: 11.2241 - accuracy: 0.0025

KeyboardInterrupt: 