# Imports

In [2]:
from PIL import Image, ImageOps
import os
import glob
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Making the images to the same size with padding for the training set

In [3]:
target_size = (224, 224)

def pad_image(image_path, target_size):
    img = Image.open(image_path)
    width, height = img.size

    pad_width = max(0, target_size[0] - width)
    pad_height = max(0, target_size[1] - height)
    
    left_padding = pad_width // 2
    top_padding = pad_height // 2
    #right_padding = pad_width - left_padding
    #bottom_padding = pad_height - top_padding

    padded_img = Image.new(img.mode, target_size, (225, 225, 225))
    padded_img.paste(img, (left_padding, top_padding))

    return padded_img


input_directory = 'Data/train_v2/train/'
output_directory = 'Data/train_v2/train_resize/'

os.makedirs(output_directory, exist_ok=True)

for image_file in glob.glob(os.path.join(input_directory, '*.jpg')):  # Adjust the file extension as needed
    padded_image = pad_image(image_file, target_size)
    filename = os.path.splitext(os.path.basename(image_file))[0]
    output_path = os.path.join(output_directory, f'{filename}_padded.jpg')
    padded_image.save(output_path)

print("Padding completed for all images in the directory.")


Padding completed for all images in the directory.


# one hot encode name label

In [8]:
decoded_labels = []

label_encoder = LabelEncoder()
onehot_encoder = OneHotEncoder(sparse_output=True)

image_names = []
label_names = []

chunk_size = 1000

data_reader = pd.read_csv('Data/written_name_train_v2.csv', chunksize=chunk_size)

for chunk in data_reader:
    image_names.extend(chunk['FILENAME'])
    label_names.extend(chunk['IDENTITY'])

    label_values = label_encoder.fit_transform(chunk['IDENTITY'])
    onehot_labels = onehot_encoder.fit_transform(label_values.reshape(-1, 1))
    dense_labels = onehot_labels.toarray()

    for row in dense_labels:
        decoded_label = label_encoder.inverse_transform([row.argmax()])
        decoded_labels.append(decoded_label[0])

for i, image_name in enumerate(image_names):
    print(f"Image Name: {image_name}, Original Label: {label_names[i]}, Decoded Label: {decoded_labels[i]}")



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

