# Setup

In [6]:
import os
import csv
from PIL import Image
import pandas as pd
import numpy as np

# Root of the dataset tree: every sub-directory is walked, and the name of
# the directory holding an image becomes that image's label.
# Joining with an empty component keeps the trailing separator, so this is
# still '.\\' on Windows but becomes './' on POSIX systems, where a literal
# '.\\' does not name the current directory and os.walk() would silently
# yield nothing.
target_dir = os.path.join(os.curdir, '')

# Lower-case file extensions treated as JPEG input (matched case-insensitively)
jpeg_extensions = ('.jpg', '.jpeg')

# Width and height, in pixels, of every processed image
image_size = (128, 128)

# A pixel is treated as background when all of R, G and B exceed this value
white_threshold = 200

# One flattened pixel vector and one label per processed image (same order)
vectorized_images = []
labels = []


def _prepare_image(file_path, size=image_size, threshold=white_threshold):
    """Return the image at *file_path* cropped to its subject on a white canvas.

    Near-white pixels (R, G and B all above *threshold*) are made transparent,
    the image is cropped to the bounding box of what remains, resized to
    *size*, and finally flattened onto a white background.

    Args:
        file_path: Path of the image file to read.
        size: ``(width, height)`` of the returned image.
        threshold: Channel value above which a pixel counts as white.

    Returns:
        A new ``PIL.Image.Image`` in mode ``"RGB"`` with dimensions *size*.
    """
    # convert() loads the pixel data, so the file handle can be released as
    # soon as the RGBA copy exists.
    with Image.open(file_path) as source:
        rgba = source.convert("RGBA")

    # Remove the white background by zeroing the alpha of near-white pixels
    pixels = np.array(rgba)
    near_white = (pixels[..., :3] > threshold).all(axis=-1)
    pixels[near_white, 3] = 0
    rgba = Image.fromarray(pixels)

    # Crop the image to fit the subject. The box is measured on the alpha
    # band explicitly: the masked pixels still carry white RGB values, and
    # getbbox() on an RGBA image has not always been alpha-only across
    # Pillow releases.
    bbox = rgba.getchannel("A").getbbox()
    if bbox is not None:  # None means the whole image was background
        rgba = rgba.crop(bbox)

    rgba = rgba.resize(size)

    # Change the transparent pixels to white: paste onto a white canvas using
    # the image's own alpha channel as the mask, then drop the alpha channel.
    canvas = Image.new("RGBA", rgba.size, "WHITE")
    canvas.paste(rgba, (0, 0), rgba)
    return canvas.convert('RGB')


# Loop through each sub-directory
for subdir, dirs, files in os.walk(target_dir):
    # os.walk() order is arbitrary; sorting makes the CSV row order reproducible
    dirs.sort()
    for file in sorted(files):
        # Only JPEG files are inputs; the PNGs written below are never re-read
        if not file.lower().endswith(jpeg_extensions):
            continue

        file_path = os.path.join(subdir, file)
        image = _prepare_image(file_path)

        # Vectorize the image and record it with its directory name as label
        image_vector = np.array(image).flatten()
        vectorized_images.append(image_vector)
        labels.append(os.path.basename(subdir))

        # Save the resized image as PNG next to the source file. splitext()
        # strips only the final extension, so 'a.b.jpg' becomes 'a.b.png'
        # instead of being truncated to 'a.png'.
        new_file_path = os.path.join(subdir, f'{os.path.splitext(file)[0]}.png')
        image.save(new_file_path)
        image.close()

        print(f'Resized {file} to {image_size[0]}x{image_size[1]} and saved as {new_file_path}')

# Convert the image data and labels to DataFrame (one row per image, one
# column per flattened pixel channel value, plus the 'label' column)
df = pd.DataFrame(vectorized_images)
df['label'] = labels

# Save DataFrame to CSV
df.to_csv('image_data.csv', index=False)

116
