In this notebook we execute the following steps:
- Initialization of a dictionary 'data' where we will store the attributes retrieved from the pictures
- Creation of a function 'image_data' to retrieve the attributes of a single picture 
- Creation of a function 'populate_dataset' to browse through the filesystem where we stored our raw images

Afterwards we:
- define our dataset directory where the images are stored
- execute the function to populate the dataset
- create and save a dataframe from the data 
- print a success message to show that the job was done

Note: On my laptop the execution time is around 10 minutes

In [1]:
import os
from typing import Tuple

from PIL import Image
import pandas as pd

# Column store for the dataset: one list per column, filled with one value per image.
# Columns: Plant, Healthy, Illness, Image_name, Image_folder, Image_width,
# Image_height, Image_format, Image_mode
data = {
    column: []
    for column in (
        'Plant',
        'Healthy',
        'Illness',
        'Image_name',
        'Image_folder',
        'Image_width',
        'Image_height',
        'Image_format',
        'Image_mode',
    )
}

def image_data(filepath: str, filename: str) -> Tuple[str, int, int, str, str]:
    """Read the basic attributes of a single image file.

    Args:
        filepath (str): Path of the image file to open.
        filename (str): Name of the file; returned unchanged as the first
            element of the result.

    Returns:
        Tuple[str, int, int, str, str]: ``(filename, width, height, format,
        mode)`` where format is e.g. JPEG, PNG or GIF and mode is e.g. RGB
        or RGBA.
    """
    # The context manager closes the file even if reading an attribute raises.
    with Image.open(filepath) as image:
        width, height = image.size
        image_format = image.format  # named so the builtin format() is not shadowed
        mode = image.mode

    return filename, width, height, image_format, mode

def populate_dataset(dataset: dict, directory: str) -> None:
    """Collect the attributes of every image below ``directory`` into ``dataset``.

    Each sub-folder of ``directory`` is treated as one category. A folder
    named ``<plant>___<illness>`` is split at the first ``___``; a folder
    without that separator is used as the plant name and labelled
    ``"healthy"``. Entries of ``directory`` that are not folders are skipped.
    A progress line is printed for every file processed.

    Args:
        dataset (dict): Mapping of column name to list. One value per image
            is appended to each column (a missing column is created). The
            dict is modified in place.
        directory (str): Directory whose sub-folders contain the image files.
    """
    count = 0  # running number of processed files across all folders

    for folder_name in os.listdir(directory):
        folder_path = os.path.join(directory, folder_name)
        if not os.path.isdir(folder_path):
            continue

        # partition() splits at the first separator only, so a folder name
        # containing "___" more than once does not raise on unpacking.
        plant, separator, illness = folder_name.partition("___")
        if not separator:
            illness = "healthy"

        for file_name in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file_name)

            count += 1
            print("+++", file_path, f"{count}")

            name, width, height, image_format, mode = image_data(file_path, file_name)

            row = {
                'Plant': plant,
                'Healthy': 1 if illness == "healthy" else 0,
                'Illness': illness,
                'Image_name': name,
                'Image_folder': folder_name,
                'Image_width': width,
                'Image_height': height,
                'Image_format': image_format,
                'Image_mode': mode,
            }
            # Write to the dict that was passed in, not to a module-level global.
            for column, value in row.items():
                dataset.setdefault(column, []).append(value)

# Root directory of the raw images; populate_dataset reads one sub-folder per category below it
dataset_dir = "../data/raw"

# Walk the directory and record the attributes of every image in the 'data' dict
populate_dataset(data, dataset_dir)

# Build a DataFrame from the collected columns
df = pd.DataFrame(data)

# Export to CSV (index=False is not passed, so the DataFrame index is written as the first column)
df.to_csv('../data/1.0-bbri-dataframe.csv')

# Print success message
print("Creation of dataset finished succesfully")

+++ ../data/raw\Apple___Apple_scab\image (101).JPG 1
+++ ../data/raw\Apple___Apple_scab\image (110).JPG 2
+++ ../data/raw\Apple___Apple_scab\image (106).JPG 3
+++ ../data/raw\Apple___Apple_scab\image (103).JPG 4
+++ ../data/raw\Apple___Apple_scab\image (105).JPG 5
+++ ../data/raw\Apple___Apple_scab\image (104).JPG 6
+++ ../data/raw\Apple___Apple_scab\image (102).JPG 7
+++ ../data/raw\Apple___Apple_scab\image (107).JPG 8
+++ ../data/raw\Apple___Apple_scab\image (10).JPG 9
+++ ../data/raw\Apple___Apple_scab\image (1).JPG 10
+++ ../data/raw\Apple___Apple_scab\image (100).JPG 11
+++ ../data/raw\Apple___Apple_scab\image (112).JPG 12
+++ ../data/raw\Apple___Apple_scab\image (111).JPG 13
+++ ../data/raw\Apple___Apple_scab\image (113).JPG 14
+++ ../data/raw\Apple___Apple_scab\image (114).JPG 15
+++ ../data/raw\Apple___Apple_scab\image (108).JPG 16
+++ ../data/raw\Apple___Apple_scab\image (11).JPG 17
+++ ../data/raw\Apple___Apple_scab\image (109).JPG 18
+++ ../data/raw\Apple___Apple_scab\image 