In [3]:
import os
from PIL import Image
import pandas as pd

def process_images(directory):
    """Build a table of per-image average RGB values for one directory.

    Every regular file directly inside ``directory`` is passed to
    ``average_rgb``. Entries are visited in sorted filename order so that
    the ``Index`` column and the row order are reproducible between runs
    (``os.listdir`` itself returns entries in arbitrary order), and
    sub-directories are skipped because they cannot be opened as images.

    Args:
        directory: Path of the folder that contains the image files.

    Returns:
        A pandas DataFrame with the columns ``Index``, ``Average R``,
        ``Average G`` and ``Average B`` and one row per file. The frame is
        empty (but still has these columns) when the folder holds no files.
    """
    candidate_paths = (
        os.path.join(directory, file_name)
        for file_name in sorted(os.listdir(directory))
    )
    # Keep regular files only; a nested folder would make the image loader fail.
    image_paths = [path for path in candidate_paths if os.path.isfile(path)]

    data = []
    for index, image_path in enumerate(image_paths):
        mean_r, mean_g, mean_b = average_rgb(image_path)
        data.append([index, mean_r, mean_g, mean_b])

    return pd.DataFrame(data, columns=['Index', 'Average R', 'Average G', 'Average B'])

def average_rgb(image_path):
    """Return the mean red, green and blue values of an image file.

    The file is opened through a context manager so that its handle is
    released as soon as the pixel data has been read, rather than staying
    open until the image object happens to be garbage collected.

    Args:
        image_path: Path of an image file that PIL can open.

    Returns:
        A tuple ``(average_r, average_g, average_b)`` holding the per-channel
        mean over all pixels after the image has been converted to RGB.
    """
    with Image.open(image_path) as image:
        # convert() produces the RGB pixel data while the file is still open.
        rgb_values = image.convert("RGB").getdata()

    total_pixels = len(rgb_values)
    total_r, total_g, total_b = 0, 0, 0
    for r, g, b in rgb_values:
        total_r += r
        total_g += g
        total_b += b

    average_r = total_r / total_pixels
    average_g = total_g / total_pixels
    average_b = total_b / total_pixels

    return (average_r, average_g, average_b)

# Example usage: build one labelled table per ripeness class.
directory = "./dataset/banana_ripeness/ripe"
df_ripe = process_images(directory).assign(ripeness_factor=1)

directory = "./dataset/banana_ripeness/green"
df_unripe = process_images(directory).assign(ripeness_factor=0)

# Stack both classes into one frame; ignore_index renumbers the rows.
df = pd.concat([df_ripe, df_unripe], ignore_index=True)

# Record the observed range of every colour channel.
min_r, max_r = df['Average R'].min(), df['Average R'].max()
min_g, max_g = df['Average G'].min(), df['Average G'].max()
min_b, max_b = df['Average B'].min(), df['Average B'].max()

# Min-max scale each channel: (value - min) / (max - min).
df['Average R'] = df['Average R'].sub(min_r).div(max_r - min_r)
df['Average G'] = df['Average G'].sub(min_g).div(max_g - min_g)
df['Average B'] = df['Average B'].sub(min_b).div(max_b - min_b)

# Persist the normalised table and echo it to the cell output.
df.to_csv("banana_rgb.csv", index=False)
print(df)


     Index  Average R  Average G  Average B  ripeness_factor
0        0   0.871980   0.843975   0.636597                1
1        1   0.645002   0.422874   0.027435                1
2        2   0.739557   0.645042   0.549605                1
3        3   0.739557   0.645042   0.549605                1
4        4   0.739557   0.645042   0.549605                1
..     ...        ...        ...        ...              ...
209    115   0.220237   0.341889   0.316260                0
210    116   0.806934   0.847212   0.865363                0
211    117   0.428349   0.515757   0.443910                0
212    118   0.278570   0.506838   0.244185                0
213    119   0.754431   0.847514   0.786781                0

[214 rows x 5 columns]


In [8]:
# Show the column labels of the combined banana frame.
df.columns

Index(['Index', 'Average R', 'Average G', 'Average B', 'ripeness_factor'], dtype='object')

In [11]:
# Load the apples-and-oranges dataset from CSV into a DataFrame.
df_apple_orange = pd.read_csv('./dataset/apples_and_oranges.csv')

In [14]:
# Show the column labels of the apples-and-oranges frame.
df_apple_orange.columns

Index(['Weight', 'Size', 'Class'], dtype='object')