In [1]:
#%%
"""
Model: Multiclass Logistic Regression (Classification Model)  
@: paing_hein_soe
LinkedIn: https://www.linkedin.com/in/paing-hein-soe/

Step (2) - Read Image Dimensions and Save Per-Folder Statistics as CSV
"""
import os
import pandas as pd
from PIL import Image


# Class labels; each label is also used as the name of a sub-folder of folder_path_parent.
image_categories = ["Wind Plant", "Solar Farm", "Thermal Plant"]
# Raw string: in a normal literal "\M" and "\G" are invalid escape sequences
# (SyntaxWarning on Python 3.12+, planned to become a syntax error).
folder_path_parent = r"D:\ML_Archived\GoogleImage_LogReg"
csv_file_name = "image_data_statistics.csv"  # To create this CSV file

In [2]:
# Scan every category folder and record the pixel dimensions of each image.
# The result is `result_df`, one row per readable image with the columns
# 'File Name', 'Folder Name', 'Width' and 'Height'.

# One sub-folder per category; os.path.join avoids hard-coding the "\\" separator
folder_paths = [os.path.join(folder_path_parent, item) for item in image_categories]

# Accumulate plain dict records and build the DataFrame once at the end
records = []

for folder_path in folder_paths:
    # The folder name doubles as the class label
    folder_name = os.path.basename(folder_path)

    # os.listdir returns entries in arbitrary order; sorting keeps the row order reproducible
    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)

        # Sub-directories cannot be opened as images
        if not os.path.isfile(file_path):
            continue

        # Only the header is needed to read the size; the context manager closes the file.
        # Pillow reports unreadable/unidentified files with an OSError subclass, so stray
        # non-image files (e.g. Thumbs.db) are reported and skipped instead of aborting the scan.
        try:
            with Image.open(file_path) as img:
                width, height = img.size
        except OSError as exc:
            print(f"Skipping unreadable image {file_path}: {exc}")
            continue

        records.append({'File Name': filename,
                        'Folder Name': folder_name,
                        'Width': width,
                        'Height': height})

# Explicit columns keep the schema stable even when no image was found
result_df = pd.DataFrame(records, columns=['File Name', 'Folder Name', 'Width', 'Height'])

# Display the DataFrame
print(result_df)

#%%

# Per-folder extremes of the image dimensions. Named aggregation yields the
# flat, human-readable column labels directly, so no renaming step is needed.
dimension_summary = {
    'Min Width': ('Width', 'min'),
    'Max Width': ('Width', 'max'),
    'Min Height': ('Height', 'min'),
    'Max Height': ('Height', 'max'),
}
folder_stats = result_df.groupby('Folder Name').agg(**dimension_summary)

# Show the summary table (one row per folder)
print(folder_stats)

# The CSV is written into the parent directory that holds the category folders
csv_file_path = os.path.join(folder_path_parent, csv_file_name)
folder_stats.to_csv(csv_file_path)

print("Folder statistics saved successfully.")


# %%

      File Name    Folder Name  Width  Height
0    000001.jpg     Wind Plant   1600     870
1    000002.jpg     Wind Plant    860     580
2    000003.jpg     Wind Plant   2400    1160
3    000004.jpg     Wind Plant   3008    2000
4    000005.jpg     Wind Plant   1457    1088
..          ...            ...    ...     ...
238  000070.jpg  Thermal Plant   1024     576
239  000071.jpg  Thermal Plant   2049    3072
240  000072.jpg  Thermal Plant   1000     720
241  000073.jpg  Thermal Plant   1544    1029
242  000074.jpg  Thermal Plant    600     379

[243 rows x 4 columns]
               Min Width  Max Width  Min Height  Max Height
Folder Name                                                
Solar Farm           430       5750         250        3500
Thermal Plant        320       6116         275        4912
Wind Plant           280       6645         186        4430
Folder statistics saved successfully.
