# Load Data

In [None]:
import scipy.io
import pandas as pd
import matplotlib.pyplot as plt
import os
from PIL import Image, ImageOps
import re
import numpy as np

In [None]:
# Seed NumPy's global random generator with a fixed value so that any
# random draws made through np.random are repeatable between runs.
rnd_seed = 1234
np.random.seed(seed=rnd_seed)

In [None]:
# Locations of the input data, relative to the working directory:
#   labels_path - MATLAB file holding the image labels
#   images_path - directory holding the image files
labels_path = '../Files/imagelabels.mat'
images_path = '../Files/102flowers/jpg'

## Labels

In [None]:
# Read the MATLAB file and wrap its 'labels' entry in a DataFrame.
labels = scipy.io.loadmat(labels_path)
labels_org = pd.DataFrame(data=labels['labels'])

In [None]:
# Preview the first rows of the raw label table.
labels_org.head()

## Images

In [None]:
# Build one record per image file: its path on disk and its category label.
dataset = []

# os.listdir() returns entries in arbitrary order; sorting makes the order of
# `dataset` (and of everything derived from it) reproducible between runs.
for img_file in sorted(os.listdir(images_path)):
    # Skip anything that does not carry one of the accepted image extensions.
    if not img_file.endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
        continue

    # The file name is split on '_' and '.'; the second piece is parsed as the
    # image number (i.e. a name shaped like <prefix>_<number>.<extension>).
    image_number = int(re.split(r'[_\.]', img_file)[1])
    # Image numbers are used 1-based, DataFrame columns are 0-based.
    image_class = labels_org[image_number - 1].values[0]
    img_path = os.path.join(images_path, img_file)
    image_dict = {
        'image_path': img_path,
        'category': image_class,
    }

    dataset.append(image_dict)

In [None]:
# Show the collected list of image records as the cell output.
dataset

## Statistics

In [None]:
# Swap rows and columns of the raw label table.
labels_df = labels_org.T

In [None]:
# Keep only the count, min and max rows of the summary statistics.
labels_df.describe().loc[['count','min','max']]

In [None]:
"""
Number of Classes : 102
Number of Images  : 8189
"""

## Data Distribution

In [None]:
# Add a helper column of ones, then count the rows that fall into each
# distinct value of column 0.
labels_df['count'] = 1
class_count = labels_df.groupby(0)['count'].count()

In [None]:
# Summary statistics of the per-category row counts.
class_count.describe()

In [None]:
# Output directory for the chart images.
save_path = '../Results/Categories/Charts'

# exist_ok=True creates the directory (and any missing parents) without
# failing when it is already there, which avoids the check-then-create race.
os.makedirs(save_path, exist_ok=True)

In [None]:
# Bar chart of the number of rows counted for each category.
plt.figure(figsize=(12, 6))
plt.title('Data Distribution')
plt.xlabel('Category')
plt.ylabel('Count')
# Take the x positions from the counted categories themselves instead of a
# hard-coded range(1, 103), so x and heights always have matching lengths.
plt.bar(class_count.index.tolist(), class_count.values)
filename = os.path.join(save_path, 'Data Distribution.jpg')
# Save before show(): savefig works on the current figure.
plt.savefig(filename, bbox_inches='tight', dpi=300)
plt.show()
plt.close('all')

## Data Visualization

### Group data by Category

In [None]:
# Turn the list of image records into a table.
dataset_df = pd.DataFrame(data=dataset)

# Persist the table as one JSON record per line so later steps can reload it.
dataset_df.to_json(path_or_buf='../Files/dataframe.json', orient='records', lines=True)

# Group the rows by their category for the per-category plots.
grouped = dataset_df.groupby(by='category')

### Same Category Data Plot

In [None]:
"""
102 category => 102 rows and 4 columns
"""

# Save one figure per category showing up to four of its images side by side.
save_path = '../Results/Categories/Visualization'

# makedirs also creates missing parent directories, and exist_ok=True makes
# the call safe when the directory is already present.
os.makedirs(save_path, exist_ok=True)

n_samples = 4  # number of image panels per category figure

for category, group in grouped:
    # Fetch the sample paths once per category; slicing (rather than indexing
    # 0..3) tolerates a category with fewer than n_samples images.
    sample_paths = group['image_path'].tolist()[:n_samples]

    fig, axes = plt.subplots(nrows=1, ncols=n_samples, figsize=(12, 3))
    fig.suptitle(f'Category {category}', fontsize=12)
    for ax, sample_path in zip(axes, sample_paths):
        ax.imshow(plt.imread(sample_path))
    # Hide the frame of every panel, including any left without an image.
    for ax in axes:
        ax.axis('off')
    plt.tight_layout()

    filename = os.path.join(save_path, f'{category}.jpg')
    # Write the file before displaying, then release the figure right away so
    # figures do not pile up across the loop.
    fig.savefig(filename, bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)
plt.close('all')

### Different Categories Visualization

In [None]:
"""
102 category => 13 rows and 8 columns
"""

# Overview grid: the first image of every category, eight panels per row.
# The output directory is set here explicitly so this cell does not depend on
# which cell last assigned save_path.
save_path = '../Results/Categories/Visualization'
os.makedirs(save_path, exist_ok=True)

n_cols = 8
n_categories = grouped.ngroups
# Ceiling division: just enough rows to hold every category (102 -> 13 rows).
n_rows = max(1, -(-n_categories // n_cols))

# squeeze=False always yields a 2-D array of axes, so flatten() is uniform.
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(18, 18), squeeze=False)
axes = axes.flatten()

for ax, (category, group) in zip(axes, grouped):
    img = plt.imread(group['image_path'].iloc[0])
    ax.imshow(img)
    ax.axis('off')
    ax.set_title(f'Category {category}', fontsize=8)

# Blank out the panels left over after the last category.
for ax in axes[n_categories:]:
    ax.axis('off')

plt.tight_layout()
filename = os.path.join(save_path, 'All_Categories.jpg')
# Write the file before displaying the figure.
fig.savefig(filename, bbox_inches='tight', dpi=300)
plt.show()
plt.close('all')

### Channels Distribution

In [None]:
# Mean red, green and blue pixel value per category, taken over every pixel
# of every image in the category.
channel_means = {}
for category, group in grouped:
    # Accumulate per-channel sums and the pixel count image by image instead
    # of keeping all decoded images of the category in memory at once.
    channel_sums = np.zeros(3, dtype=np.int64)
    pixel_count = 0
    for img_path in group['image_path']:
        # The context manager closes the underlying file once decoding is done.
        with Image.open(img_path) as img:
            # Force three channels so that non-RGB files (e.g. grayscale) do
            # not break the per-channel reduction below.
            pixels = np.asarray(img.convert('RGB'))
        # Sum over height and width; int64 avoids overflowing the 8-bit dtype.
        channel_sums += pixels.sum(axis=(0, 1), dtype=np.int64)
        pixel_count += pixels.shape[0] * pixels.shape[1]

    red_mean, green_mean, blue_mean = channel_sums / pixel_count
    channel_means[category] = {
        'red': red_mean,
        'green': green_mean,
        'blue': blue_mean,
    }

In [None]:
# Stacked bar chart of the mean red/green/blue pixel value of each category.
save_path = '../Results/Categories/Charts'

# makedirs also creates missing parent directories, and exist_ok=True makes
# the call safe when the directory is already present.
os.makedirs(save_path, exist_ok=True)

# NOTE: this rebinds `labels`, which earlier in the file held the loaded
# .mat contents; from here on it is the list of category keys.
labels = list(channel_means.keys())

red_means   = [channel_means[cls]['red'] for cls in labels]
green_means = [channel_means[cls]['green'] for cls in labels]
blue_means  = [channel_means[cls]['blue'] for cls in labels]

x = np.arange(len(labels))
width = 0.5

# Pastel tints of the three primaries, one per channel.
colors = {
    'red'  : '#FF9999',
    'green': '#99FF99',
    'blue' : '#9999FF'
}
fig, ax = plt.subplots(figsize=(16, 6))

# Each segment starts where the previous channel's segment ends.
green_bottom = np.array(red_means)
blue_bottom = green_bottom + np.array(green_means)

ax.bar(x, red_means, width, color=colors['red'], label='Red Channel')
ax.bar(x, green_means, width, bottom=green_bottom, color=colors['green'], label='Green Channel')
ax.bar(x, blue_means, width, bottom=blue_bottom, color=colors['blue'], label='Blue Channel')

ax.set_xlabel('Categories')
ax.set_ylabel('Mean Pixel Value')
ax.set_title('Mean Pixel Value by Channel for Each Category')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=90)
ax.legend()
filename = os.path.join(save_path, 'Color Distribution.jpg')
# Save through the figure handle, before it is displayed.
fig.savefig(filename, dpi=300)
plt.show()

plt.close('all')

In [None]:
# Bar chart of the mean red-channel pixel value of each category.
fig, ax = plt.subplots(figsize=(16, 6))

ax.bar(x, red_means, width, color=colors['red'], label='Red Channel')

ax.set_xlabel('Categories')
ax.set_ylabel('Mean Pixel Value')
ax.set_title('Mean Pixel Value by Red Channel for Each Category')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=90)
ax.legend()
filename = os.path.join(save_path, 'Red channel Distribution.jpg')
# Save through the figure handle, before it is displayed.
fig.savefig(filename, dpi=300)
plt.show()

plt.close('all')

In [None]:
# Bar chart of the mean green-channel pixel value of each category.
fig, ax = plt.subplots(figsize=(16, 6))

ax.bar(x, green_means, width, color=colors['green'], label='Green Channel')

ax.set_xlabel('Categories')
ax.set_ylabel('Mean Pixel Value')
ax.set_title('Mean Pixel Value by Green Channel for Each Category')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=90)
ax.legend()
filename = os.path.join(save_path, 'Green Color Distribution.jpg')
# Save through the figure handle, before it is displayed.
fig.savefig(filename, dpi=300)
plt.show()

plt.close('all')

In [None]:
# Bar chart of the mean blue-channel pixel value of each category.
fig, ax = plt.subplots(figsize=(16, 6))

ax.bar(x, blue_means, width, color=colors['blue'], label='Blue Channel')

ax.set_xlabel('Categories')
ax.set_ylabel('Mean Pixel Value')
ax.set_title('Mean Pixel Value by Blue Channel for Each Category')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=90)
ax.legend()
filename = os.path.join(save_path, 'Blue Color Distribution.jpg')
# Save through the figure handle, before it is displayed.
fig.savefig(filename, dpi=300)
plt.show()

plt.close('all')

In [None]:
# Mean grayscale pixel value per category, taken over every pixel of every
# image in the category.
gray_scales = {}
for category, group in grouped:
    # Accumulate the intensity sum and pixel count image by image instead of
    # keeping all decoded images of the category in memory at once.
    intensity_sum = 0
    pixel_count = 0
    for img_path in group['image_path']:
        # The context manager closes the underlying file once decoding is done.
        with Image.open(img_path) as img:
            pixels = np.asarray(ImageOps.grayscale(img))
        # int64 accumulation avoids overflowing the 8-bit pixel dtype.
        intensity_sum += int(pixels.sum(dtype=np.int64))
        pixel_count += pixels.size

    gray_scales[category] = {
        'gray_scale': np.float64(intensity_sum / pixel_count)
    }

In [None]:
# Bar chart of the mean grayscale pixel value of each category.
save_path = '../Results/Categories/Charts'

# makedirs also creates missing parent directories, and exist_ok=True makes
# the call safe when the directory is already present.
os.makedirs(save_path, exist_ok=True)

# NOTE: this rebinds `labels` to the list of category keys.
labels = list(gray_scales.keys())

means = [gray_scales[cls]['gray_scale'] for cls in labels]

x = np.arange(len(labels))
width = 0.5

fig, ax = plt.subplots(figsize=(16, 6))

ax.bar(x, means, width, color='gray', label='GrayScale')

ax.set_xlabel('Categories')
ax.set_ylabel('Mean Pixel Value')
ax.set_title('Mean Pixel Value by GrayScale for Each Category')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=90)
ax.legend()
filename = os.path.join(save_path, 'GrayScale Distribution.jpg')
# Save through the figure handle, before it is displayed.
fig.savefig(filename, dpi=300)
plt.show()

plt.close('all')