In [None]:
# Import libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import pandas as pd

In [None]:
# Dataset configuration.
# `classes` doubles as the list of sub-folder names under `dataset_path`
# and as the label values used throughout the notebook.
classes = ['Car', 'Bike']
dataset_path = '../data/processed/Car-Bike-Dataset'

In [None]:
# Collect image file paths and labels.
# Each class lives in its own sub-folder of `dataset_path`; only files with a
# .png/.jpg/.jpeg extension (case-insensitive) are kept.
image_paths = []
labels = []

for cls in classes:
    cls_folder = os.path.join(dataset_path, cls)
    # os.listdir() returns entries in arbitrary, filesystem-dependent order.
    # Sorting makes the DataFrame row order (and anything that takes the
    # "first N" rows of a class) reproducible across runs and machines.
    for file in sorted(os.listdir(cls_folder)):
        if file.lower().endswith(('.png', '.jpg', '.jpeg')):
            image_paths.append(os.path.join(cls_folder, file))
            labels.append(cls)

# Create a DataFrame for easier analysis: one row per image
df = pd.DataFrame({
    'image_path': image_paths,
    'label': labels
})

In [None]:
# Class distribution: number of images per label
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='label', hue='label', ax=ax)
ax.set_title('Class Distribution')
ax.set_xlabel('Class')
ax.set_ylabel('Number of Images')
plt.show()

In [None]:
# Image dimensions per class

# Read the (width, height) of every image, in DataFrame row order
sizes = []
for path in df['image_path']:
    with Image.open(path) as img:
        sizes.append(img.size)  # PIL reports size as (width, height)

widths = [w for w, _ in sizes]
heights = [h for _, h in sizes]

df['width'] = widths
df['height'] = heights

# Side-by-side boxplots: one panel per dimension, one box per class
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
panels = [
    ('width', 'Image Widths per Class', 'Width (pixels)'),
    ('height', 'Image Heights per Class', 'Height (pixels)'),
]

for ax, (column, title, ylabel) in zip(axes, panels):
    sns.boxplot(x='label', y=column, data=df, hue='label', ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Class')
    ax.set_ylabel(ylabel)
    ax.grid(True)

fig.tight_layout()
plt.show()

In [None]:
# Width vs. height for every image, coloured by class
fig, ax = plt.subplots(figsize=(6, 6))
sns.scatterplot(data=df, x='width', y='height', hue='label', ax=ax)
ax.set_title('Image Width vs Height by Class')
ax.set_xlabel('Width (pixels)')
ax.set_ylabel('Height (pixels)')
ax.legend(title='Class')
plt.show()

In [None]:
# Sample images per class
# Draws a grid with one row per class and `num_samples` columns.
num_samples = 5  # images shown per class; also sets the grid width and the title

plt.figure(figsize=(15, 6))

for i, cls in enumerate(classes):
    # Take the first `num_samples` rows of this class (fewer if the class is smaller)
    sample_paths = df.loc[df['label'] == cls, 'image_path'].iloc[:num_samples]

    for j, path in enumerate(sample_paths):
        # Subplot indices are 1-based and run row-major across the grid
        ax = plt.subplot(len(classes), num_samples, i * num_samples + j + 1)
        with Image.open(path) as img:
            ax.imshow(img)
        ax.axis('off')
        # Label each row once, above its first image
        if j == 0:
            ax.set_title(cls, fontsize=14, pad=10)

plt.suptitle(f'First {num_samples} Images of Each Class', fontsize=16)
plt.tight_layout()
plt.show()

In [None]:
# Average Image per Class
# Every image is converted to RGB and resized to `common_size`, then the
# per-pixel mean over all images of a class is displayed.

# Decide on a common size (can also use min width/height per class)
common_size = (224, 224)  # (width, height)

plt.figure(figsize=(8, 4))

for i, cls in enumerate(classes):
    class_paths = df.loc[df['label'] == cls, 'image_path']

    # Accumulate a running sum instead of keeping every resized image in a
    # list: memory stays constant regardless of how many images a class has.
    # Arrays are (height, width, channels), hence the swapped size order.
    pixel_sum = np.zeros((common_size[1], common_size[0], 3), dtype=np.float64)
    n_images = 0

    for path in class_paths:
        with Image.open(path) as img:
            resized = img.convert('RGB').resize(common_size)  # Resize to common size
            pixel_sum += np.asarray(resized, dtype=np.float64)
        n_images += 1

    ax = plt.subplot(1, len(classes), i + 1)

    if n_images == 0:
        # Nothing to average: leave the panel blank rather than dividing by zero
        ax.axis('off')
        ax.set_title(f'Average {cls}')
        continue

    # Compute average image (values truncated to 0-255 integers for display)
    avg_img = (pixel_sum / n_images).astype(np.uint8)

    # Display
    ax.imshow(avg_img)
    ax.axis('off')
    ax.set_title(f'Average {cls}')

plt.suptitle('Average Image per Class', fontsize=16)
plt.tight_layout()
plt.show()

In [None]:
# Average brightness per class

# Brightness of an image = mean grey level after conversion to 8-bit grayscale
brightness_list = []

for path in df['image_path']:
    with Image.open(path) as img:
        gray_pixels = np.asarray(img.convert('L'), dtype=np.float32)
    brightness_list.append(gray_pixels.mean())

# Store alongside the other per-image columns
df['brightness'] = brightness_list

# One box per class
fig, ax = plt.subplots(figsize=(10, 5))
sns.boxplot(data=df, x='label', y='brightness', hue='label', ax=ax)
ax.set_title('Image Brightness per Class')
ax.set_xlabel('Class')
ax.set_ylabel('Average Brightness')
ax.grid(True)
plt.show()

In [None]:
# Average intensity per color channel per image

# Per-image mean of each RGB channel, in DataFrame row order
avg_r, avg_g, avg_b = [], [], []

for path in df['image_path']:
    with Image.open(path) as img:
        arr = np.array(img.convert('RGB'), dtype=np.float32)
    avg_r.append(arr[:, :, 0].mean())
    avg_g.append(arr[:, :, 1].mean())
    avg_b.append(arr[:, :, 2].mean())

# Create DataFrame. Labels come straight from `df` (same row order as the
# loop above) instead of rebuilding and rebinding the notebook-level `labels`
# list; .tolist() keeps a fresh 0..n-1 index on `channel_df`.
channel_df = pd.DataFrame({
    'R': avg_r,
    'G': avg_g,
    'B': avg_b,
    'label': df['label'].tolist()
})

# Plot histograms per channel per class: one panel per channel
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

for ax, channel in zip(axes, ['R', 'G', 'B']):
    sns.histplot(data=channel_df, x=channel, hue='label', bins=30, kde=True, alpha=0.7, ax=ax)
    ax.set_title(f'{channel} Channel Intensity per Class')
    ax.set_xlabel('Average Pixel Intensity')
    ax.set_ylabel('Count')
    ax.grid(True)

fig.tight_layout()
plt.show()