In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

# --- 1. Configuration ---
# Root folder of the dataset; every immediate sub-folder is treated as one class.
DATA_DIR = '../data/asl_dataset'
# Get class names from the folder names. Hidden entries are excluded, and so are
# stray regular files (README, archives, ...): the code below calls
# os.listdir() on every class path, which fails on anything that is not a
# directory.
classes = sorted(
    d for d in os.listdir(DATA_DIR)
    if not d.startswith('.') and os.path.isdir(os.path.join(DATA_DIR, d))
)
num_classes = len(classes)

# --- 2. Analyze Class Balance ---
print("--- Analyzing Class Balance ---")
# Map each class name to the number of entries in its folder. Hidden entries
# (e.g. .DS_Store, .ipynb_checkpoints) are skipped so they do not inflate the
# counts, matching the way hidden names are excluded from the class list.
class_counts = {
    sign_class: sum(
        1
        for entry in os.listdir(os.path.join(DATA_DIR, sign_class))
        if not entry.startswith('.')
    )
    for sign_class in classes
}

# Two-column frame (class name, count) in the same order as `classes`.
class_counts_df = pd.DataFrame(list(class_counts.items()), columns=['Class', 'Count'])

# One bar per class; the wide figure and rotated tick labels keep the class
# names readable.
plt.figure(figsize=(18, 8))
sns.barplot(x='Class', y='Count', data=class_counts_df)
plt.title('Number of Images per Class in ASL Dataset', fontsize=16)
plt.xlabel('Class (Sign)', fontsize=12)
plt.ylabel('Number of Images', fontsize=12)
plt.xticks(rotation=45)
plt.show()
# NOTE(review): this remark is a fixed string, not derived from class_counts;
# confirm it against the plot whenever the dataset changes.
print("Observation: The dataset appears to be well-balanced across all classes.")


# --- 3. Display Sample Images ---
print("\n--- Displaying Sample Images ---")
plt.figure(figsize=(12, 12))
for i, sign_class in enumerate(classes[:25]): # Display first 25 classes (5x5 grid)
    class_path = os.path.join(DATA_DIR, sign_class)
    # os.listdir() returns entries in arbitrary order, so sort them to make the
    # chosen sample reproducible, and drop hidden entries that are not images.
    image_names = sorted(
        name for name in os.listdir(class_path) if not name.startswith('.')
    )
    if not image_names:
        # An empty class folder leaves its grid cell blank instead of raising
        # IndexError and aborting the whole figure.
        print(f"No images found for class: {sign_class}")
        continue

    plt.subplot(5, 5, i + 1)
    # Open inside a context manager so the file handle is released as soon as
    # the image has been handed to matplotlib.
    with Image.open(os.path.join(class_path, image_names[0])) as img:
        plt.imshow(img)
    plt.title(f"Class: {sign_class}")
    plt.axis('off')

plt.tight_layout()
plt.show()

: 