# 🧪 Explore Pathology Dataset

Quick sanity checks before training:
- a bar chart of class counts
- image size/mode printouts
- random image previews
- a grid of samples

In [None]:
import os, random
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image
import seaborn as sns

# config
DATA_ROOT = Path("../data/classification")
# One sub-directory per class. Sorted so the class order (and therefore the
# plots below) is reproducible -- iterdir() yields entries in arbitrary order.
# Hidden directories (e.g. Jupyter's .ipynb_checkpoints) are not classes.
CLASSES = sorted(
    d.name
    for d in DATA_ROOT.iterdir()
    if d.is_dir() and not d.name.startswith(".")
)
print("Classes:", CLASSES)

In [None]:
# count images per class
# Count only regular, non-hidden files so that nested directories and stray
# entries such as .DS_Store do not inflate the per-class totals.
counts = {
    cls: sum(
        1
        for p in (DATA_ROOT / cls).iterdir()
        if p.is_file() and not p.name.startswith(".")
    )
    for cls in CLASSES
}
print(counts)

# Bar chart of the class distribution; labels are rotated because class
# names can be long.
plt.figure(figsize=(10, 5))
sns.barplot(x=list(counts.keys()), y=list(counts.values()))
plt.xticks(rotation=45, ha='right')
plt.title("Images per class")
plt.show()

In [None]:
# inspect one image from each class
for cls in CLASSES:
    # Only regular, non-hidden files are candidates; directories or dotfiles
    # would make Image.open fail.
    candidates = [
        p
        for p in (DATA_ROOT / cls).iterdir()
        if p.is_file() and not p.name.startswith(".")
    ]
    if not candidates:
        # random.choice raises IndexError on an empty sequence.
        print(cls, "-> no files found, skipping")
        continue
    sample = random.choice(candidates)
    # Context manager closes the underlying file handle once we are done.
    with Image.open(sample) as img:
        print(cls, "->", img.size, "mode:", img.mode)
        display(img)  # rendered while the file is still open
    break  # remove break to show all

In [None]:
# show grid of random samples (one per class, first 10 classes)
fig, axes = plt.subplots(2, 5, figsize=(15, 6))
# Hide every axis up front so that cells left unused (fewer than 10 classes,
# or a class with no files) stay blank instead of showing empty frames.
for ax in axes.flat:
    ax.axis('off')
for ax, cls in zip(axes.flat, CLASSES[:10]):
    ax.set_title(cls[:12])  # truncate long class names to keep titles readable
    # Only regular, non-hidden files are candidates for preview.
    candidates = [
        p
        for p in (DATA_ROOT / cls).iterdir()
        if p.is_file() and not p.name.startswith(".")
    ]
    if not candidates:
        # Nothing to preview; random.choice would raise on an empty list.
        continue
    # Context manager closes the file handle after the image has been drawn.
    with Image.open(random.choice(candidates)) as img:
        ax.imshow(img)
plt.tight_layout()
plt.show()