# 00: Visual Exploration

**Goal**: Build intuition about the manuscript through visual inspection.

This notebook provides:
- Random folio viewer
- Section-by-section sampling
- Basic visual statistics
- Foundation for later bounding box overlays


In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
import random

# Data locations (relative paths, resolved against the current working directory)
DATA_DIR = Path('..') / 'data'
IMAGES_DIR = DATA_DIR.joinpath('raw', 'yale', 'images')
METADATA_DIR = DATA_DIR.joinpath('raw', 'transcriptions', 'metadata')


## Load Metadata


In [None]:
# Read the per-folio metadata table; lines starting with '#' in the CSV are
# treated as comments. If the file is absent, folios_df is set to None so
# later cells can detect that no metadata is available.
try:
    folios_df = pd.read_csv(METADATA_DIR / 'folios.csv', comment='#')
except FileNotFoundError:
    print("Metadata file not found. Create data/raw/transcriptions/metadata/folios.csv")
    folios_df = None
else:
    # Quick summary: row count and the distinct values of the 'section' column.
    print(f"Loaded metadata for {len(folios_df)} folios")
    print(f"Sections: {folios_df['section'].unique()}")


## Random Folio Viewer


In [None]:
def view_random_folio(n=1):
    """Display n randomly chosen folio images side by side.

    Candidates are the ``f*.png`` and ``f*.jpg`` files located directly in
    ``IMAGES_DIR``. If none are found, a hint is printed and nothing is
    drawn.

    Args:
        n: Number of folios to show. Requests larger than the number of
            available images are clamped to what is available.

    Returns:
        None. The figure is rendered with ``plt.show()``.

    Raises:
        ValueError: If images exist but ``n`` is less than 1.
    """
    # Sort so the candidate pool does not depend on filesystem listing order.
    image_files = sorted(
        path
        for pattern in ('f*.png', 'f*.jpg')
        for path in IMAGES_DIR.glob(pattern)
    )

    if not image_files:
        print("No folio images found. Run pdf_to_images.py first.")
        return

    # Fail with a clear message instead of the opaque errors that
    # random.sample / plt.subplots would raise for a non-positive count.
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    selected = random.sample(image_files, min(n, len(image_files)))

    # squeeze=False always yields a 2-D array of axes, so the single-image
    # case needs no special handling.
    fig, axes = plt.subplots(
        1, len(selected), figsize=(15 * len(selected), 20), squeeze=False
    )

    for ax, img_path in zip(axes[0], selected):
        # Close the file handle as soon as the pixel data has been handed
        # to matplotlib.
        with Image.open(img_path) as img:
            ax.imshow(img)
        ax.set_title(f"{img_path.stem}", fontsize=16)
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Show a single randomly chosen folio
view_random_folio(n=1)
