# Fold Class Distribution Analysis

This notebook analyzes class distributions across cross-validation folds stored in a metadata CSV.
The CSV must include a `fold` column; task label columns such as `pathology`, `region`, and `depth` are analyzed when present, and any that are missing are reported and skipped.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Location of the metadata CSV; it must contain a 'fold' column.
csv_path = 'path/to/metadata.csv'
df = pd.read_csv(filepath_or_buffer=csv_path)
# Preview the first five rows as the cell output.
df.head(n=5)

In [None]:
# Rows without a fold assignment would otherwise be counted as an extra NaN
# "fold" and make the ordering unreliable, since NaN never compares as
# smaller or larger than another value.
fold_column = df['fold']
missing_fold_rows = int(fold_column.isna().sum())

# .tolist() converts NumPy scalars to native Python values so the printed
# list reads as [0, 1, ...] instead of [np.int64(0), ...] under NumPy >= 2.
folds = sorted(fold_column.dropna().unique().tolist())
print(f'Found {len(folds)} folds: {folds}')
if missing_fold_rows:
    print(f'Warning: {missing_fold_rows} rows have no fold assigned')

In [None]:
# Candidate label columns to tabulate per fold; each one is checked against
# df.columns before plotting, so columns absent from the CSV are skipped.
label_cols = ['pathology', 'region', 'depth']

def plot_fold_distribution(df, label):
    """Show how the classes of ``label`` are distributed across folds.

    Renders three outputs in order: a fold-by-class count table, a stacked
    bar chart of those counts, and a table of per-fold class percentages
    rounded to two decimals.

    Args:
        df: DataFrame containing a ``fold`` column and the ``label`` column.
        label: Name of the column to cross-tabulate against ``fold``.
    """
    counts = pd.crosstab(df['fold'], df[label])
    display(counts)

    # DataFrame.plot returns the Axes it drew on; label that Axes directly.
    axes = counts.plot(kind='bar', stacked=True, figsize=(8,4))
    axes.set_title(f'{label} distribution per fold')
    axes.set_ylabel('Count')
    plt.show()

    # Divide each row by its own total so every fold's classes sum to 100.
    fold_totals = counts.sum(axis=1)
    percentages = counts.div(fold_totals, axis=0) * 100
    display(percentages.round(2))

# Plot every configured label column that the CSV actually provides.
for label in label_cols:
    if label not in df.columns:
        # Report the gap and move on rather than failing the whole run.
        print(f'Column {label} not found in CSV')
        continue
    plot_fold_distribution(df, label)