# 04 - Cross-Validation Strategies

Proper cross-validation is critical in neuroimaging: samples from the same run or subject are correlated, so splitting them across training and test sets causes data leakage.

**Contents:**
1. Leave-One-Run-Out (fMRI)
2. Leave-One-Subject-Out (Group analysis)
3. Stratified Group K-Fold
4. Permutation Testing
5. Comparing CV Strategies

In [None]:
import sys
sys.path.insert(0, '..')

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

from core.dataset import DecodingDataset
from models.classifiers import SVMDecoder
from validation.cross_validation import (
    LeaveOneRunOut,
    LeaveOneSubjectOut,
    StratifiedGroupKFold
)
from validation.permutation import PermutationTest

## Create Multi-Run Dataset

In [None]:
# Simulate fMRI data with runs.
# The sample count is derived from the run layout so that X, y and the run
# labels always have matching lengths when either number is changed.
n_runs = 5
n_trials_per_run = 40

X, y = make_classification(
    n_samples=n_runs * n_trials_per_run,
    n_features=1000,
    n_informative=50,
    n_classes=2,
    random_state=42
)

# Run labels 1..n_runs, each repeated for a block of consecutive trials
runs = np.repeat(np.arange(1, n_runs + 1), n_trials_per_run)

# The run labels are passed as `groups` so group-aware CV can use them
dataset = DecodingDataset(
    X=X,
    y=y,
    groups=runs,
    class_names=["class_A", "class_B"],
    modality="fmri"
)

print(f"Dataset: {dataset.n_samples} samples")
print(f"Runs: {np.unique(runs)}")

## 1. Leave-One-Run-Out (LORO)

The standard scheme for single-subject fMRI decoding. Holding out an entire run prevents leakage caused by temporal autocorrelation within a run.

In [None]:
# Build the leave-one-run-out splitter and report how many folds it yields
loro_cv = LeaveOneRunOut()
n_folds = loro_cv.get_n_splits(groups=runs)

print(f"Number of folds: {n_folds}")

# List, for every fold, which run labels land in the train and test sets
for fold_no, (train_idx, test_idx) in enumerate(
    loro_cv.split(X, y, groups=runs), start=1
):
    train_labels = np.unique(runs[train_idx]).tolist()
    test_labels = np.unique(runs[test_idx]).tolist()
    print(f"Fold {fold_no}: Train runs {train_labels}, Test run {test_labels}")

In [None]:
# Evaluate a linear SVM decoder using the LORO splitter
decoder = SVMDecoder(kernel="linear")
results_loro = decoder.cross_validate(dataset, cv=loro_cv)

# Summary statistics first, then the per-fold scores as percentages
print("\nLORO Results:")
print(f"  Mean accuracy: {results_loro.accuracy:.1%}")
print(f"  Std: {results_loro.cv_std:.1%}")
fold_scores = [f"{score:.1%}" for score in results_loro.cv_scores]
print(f"  CV scores: {fold_scores}")

## 2. Leave-One-Subject-Out (LOSO)

Used for multi-subject group analysis. Each fold holds out one subject, which tests generalization across subjects.

In [None]:
# Simulated multi-subject dataset: every subject contributes the same
# number of trials, and the subject id is used as the grouping variable.
n_subjects = 10
n_trials_per_subject = 50
n_total_trials = n_subjects * n_trials_per_subject

X_multi, y_multi = make_classification(
    n_samples=n_total_trials,
    n_features=500,
    n_informative=30,
    n_classes=2,
    random_state=42,
)

# Subject ids 1..n_subjects, each repeated once per trial
subject_ids = np.arange(1, n_subjects + 1)
subjects = subject_ids.repeat(n_trials_per_subject)

multi_dataset = DecodingDataset(
    X=X_multi,
    y=y_multi,
    groups=subjects,
    class_names=["class_A", "class_B"],
    modality="fmri",
)

print(f"Multi-subject dataset: {multi_dataset.n_samples} samples")
print(f"Subjects: {np.unique(subjects)}")

In [None]:
# Leave-one-subject-out evaluation of the decoder on the multi-subject data
loso_cv = LeaveOneSubjectOut()
results_loso = decoder.cross_validate(multi_dataset, cv=loso_cv)

# Assemble the report and emit it in a single print call
report_lines = [
    "LOSO Results:",
    f"  Mean accuracy: {results_loso.accuracy:.1%}",
    f"  Std across subjects: {results_loso.cv_std:.1%}",
]
print("\n".join(report_lines))

In [None]:
# Bar chart of the per-fold LOSO scores, one bar per subject id.
# NOTE(review): assumes cv_scores is ordered by ascending subject id — confirm.
subjects_unique = np.arange(1, n_subjects + 1)
mean_label = f'Mean: {results_loso.accuracy:.1%}'

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(subjects_unique, results_loso.cv_scores, color='steelblue', edgecolor='black')

# Reference lines: mean accuracy across folds and the 50% chance level
ax.axhline(y=results_loso.accuracy, color='red', linestyle='--', label=mean_label)
ax.axhline(y=0.5, color='gray', linestyle='--', label='Chance')

ax.set_xlabel('Subject')
ax.set_ylabel('Accuracy')
ax.set_title('Leave-One-Subject-Out Results')
ax.set_xticks(subjects_unique)
ax.legend()
plt.show()

## 3. Stratified Group K-Fold

K-fold that keeps groups (runs) together while maintaining class balance.

In [None]:
# Stratified group k-fold splitter with a fixed seed for reproducible folds
sgkf = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=42)

# For each fold, report which runs and how many samples of each class
# fall into the train and test partitions
for fold_no, (train_idx, test_idx) in enumerate(
    sgkf.split(X, y, groups=runs), start=1
):
    print(f"Fold {fold_no}:")
    for part, part_idx in (("Train", train_idx), ("Test", test_idx)):
        part_runs = np.unique(runs[part_idx]).tolist()
        class_counts = np.bincount(y[part_idx])
        print(f"  {part} runs: {part_runs}, classes: {class_counts}")

In [None]:
# Evaluate the decoder with the stratified group k-fold splitter
results_sgkf = decoder.cross_validate(dataset, cv=sgkf)

print("\nStratified Group K-Fold Results:")
sgkf_summary = (
    ("Mean accuracy", results_sgkf.accuracy),
    ("Std", results_sgkf.cv_std),
)
for label, value in sgkf_summary:
    print(f"  {label}: {value:.1%}")

## 4. Permutation Testing

Tests statistical significance by comparing the observed score to a null distribution.

In [None]:
# Configure the permutation test; 100 permutations keeps the demo fast
# (use 1000 for publication). n_jobs=-1 and verbose=1 are forwarded as-is.
perm_test = PermutationTest(n_permutations=100, n_jobs=-1, verbose=1)

# Run it on the single-subject dataset with the LORO splitter
results_perm = perm_test.test(decoder, dataset, cv=loro_cv)

# The observed score is read from the test object, the p-value and the
# significance flag from the returned results
observed = perm_test.observed_score_
p_value = results_perm.permutation_pvalue
significant = results_perm.is_significant

print("\nPermutation Test Results:")
print(f"  Observed accuracy: {observed:.1%}")
print(f"  P-value: {p_value:.4f}")
print(f"  Significant (p<0.05): {significant}")

In [None]:
# Plot null distribution
# NOTE(review): presumably draws the permutation scores together with the
# observed score from the test run above — confirm against PermutationTest.plot.
perm_test.plot()

## 5. Comparing CV Strategies

In [None]:
from sklearn.model_selection import StratifiedKFold

# Compare different CV strategies on the same dataset and decoder
cv_strategies = {
    'Stratified 5-Fold': StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    'Leave-One-Run-Out': LeaveOneRunOut(),
    'Stratified Group K-Fold': StratifiedGroupKFold(n_splits=5)
}

cv_results = {}
for name, cv in cv_strategies.items():
    # Every strategy goes through the same call: the previous if/else on the
    # strategy name had identical branches, so it is collapsed here.
    # NOTE(review): presumably the run labels stored in `dataset` reach the
    # splitter and plain StratifiedKFold ignores them — confirm in
    # SVMDecoder.cross_validate.
    results = decoder.cross_validate(dataset, cv=cv)
    cv_results[name] = results
    print(f"{name}: {results.accuracy:.1%} (+/- {results.cv_std:.1%})")

In [None]:
# Bar chart comparing mean accuracy (with std error bars) per CV strategy
fig, ax = plt.subplots(figsize=(8, 5))

# Pull labels and statistics out of the results mapping in insertion order
strategy_names = list(cv_results)
mean_accuracies = [cv_results[key].accuracy for key in strategy_names]
accuracy_stds = [cv_results[key].cv_std for key in strategy_names]
bar_positions = np.arange(len(strategy_names))

ax.bar(
    bar_positions,
    mean_accuracies,
    yerr=accuracy_stds,
    capsize=5,
    color='steelblue',
    edgecolor='black',
)
# 50% reference line for the two-class chance level
ax.axhline(y=0.5, color='red', linestyle='--', label='Chance')

# Rotated tick labels so the longer strategy names do not overlap
ax.set_xticks(bar_positions)
ax.set_xticklabels(strategy_names, rotation=30, ha='right')
ax.set(ylabel='Accuracy', title='CV Strategy Comparison')
ax.legend()

plt.tight_layout()
plt.show()

## Key Takeaways

1. **Always use group-aware CV** for neuroimaging data
2. **LORO** is standard for single-subject fMRI
3. **LOSO** tests generalization across subjects
4. **Permutation testing** provides statistical significance
5. Standard k-fold may inflate accuracy due to autocorrelation

## Next Steps

- **05_searchlight.ipynb**: Whole-brain searchlight analysis