# 01 Harmonization
Site-effect correction using ComBat, via the neuroHarmonize package.

In [None]:
from core.config import initialize_notebook
from core.harmonize.pipeline import run_harmonization_pipeline
import numpy as np

# Build the notebook environment object. The cells below pass it to
# run_harmonization_pipeline() and read env.configs.run and env.repo_root from it.
env = initialize_notebook()

In [None]:
# Execute all harmonization stages in one call; the result is indexed by
# split name below ('train', 'val', 'test').
harmonized_results = run_harmonization_pipeline(env)

print("Harmonization pipeline complete!")
print(f"Results available: {list(harmonized_results.keys())}")

# Report the shape of each data split, in train/val/test order.
for split_name in ("train", "val", "test"):
    print(f"{split_name.capitalize()}: {harmonized_results[split_name].shape}")

In [None]:
# Optional: validation and quality checks on the harmonized splits
print("=== Validation Results ===")

# Count NaNs per split. Each count is cast to a built-in int so the printed
# dict reads {'train': 0, ...}; NumPy >= 2 would otherwise render the NumPy
# scalars as np.int64(0).
# NOTE(review): assumes each split is a NumPy array (they are formatted with
# :.3f and saved as .npy elsewhere in this notebook) - confirm.
nan_counts = {
    split: int(np.isnan(harmonized_results[split]).sum())
    for split in ('train', 'val', 'test')
}

print(f"NaN counts: {nan_counts}")

if all(count == 0 for count in nan_counts.values()):
    print("✅ No NaN values found - harmonization successful!")
else:
    print("⚠️  WARNING: NaN values detected in harmonized data")

# Summary statistics for every entry except 'model'
# (presumably the fitted harmonization model rather than data - verify).
print("\nData ranges:")
for split, data in harmonized_results.items():
    if split == 'model':
        continue
    print(f"{split.capitalize()}: min={data.min():.3f}, max={data.max():.3f}, std={data.std():.3f}")

In [None]:
# Resolve where this run's outputs live:
# <repo_root>/outputs/<run_name>/<run_id>/seed_<seed>
run_cfg = env.configs.run
output_dir = (
    env.repo_root
    / "outputs"
    / run_cfg["run_name"]
    / run_cfg["run_id"]
    / f"seed_{run_cfg['seed']}"
)
harmonized_dir = output_dir / "harmonized"

print("=== Output Locations ===")
print(f"Harmonized data: {harmonized_dir}")
print(f"Harmonization model: {output_dir / 'models' / 'harmonization.pkl'}")
print(f"Artifacts: {output_dir / 'artifacts'}")

# List the saved .npy files, or report that the directory is missing.
if not harmonized_dir.exists():
    print("\nHarmonized directory not found")
else:
    files = list(harmonized_dir.glob("*.npy"))
    saved_names = [f.name for f in files]
    print(f"\nSaved harmonized files: {saved_names}")