# Tutorial: End-to-End Results Demo

Audience:
- Anyone reviewing the final project conclusions for reproducibility.

Prerequisites:
- Install dependencies from `requirements.txt`.
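- Start the kernel from any directory inside this repository's Git checkout (the first code cell finds the root by searching upward for `.git`), using a Python kernel that has `pandas` and `IPython`.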

Learning goals:
- Rebuild aggregated results from canonical artifacts in `results/`.
- Verify the final conclusions (PCA baseline performance and LSFT effects).
- Locate the generated summary markdown for GitHub-ready reporting.


## Outline

1. Locate repository root
2. Run one script to regenerate and verify conclusions
3. Inspect core summary tables
4. Review the generated markdown report


In [None]:
from __future__ import annotations

import subprocess
import sys
from pathlib import Path

import pandas as pd
from IPython.display import display

def find_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above ``start`` that contains ``.git``.

    Args:
        start: Path to begin the upward search from. It may be relative; it is
            resolved to an absolute path before the search.

    Returns:
        The first candidate (the resolved ``start``, then each of its parents
        in turn) that has a ``.git`` entry.

    Raises:
        RuntimeError: If no candidate has a ``.git`` entry.
    """
    # A relative path such as Path('.') exposes no ancestors through
    # ``parents``, so the walk would end after a single candidate. Resolving
    # first makes every ancestor directory visible to the loop.
    origin = start.resolve()
    for candidate in (origin, *origin.parents):
        if (candidate / '.git').exists():
            return candidate
    raise RuntimeError('Could not find repository root (missing .git).')

# Search upward from the kernel's current working directory.
working_dir = Path.cwd()
repo_root = find_repo_root(working_dir)
# Bare trailing expression so the notebook shows the resolved root.
repo_root


In [None]:
# Run the end-to-end demo script with the same interpreter as this kernel,
# using the repository root as the working directory.
demo_script = repo_root / 'scripts' / 'demo' / 'run_end_to_end_results_demo.py'
cmd = [sys.executable, str(demo_script)]
try:
    completed = subprocess.run(cmd, cwd=repo_root, capture_output=True, text=True, check=True)
except subprocess.CalledProcessError as exc:
    # With capture_output=True the child's output exists only on the exception
    # object; print it so the failure can be diagnosed, then re-raise unchanged.
    if exc.stdout:
        print(exc.stdout)
    if exc.stderr:
        print(exc.stderr, file=sys.stderr)
    raise
print(completed.stdout)
# Forward anything the script wrote to stderr instead of silently dropping it.
if completed.stderr:
    print(completed.stderr, file=sys.stderr)


In [None]:
agg_dir = repo_root / 'aggregated_results'

# Load the three aggregated CSV tables summarized below.
best = pd.read_csv(agg_dir / 'best_baseline_per_dataset.csv')
lsft = pd.read_csv(agg_dir / 'lsft_improvement_summary.csv')
logo = pd.read_csv(agg_dir / 'logo_generalization_all_analyses.csv')

print('Best baseline by dataset and analysis type:')
display(best)

print('Single-cell LSFT mean delta for key baselines:')
# Keep only the rows for the three listed baselines and the columns of interest.
key_baselines = ['lpm_selftrained', 'lpm_scgptGeneEmb', 'lpm_randomPertEmb']
is_key_baseline = lsft['baseline'].isin(key_baselines)
display(lsft.loc[is_key_baseline, ['dataset', 'baseline', 'mean_delta_r']])

print('Top LOGO baseline by analysis type:')
# Average pearson_r per (analysis_type, baseline), order each analysis type by
# descending mean, then take the first row of every analysis type.
mean_r = logo.groupby(['analysis_type', 'baseline'], as_index=False)['pearson_r'].mean()
ranked = mean_r.sort_values(['analysis_type', 'pearson_r'], ascending=[True, False])
display(ranked.groupby('analysis_type').head(1))


In [None]:
# Print the report's location, an 80-character rule, then the report text.
summary_path = agg_dir / 'final_conclusions_verified.md'
print(summary_path, '-' * 80, sep='\n')
report_text = summary_path.read_text(encoding='utf-8')
print(report_text)
