# 02 — Single-shot baseline (ProteinMPNN → ESMFold)

In [None]:
!pip -q install transformers accelerate biopython pandas numpy matplotlib tqdm scikit-learn pyyaml
import torch, platform
print("torch", torch.__version__, "cuda?", torch.cuda.is_available(), "python", platform.python_version())


In [None]:
import sys
import os
from pathlib import Path

# Make the project's 'src' package importable: look for a directory that holds
# a 'src' folder, starting at the working directory and climbing at most four
# parent levels.
current_path = Path(os.getcwd()).resolve()
search_dirs = [current_path, *current_path.parents][:5]
project_root = next((d for d in search_dirs if (d / "src").is_dir()), None)

if project_root is not None and str(project_root) not in sys.path:
    sys.path.append(str(project_root))
    print(f"Added to sys.path: {project_root}")
elif current_path.name == 'colab':
    # Colab fallback for a cloned repo: when running from <repo>/colab
    # (e.g. /content/repo/colab), the parent directory is the repo itself.
    parent = str(current_path.parent)
    if parent not in sys.path:
        sys.path.append(parent)
        print(f"Added to sys.path: {parent}")


In [None]:
!git clone -q https://github.com/dauparas/ProteinMPNN.git
!pip -q install -r ProteinMPNN/requirements.txt


In [None]:
from pathlib import Path
import pandas as pd
from src.data.scaffolds import load_scaffold
from src.generate.proteinmpnn import run_proteinmpnn, read_fasta_sequences
from src.evaluate.esmfold_eval import evaluate_batch

# Imported in this cell so the device check below does not depend on an
# earlier cell having been run.
import torch

# Root directory for everything this experiment writes.
OUT = Path("results")
PDB_ID, CHAIN = "1AKL", "A"
NUM_SEQS = 50     # number of sequences requested from ProteinMPNN
NUM_TO_FOLD = 30  # only the first NUM_TO_FOLD sampled sequences are folded

# Use the GPU when present; otherwise fall back to CPU instead of failing on a
# hard-coded "cuda" device.
# NOTE(review): assumes evaluate_batch accepts "cpu" as a device string — confirm.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
if DEVICE == "cpu":
    print("Note: CUDA not available, folding on CPU (likely much slower)")

sc = load_scaffold(PDB_ID, CHAIN, OUT / "scaffolds")

# Sample sequences for the scaffold with ProteinMPNN, then read them back.
fasta = run_proteinmpnn(
    sc.pdb_path,
    OUT / "mpnn_single",
    Path("ProteinMPNN"),
    num_seqs=NUM_SEQS,
    sampling_temp=0.2,
    seed=42,
)
seqs = read_fasta_sequences(fasta)

# Fold the selected sequences with ESMFold.
fold_res = evaluate_batch(
    seqs[:NUM_TO_FOLD],
    model_id="facebook/esmfold_v1",
    device=DEVICE,
    out_dir=OUT / "pdb" / "single_shot",
)

# Explicit columns keep the frame well-formed (and sortable) even when
# fold_res is empty; without them an empty result has no "mean_plddt" column
# and the sort below raises KeyError.
df = pd.DataFrame(
    [{"sequence": r.sequence, "mean_plddt": r.mean_plddt, "pdb": str(r.pdb_path)} for r in fold_res],
    columns=["sequence", "mean_plddt", "pdb"],
)

# Last expression of the cell: displays the ten highest-pLDDT designs.
df.sort_values("mean_plddt", ascending=False).head(10)


In [None]:
from pathlib import Path
# Write the single-shot results table to results/tables/single_shot.csv.
OUT = Path("results")
csv_path = OUT / "tables" / "single_shot.csv"
csv_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_path, index=False)
print("saved", csv_path)

# Optional GitHub upload: best effort only. Any failure while importing or
# running the helper is reported and does not stop the notebook.
try:
    from src.utils.github_save import save_results_to_github
    save_results_to_github("exp02_single_shot")
except Exception as e:
    print(f"Note: GitHub save skipped ({e})")
