In [None]:
# @title Enter the `PyRosettaCluster` output directory
from pathlib import Path

# @markdown ##### If the path begins with `/content/drive/MyDrive`, you will be asked to mount Google Drive
output_path = "/content/drive/MyDrive/pyrosettacluster-examples_reproduce-example-2" # @param {type:"string"}
# Later cells join onto this value with `/`, so convert the form string to a `Path` once here.
output_path = Path(output_path)

# Google Drive is mounted only when the chosen output directory lives under it.
output_is_on_drive = str(output_path).startswith("/content/drive/MyDrive")
if output_is_on_drive:
    from google.colab import drive
    drive.mount("/content/drive")

In [None]:
# @title Create a temporary `pixi` project solely for environment extraction
import os
import shutil
from pathlib import Path

# Install pixi
if not shutil.which("pixi"):
    !curl -fsSL https://pixi.sh/install.sh | sh
    os.environ["PATH"] = f"{os.getenv('PATH')}{os.pathsep}/root/.pixi/bin"
!pixi --version

# Clone pyrosetta-extras GitHub repository
extras_repo_path = Path.cwd() / "pyrosetta-extras"
if not extras_repo_path.is_dir():
    !git clone https://github.com/RosettaCommons/pyrosetta-extras.git {extras_repo_path}

# Setup pixi project
rosettacommons_conda_channel = "https://conda.rosettacommons.org"
project_name = "extract_environment"
manifest_path = Path.cwd() / project_name / "pixi.toml"
if not manifest_path.exists():
    !pixi init {project_name}
    !pixi workspace --manifest-path {manifest_path} --no-progress channel add --prepend {rosettacommons_conda_channel}
    !pixi add --manifest-path {manifest_path} --no-progress python=3.12 pyrosetta=2026.5 # Ensure original PyRosetta build signature
    !pixi add --manifest-path {manifest_path} --no-progress --pypi pyrosetta-distributed

In [None]:
# @title Enter/Upload a PyRosettaCluster output decoy or scorefile to be reproduced
from google.colab import files
from IPython.display import display, Markdown

# @markdown ##### Enter a Google Drive path, or leave empty to be prompted to upload a file
input_file = "/content/drive/MyDrive/pyrosettacluster-examples_example-2/decoys/0000/example-2-gpu-0_d906fd986bc14c029cff3e39159e1850.b64_pose" # @param {type:"string"}
if not input_file:
    # No path was entered: prompt for an upload and use the first uploaded file name.
    display(Markdown("### Upload a PyRosettaCluster output decoy or scorefile"))
    uploaded_dict = files.upload()
    uploaded_name = next(iter(uploaded_dict))
    input_file = f"/content/{uploaded_name}"
# Both branches end with a `Path`.
input_file = Path(input_file)

In [None]:
# @title Extract environment from PyRosettaCluster output decoy or scorefile
env_dir = Path("/content/reproduce_env")
dump_env_file_module = extras_repo_path / "pyrosettacluster" / "dump_env_file.py"
!pixi run --manifest-path {manifest_path} \
    python {dump_env_file_module} \
    --input_file {input_file} \
    --env_dir {env_dir}

In [None]:
# @title Recreate the original `pixi` project
os.environ["PYROSETTACLUSTER_ENVIRONMENT_MANAGER"] = "pixi"
recreate_env_module = extras_repo_path / "pyrosettacluster" / "recreate_env.py"
!python {recreate_env_module} --env_dir {env_dir}

In [None]:
# @title Prepare original GitHub repository
# The three form values below identify the repository and commit to check out;
# they are interpolated into the `git clone` / `git fetch` / `git checkout`
# commands of the following cell.
# @markdown ##### Enter the GitHub SHA1 (as shown above)
sha1 = "cc4c3ecfadaee14dc35b8cc473b64313d3092685" # @param {type:"string"}
# @markdown ##### Enter the original GitHub repository organization/owner
repo_owner = "klimaj" # @param {type:"string"}
# @markdown ##### Enter the original GitHub repository name
repo_name = "pyrosettacluster-examples" # @param {type:"string"}

In [None]:
# @title Checkout the original GitHub repository at the SHA1
repo_path = Path(f"/content/{repo_name}")
!git clone --no-checkout https://github.com/{repo_owner}/{repo_name}.git {repo_path}
!cd {repo_path} && \
    git fetch origin {sha1} && \
    git checkout {sha1}

In [None]:
# @title Define the PyRosettaCluster simulation reproduction module
%%writefile {repo_path}/reproduce.py

import argparse
import pyrosetta

from dask.distributed import LocalCluster, Client
from pyrosetta.distributed.cluster import get_scores_dict, reproduce

from src.protocols.foundry import proteinmpnn, rf3, rfd3
from src.protocols.pyrosetta import cart_min, compute_rmsd, cst_cart_min_poly_gly
from src.runners.example_2 import Resources, download_checkpoints, get_system_info, initialize_pyrosetta
from src.utils import get_sha256_digest


def verify_system_info(original_system_info, reproduce_system_info):
    """Check that the current system info matches the one recorded by the original run.

    Only keys present in `original_system_info` are compared; extra keys in
    `reproduce_system_info` are ignored.

    Args:
        original_system_info: Mapping recorded by the original simulation.
        reproduce_system_info: Mapping computed for the current (reproduction) run.

    Raises:
        FileNotFoundError: An original checkpoint file is absent from the current
            `"checkpoints"` mapping, including when that mapping is missing entirely.
        ValueError: A checkpoint checksum differs, or any other value differs.
    """
    for k in original_system_info:
        reproduce_val = reproduce_system_info.get(k)
        original_val = original_system_info.get(k)
        if k == "checkpoints":
            # A missing (None) "checkpoints" entry on the reproduction side is treated as
            # "nothing downloaded", so the caller gets the intended FileNotFoundError
            # instead of a TypeError from `in None`.
            reproduce_ckpts = reproduce_val if reproduce_val is not None else {}
            for ckpt_file in original_val:
                if ckpt_file not in reproduce_ckpts:
                    raise FileNotFoundError(f"The original checkpoint file '{ckpt_file}' was not downloaded.")
                if reproduce_ckpts.get(ckpt_file) != original_val.get(ckpt_file):
                    raise ValueError(f"Checksums differ for checkpoint file '{ckpt_file}'.")
        else:
            if reproduce_val != original_val:
                raise ValueError(f"Original info '{original_val}' is not identical to current: '{reproduce_val}'.")


def main(
    input_file: str,
    output_path: str,
    scratch_dir: str,
    gpu: bool,
):
    """Reproduce the simulation recorded in `input_file` on a local Dask cluster.

    Args:
        input_file: Original output decoy or scorefile to reproduce.
        output_path: Directory passed to the reproduction as `output_path`.
        scratch_dir: Directory passed to the reproduction as `scratch_dir`.
        gpu: Forwarded to `get_system_info` and encoded in the simulation name.

    Raises:
        FileNotFoundError, ValueError: Propagated from `verify_system_info` when the
            current system info does not match the recorded one.
    """
    # Same preparation steps as the original run, in the same order.
    initialize_pyrosetta()
    download_checkpoints()
    system_info = get_system_info(gpu)

    # Compare the recorded system info (including checkpoint checksums) with the current one.
    original_record = get_scores_dict(input_file)
    verify_system_info(original_record["instance"]["system_info"], system_info)

    # Single-worker local cluster; the memory limit is split evenly across workers.
    n_workers = 1
    cluster_options = dict(
        n_workers=n_workers,
        threads_per_worker=2,
        memory_limit=f"{12.7 / n_workers:.2f}GB",
        scheduler_port=8786,
        dashboard_address=":8787",
    )
    instance_kwargs = {
        "output_path": output_path,
        "scratch_dir": scratch_dir,
        "project_name": "pyrosettacluster-examples",
        "simulation_name": f"example-2-gpu-{int(gpu)}",
        "system_info": system_info,
    }

    # Run reproduction simulation
    with LocalCluster(**cluster_options) as cluster:
        with Client(cluster) as client:
            reproduce(
                input_file=input_file,
                protocols=None, # Auto-detect imported protocol(s)
                client=client,
                input_packed_pose=None,
                instance_kwargs=instance_kwargs,
                resources=None,
                skip_corrections=False,
                init_from_file_kwargs=None,
            )


if __name__ == "__main__":
    # Command-line entry point: three required path options plus a `--gpu/--no-gpu` switch.
    parser = argparse.ArgumentParser()
    for required_flag in ("--input_file", "--output_path", "--scratch_dir"):
        parser.add_argument(required_flag, type=str, required=True)
    parser.add_argument("--gpu", action=argparse.BooleanOptionalAction, default=False)
    args = parser.parse_args()
    main(
        input_file=args.input_file,
        output_path=args.output_path,
        scratch_dir=args.scratch_dir,
        gpu=args.gpu,
    )

In [None]:
# @title 🚀 Reproduce the PyRosettaCluster simulation
from torch.cuda import is_available

env_dir_manifest_path = env_dir / "pixi.toml"
scratch_dir = Path("/content/scratch")
env_file = Path.cwd() / ".env"
env_file.write_text(
    "\n".join(
        [
            "PDB_MIRROR_PATH=",
            "CCD_MIRROR_PATH=",
            "LOCAL_MSA_DIRS=",
            "HBPLUS_PATH=",
            "X3DNA_PATH=",
            "DSSP_PATH=",
            "HHFILTER_PATH=",
            "MMSEQS2_PATH=",
            "COLABFOLD_LOCAL_DB_PATH_GPU=",
            "COLABFOLD_LOCAL_DB_PATH_CPU=",
            "COLABFOLD_NET_DB_PATH_GPU=",
            "COLABFOLD_NET_DB_PATH_CPU=",
            "FOUNDRY_CHECKPOINT_DIRS=",
        ]
    )
)

gpu_flag = "--gpu" if is_available() else "--no-gpu"
!export $(xargs < {env_file}) && \
    cd {repo_path} && \
    pixi run --manifest-path {env_dir_manifest_path} \
    python -m reproduce {gpu_flag} \
    --input_file {input_file} \
    --output_path {output_path} \
    --scratch_dir {scratch_dir}

In [None]:
# @title Validate that the original and reproduced structures are identical
import json

# Show the reproducibility caveat whenever the simulation was launched with the GPU flag.
gpu_was_enabled = gpu_flag == "--gpu"
if gpu_was_enabled:
    print("*Warning*: Cannot guarantee PyRosettaCluster simulation reproducibility with GPUs enabled.")

def get_reproduce_output_file(output_path, protocol_number=5):
    """Return the `.b64_pose` path of the first record with the given protocol number.

    Reads `<output_path>/scores.json` as one JSON document per line.

    Args:
        output_path: `Path` of the directory that contains `scores.json`.
        protocol_number: Value to match against each record's
            `["scores"]["protocol_number"]`.

    Returns:
        The record's `["metadata"]["output_file"]` as a `Path`, with its suffix
        replaced by `.b64_pose`.

    Raises:
        LookupError: No record has the requested protocol number. Raising here prevents
            an implicit `None` from being interpolated into a later shell command.
    """
    scorefile = output_path / "scores.json"
    with scorefile.open("r") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline) instead of failing in `json.loads`.
            if not line.strip():
                continue
            d = json.loads(line)
            if d["scores"]["protocol_number"] == protocol_number:
                return Path(d["metadata"]["output_file"]).with_suffix(".b64_pose")
    raise LookupError(
        f"No record with protocol number {protocol_number} was found in '{scorefile}'."
    )

validation_module = extras_repo_path / "actions" / "pyrosettacluster" / "assert_coordinates.py"
reproduce_output_file = get_reproduce_output_file(output_path, protocol_number=5)
!pixi run --manifest-path {env_dir_manifest_path} \
    python {validation_module} \
    --original_output_file {input_file} \
    --reproduce_output_file {reproduce_output_file}

In [None]:
# @title Optional: Enter/Upload the original PyRosettaCluster output scorefile
from google.colab import files
from IPython.display import display, Markdown

# @markdown ##### Enter a Google Drive path, or leave empty to be prompted to upload a file
original_scorefile = "/content/drive/MyDrive/pyrosettacluster-examples_example-2/scores.bz2" # @param {type:"string"}
if not original_scorefile:
    # No path was entered: prompt for an upload and use the first uploaded file name.
    display(Markdown("### Upload the original PyRosettaCluster output scorefile"))
    uploaded_dict = files.upload()
    original_scorefile = f"/content/{next(iter(uploaded_dict.keys()))}"
# Convert to `Path` on both branches (previously only the upload branch did so),
# consistent with how `input_file` is handled earlier in the notebook.
original_scorefile = Path(original_scorefile)

In [None]:
# @title Optional: Analyze original vs. reproduced results
reproduce_scorefile = output_path / "scores.bz2"
!cd {repo_path} && \
    git fetch origin && \
    git checkout origin/main -- src/plotting src/utils.py && \
    pixi run --manifest-path {env_dir_manifest_path} \
    python -m src.plotting.analyze_reproduce_example_2 \
    --original_scorefile {original_scorefile} \
    --reproduce_scorefile {reproduce_scorefile}