In [None]:
# @title Enter the `PyRosettaCluster` output directory
from pathlib import Path

# @markdown ##### If the path is under `/content/drive` (e.g. begins with `/content/drive/MyDrive`), you will be asked to mount Google Drive
output_path = "/content/drive/MyDrive/pyrosettacluster-examples_reproduce-example-1" # @param {type:"string"}
output_path = Path(output_path)

# Google Drive is mounted at `/content/drive`, so every path below that mount
# point (not only those inside `MyDrive`) needs the mount before it can be used.
# This is a superset of the former `startswith("/content/drive/MyDrive")` check.
if output_path.is_relative_to("/content/drive"):
    from google.colab import drive
    drive.mount("/content/drive")

In [None]:
# @title Create a temporary `pixi` project solely for environment extraction
import os
import shutil
from pathlib import Path

# Install pixi
if not shutil.which("pixi"):
    !curl -fsSL https://pixi.sh/install.sh | sh
    # The installer is expected to place the binary in `$PIXI_HOME/bin`, defaulting to
    # `~/.pixi/bin` (i.e. `/root/.pixi/bin` on Colab) -- see the pixi installation docs.
    # The running kernel's PATH is not refreshed by the installer, so append it here.
    pixi_bin_dir = Path(os.environ.get("PIXI_HOME") or Path.home() / ".pixi") / "bin"
    # Skip an unset/empty PATH instead of embedding the literal string "None"
    os.environ["PATH"] = os.pathsep.join(
        entry for entry in (os.environ.get("PATH"), str(pixi_bin_dir)) if entry
    )
!pixi --version

# Clone pyrosetta-extras GitHub repository (skipped when already cloned, so the cell can be re-run)
extras_repo_path = Path.cwd() / "pyrosetta-extras"
if not extras_repo_path.is_dir():
    !git clone https://github.com/RosettaCommons/pyrosetta-extras.git "{extras_repo_path}"

# Set up pixi project (skipped when the manifest already exists, so the cell can be re-run)
rosettacommons_conda_channel = "https://conda.rosettacommons.org"
project_name = "extract_environment"
manifest_path = Path.cwd() / project_name / "pixi.toml"
if not manifest_path.exists():
    !pixi init {project_name}
    !pixi workspace --manifest-path "{manifest_path}" --no-progress channel add --prepend {rosettacommons_conda_channel}
    !pixi add --manifest-path "{manifest_path}" --no-progress python=3.14 pyrosetta
    !pixi add --manifest-path "{manifest_path}" --no-progress --pypi pyrosetta-distributed

In [None]:
# @title Enter/Upload a PyRosettaCluster output decoy or scorefile to be reproduced
from pathlib import Path

from google.colab import files
from IPython.display import display, Markdown

# @markdown ##### Enter a Google Drive path, or leave empty to be prompted to upload a file
input_file = "/content/drive/MyDrive/pyrosettacluster-examples_example-1/decoys/0000/example-1_ee4e3f706805477b9fd81c8ff3516949.pdb.bz2" # @param {type:"string"}
if not input_file:
    display(Markdown("### Upload a PyRosettaCluster output decoy or scorefile"))
    uploaded_dict = files.upload()
    # A cancelled upload yields an empty mapping; fail with a clear message
    # instead of a bare `StopIteration`.
    if not uploaded_dict:
        raise ValueError("No file was uploaded; re-run this cell and select a file.")
    # Uploaded files are written to the current working directory (`/content` by default)
    input_file = Path.cwd() / next(iter(uploaded_dict))
else:
    # Normalize to `Path` so `input_file` has the same type in both branches
    input_file = Path(input_file)
    # The output directory may live outside Google Drive, in which case Drive has
    # not been mounted yet; mounting an already-mounted Drive is harmless.
    if input_file.is_relative_to("/content/drive"):
        from google.colab import drive
        drive.mount("/content/drive")

# Fail early, before the environment extraction command is launched on a missing file
if not input_file.is_file():
    raise FileNotFoundError(f"Input file not found: '{input_file}'")

In [None]:
# @title Extract environment from PyRosettaCluster output decoy or scorefile
env_dir = Path("/content/reproduce_env")
dump_env_file_module = extras_repo_path / "pyrosettacluster" / "dump_env_file.py"
# Always address the temporary extraction project explicitly: the global
# `manifest_path` is rebound to the reproduced environment by the reproduction
# cell, so relying on it would pick the wrong project if this cell is re-run later.
extract_manifest_path = Path.cwd() / project_name / "pixi.toml"
# Paths are quoted because user-supplied/uploaded file names may contain spaces
# or shell metacharacters such as parentheses.
!pixi run --manifest-path "{extract_manifest_path}" \
    python "{dump_env_file_module}" \
    --input_file "{input_file}" \
    --env_dir "{env_dir}"

In [None]:
# @title Recreate the original `pixi` project
# Exported through `os.environ` so that the child process launched below inherits it.
# NOTE(review): presumably read by `recreate_env.py` to select `pixi` as the
# environment manager -- confirm against the pyrosetta-extras script.
os.environ["PYROSETTACLUSTER_ENVIRONMENT_MANAGER"] = "pixi"
recreate_env_module = extras_repo_path / "pyrosettacluster" / "recreate_env.py"
# Runs with the notebook's own `python` (not through `pixi run`), pointing the
# script at the directory written by the environment extraction cell (`env_dir`).
!python {recreate_env_module} --env_dir {env_dir}

In [None]:
# @title Prepare original GitHub repository
# The three form values below are interpolated into the `git clone`, `git fetch`
# and `git checkout` commands of the following cell; `repo_name` also determines
# the local clone directory (`/content/<repo_name>`).
# @markdown ##### Enter the GitHub SHA1 (as shown above)
sha1 = "28380a55bf3dcf6a3dadcc75cd4e7510caff494b" # @param {type:"string"}
# @markdown ##### Enter the original GitHub repository organization/owner
repo_owner = "klimaj" # @param {type:"string"}
# @markdown ##### Enter the original GitHub repository name
repo_name = "pyrosettacluster-examples" # @param {type:"string"}

In [None]:
# @title Checkout the original GitHub repository at the SHA1
repo_path = Path(f"/content/{repo_name}")
# Clone only once so the cell can be re-run: `git clone` refuses to clone into an
# existing, non-empty directory (same guard style as the pyrosetta-extras clone).
if not (repo_path / ".git").is_dir():
    !git clone --no-checkout https://github.com/{repo_owner}/{repo_name}.git "{repo_path}"
# Fetch the requested commit explicitly, then check it out (detached HEAD)
!cd "{repo_path}" && \
    git fetch origin {sha1} && \
    git checkout {sha1}

In [None]:
# @title Define the PyRosettaCluster simulation reproduction module
%%writefile {repo_path}/reproduce.py

import argparse
import pyrosetta

from dask.distributed import LocalCluster, Client
from pyrosetta.distributed.cluster import reproduce

from src.protocols.pyrosetta import blueprintbdr
from src.runners.example_1 import get_input_packed_pose, initialize_pyrosetta


def main(
    input_file: str,
    output_path: str,
    scratch_dir: str,
    n_workers: int,
    total_memory_gb: float = 12.7,
) -> None:
    """Reproduce a PyRosettaCluster simulation on a local dask cluster.

    Args:
        input_file: Path to the original output decoy or scorefile, passed to
            `reproduce` as `input_file`.
        output_path: Directory passed to `reproduce` as the `output_path`
            instance keyword argument.
        scratch_dir: Directory passed to `reproduce` as the `scratch_dir`
            instance keyword argument.
        n_workers: Number of dask workers; must be at least 1.
        total_memory_gb: Total memory budget in GB, split evenly between the
            workers. Defaults to 12.7, the value previously hard-coded here
            (presumably the RAM of a standard Colab runtime -- TODO confirm).

    Raises:
        ValueError: If `n_workers` is smaller than 1.
    """
    # Validate before the expensive PyRosetta setup; the per-worker memory limit
    # below divides by `n_workers`.
    if n_workers < 1:
        raise ValueError(f"n_workers must be a positive integer, got {n_workers!r}")

    # Set up reproduction simulation like the original
    initialize_pyrosetta()
    input_packed_pose = get_input_packed_pose()

    # Run reproduction simulation
    with LocalCluster(
        n_workers=n_workers,
        threads_per_worker=2,
        memory_limit=f"{total_memory_gb / n_workers:.2f}GB",
        scheduler_port=8786,
        dashboard_address=":8787",
    ) as cluster, Client(cluster) as client:
        reproduce(
            input_file=input_file,
            protocols=None, # Auto-detect imported protocol(s)
            client=client,
            input_packed_pose=input_packed_pose,
            instance_kwargs={
                "output_path": output_path,
                "scratch_dir": scratch_dir,
                "project_name": "pyrosettacluster-examples",
                "simulation_name": "example-1-reproduce",
            },
            skip_corrections=False,
            init_from_file_kwargs=None,
        )


if __name__ == "__main__":
    # Command-line entry point: every option is mandatory and maps one-to-one
    # onto a parameter of `main`.
    cli = argparse.ArgumentParser()
    for path_flag in ("--input_file", "--output_path", "--scratch_dir"):
        cli.add_argument(path_flag, type=str, required=True)
    cli.add_argument("--n_workers", type=int, required=True)
    options = cli.parse_args()
    main(
        input_file=options.input_file,
        output_path=options.output_path,
        scratch_dir=options.scratch_dir,
        n_workers=options.n_workers,
    )

In [None]:
# @title 🚀 Reproduce the PyRosettaCluster simulation
scratch_dir = Path("/content/scratch")
# `os.cpu_count()` returns `None` when the CPU count cannot be determined, which
# would be passed on as `--n_workers None`; fall back to a single worker.
n_workers = os.cpu_count() or 1
# NOTE: from here on `manifest_path` refers to the recreated (reproduced)
# environment, no longer to the temporary extraction project; later cells use it.
manifest_path = env_dir / "pixi.toml"
# Paths are quoted because user-supplied locations may contain spaces or shell
# metacharacters.
!cd "{repo_path}" && \
    pixi run --manifest-path "{manifest_path}" \
    python -m reproduce \
    --input_file "{input_file}" \
    --output_path "{output_path}" \
    --scratch_dir "{scratch_dir}" \
    --n_workers {n_workers}

In [None]:
# @title Validate that the original and reproduced structures are identical
validation_module = extras_repo_path / "actions" / "pyrosettacluster" / "assert_coordinates.py"
decoy_dir = output_path / "decoys" / "0000"
# `glob` yields files in arbitrary order; order them by modification time so the
# choice is deterministic when the output directory holds several decoys
# (e.g. after re-running the reproduction).
reproduced_decoys = sorted(decoy_dir.glob("*.pdb.bz2"), key=lambda decoy: decoy.stat().st_mtime)
if not reproduced_decoys:
    raise FileNotFoundError(
        f"No reproduced decoy (*.pdb.bz2) found in '{decoy_dir}'; run the reproduction cell first."
    )
# Validate the most recently written decoy
reproduce_output_file = reproduced_decoys[-1]
# Paths are quoted because user-supplied locations may contain spaces or shell
# metacharacters.
!pixi run --manifest-path "{manifest_path}" \
    python "{validation_module}" \
    --original_output_file "{input_file}" \
    --reproduce_output_file "{reproduce_output_file}"

In [None]:
# @title Optional: Enter/Upload the original PyRosettaCluster output scorefile
from pathlib import Path

from google.colab import files
from IPython.display import display, Markdown

# @markdown ##### Enter a Google Drive path, or leave empty to be prompted to upload a file
original_scorefile = "/content/drive/MyDrive/pyrosettacluster-examples_example-1/scores.json" # @param {type:"string"}
if not original_scorefile:
    display(Markdown("### Upload the original PyRosettaCluster output scorefile"))
    uploaded_dict = files.upload()
    # A cancelled upload yields an empty mapping; fail with a clear message
    # instead of a bare `StopIteration`.
    if not uploaded_dict:
        raise ValueError("No file was uploaded; re-run this cell and select a file.")
    # Uploaded files are written to the current working directory (`/content` by default)
    original_scorefile = Path.cwd() / next(iter(uploaded_dict))
else:
    # Normalize to `Path` so `original_scorefile` has the same type in both branches
    original_scorefile = Path(original_scorefile)
    # Drive may not be mounted yet when the other inputs live outside of it;
    # mounting an already-mounted Drive is harmless.
    if original_scorefile.is_relative_to("/content/drive"):
        from google.colab import drive
        drive.mount("/content/drive")

# Fail early, before the plotting command is launched on a missing file
if not original_scorefile.is_file():
    raise FileNotFoundError(f"Original scorefile not found: '{original_scorefile}'")

In [None]:
# @title Optional: Plot results
reproduce_scorefile = output_path / "scores.json"
# The plotting code is taken from `origin/main` and checked out on top of the
# pinned SHA1 working tree (only `src/plotting` and `src/utils.py` are replaced).
# Paths are quoted because user-supplied locations may contain spaces or shell
# metacharacters.
!cd "{repo_path}" && \
    git fetch origin && \
    git checkout origin/main -- src/plotting src/utils.py && \
    pixi run --manifest-path "{manifest_path}" \
    python -m src.plotting.example_1 \
    --original_scorefile "{original_scorefile}" \
    --reproduce_scorefile "{reproduce_scorefile}" \
    --output_path "{output_path}" \
    --legend_fontsize 8.8 \
    --y_tick_spacing 5 \
    --set_ylim_top 1.5

In [None]:
from PIL import Image
from IPython.display import display

# `Image.open` keeps the underlying file open until the image is closed; the
# context manager releases the handle once the plot has been rendered.
with Image.open(output_path / "rmsd_total_score_seed_scatter_plot.png") as plot_image:
    display(plot_image)