<a href="https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/BioEmu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Biomolecular Emulator (BioEmu) in ColabFold**
<img src="https://github.com/microsoft/bioemu/raw/main/assets/emu.png" height="130" align="right" style="height:240px">

[BioEmu](https://github.com/microsoft/bioemu) is a framework for emulating biomolecular dynamics and integrating structural prediction tools to accelerate research in structural biology and protein engineering. This notebook uses ColabFold to generate the MSA.



For more details, please read the [BioEmu Preprint](https://www.biorxiv.org/content/10.1101/2024.12.05.626885v1.abstract).


In [None]:
#@title Input protein sequence(s), then hit `Runtime` -> `Run all`
# Colab form fields: the protein sequence to sample, the number of samples
# requested from the sampler, and a job name used to name the output folder.
sequence = "PIAQIHILEGRSDEQKETLIREVSEAISRSLDAPLTSVRVIITEMAKGHFGIGGELASK"  #@param {type:"string"}
num_samples = 10  #@param {type:"integer"}
jobname = "test"  #@param {type:"string"}
# The form annotation for this flag is commented out on the next line, so it
# is a plain constant; none of the cells shown in this notebook read it.
side_chain_reconstruction = False
# #@param {type:"boolean"}
# ------------------------
# Copied logic from ColabFold
# ------------------------
import os
import re
import hashlib

def add_hash(x, seq):
    """Return x followed by an underscore and the first five hex digits of SHA-1(seq)."""
    digest = hashlib.sha1(seq.encode()).hexdigest()
    return "_".join((x, digest[:5]))

def folder_is_free(folder):
    """Report whether the given path is unused (nothing exists there yet)."""
    if os.path.exists(folder):
        return False
    return True

# Sanitize the form inputs: drop every non-word character from the job name
# and every whitespace character from the sequence, then tag the job name
# with a short hash of the sequence.
jobname_clean = re.sub(r'\W+', '', jobname)
sequence = "".join(sequence.split())
jobname = add_hash(jobname_clean, sequence)

# Results are created under this directory, so name collisions must be
# checked here too (not relative to the current working directory, which
# may differ and would let an existing results folder be silently reused).
base_dir = "/content"

if not folder_is_free(os.path.join(base_dir, jobname)):
    # Append the first numeric suffix that does not clash with a prior run.
    n = 0
    while not folder_is_free(os.path.join(base_dir, f"{jobname}_{n}")):
        n += 1
    jobname = f"{jobname}_{n}"

output_dir = os.path.join(base_dir, jobname)
os.makedirs(output_dir, exist_ok=True)


In [None]:
#@title Install
%%time
# Setup BioEmu
# Clone the BioEmu sources into ./bioemu_repo.
!git clone https://github.com/microsoft/bioemu.git bioemu_repo
# Bash installer that is written into the clone below. It resolves its own
# directory as SCRIPT_DIR, defaults COLABFOLD_DIR to $HOME/.localcolabfold when
# unset, runs colabfold_setup/setup.sh unless colabfold_batch already exists
# under COLABFOLD_DIR, and finally installs the cloned package together with
# bioemu-benchmarks via `uv pip install --system`.
script = """#!/bin/bash
set -e

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

# Set up colabfold
if [ -z "$COLABFOLD_DIR" ]; then
  echo "COLABFOLD_DIR not set. Setting to $HOME/.localcolabfold"
  export COLABFOLD_DIR=$HOME/.localcolabfold
fi

if [ -f $COLABFOLD_DIR/localcolabfold/colabfold-conda/bin/colabfold_batch ]; then
  echo "colabfold already installed in $COLABFOLD_DIR/localcolabfold/colabfold-conda/bin/colabfold_batch"
else
  bash $SCRIPT_DIR/colabfold_setup/setup.sh
fi

# Install bioemu and bioemu-benchmarks
uv pip install -q --system $SCRIPT_DIR bioemu-benchmarks
"""

# Place the installer inside the clone so that SCRIPT_DIR points at the
# repository directory when the script runs.
with open('bioemu_repo/setup_colab.sh', 'w') as handle:
  handle.write(script)

# NOTE: both stdout and stderr are discarded here, so a failing setup step
# produces no visible output in this cell.
!bash bioemu_repo/setup_colab.sh > /dev/null 2>&1
# py3Dmol is imported by the visualization cell further down.
!pip install -q py3Dmol

In [None]:
#@title Run BioEmu Sampling
import subprocess, sys

# Run the BioEmu sampler as a module of the interpreter backing this kernel,
# passing the cleaned sequence, the requested sample count and the job's
# output directory.
cmd = [
    sys.executable, "-m", "bioemu.sample",
    "--sequence", sequence,
    "--num_samples", str(num_samples),
    "--output_dir", output_dir
]

print("Running command:\n", " ".join(cmd))
# check=True raises CalledProcessError on a non-zero exit status, so a failed
# run stops the notebook here instead of being reported as complete below.
subprocess.run(cmd, check=True)
print("\nSampling complete. Check the", output_dir, "directory for PDB and XTC files.")

In [None]:
#@title Visualize Frames

import os
import tempfile
import ipywidgets as widgets
from IPython.display import display
import py3Dmol
import mdtraj as md

# Topology and trajectory files read from output_dir by the viewer below;
# point output_dir at your own results or make sure both files exist.
pdb_path, xtc_path = (
    os.path.join(output_dir, filename)
    for filename in ("topology.pdb", "samples.xtc")
)
def frame_to_pdb_string(frame):
    """Serialize a single-frame trajectory to PDB text.

    The frame is written to a temporary ``.pdb`` file through its
    ``save_pdb`` method, the file is read back, and it is removed again
    whether or not writing/reading succeeded.

    Args:
        frame: Object exposing ``save_pdb(filename)``; in this notebook a
            one-frame slice of the loaded trajectory.

    Returns:
        str: The text that ``frame.save_pdb`` wrote to the temporary file.

    Raises:
        Whatever ``frame.save_pdb`` or reading the file raises; the
        temporary file is still deleted in that case.
    """
    # Reserve a unique path and close the handle immediately so that
    # save_pdb can open the file by name itself.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.pdb', delete=False) as tmp:
        tmp_filename = tmp.name
    try:
        frame.save_pdb(tmp_filename)
        with open(tmp_filename, 'r') as f:
            return f.read()
    finally:
        # delete=False means nobody else cleans this up; do it even on error.
        os.remove(tmp_filename)

# Read all sampled frames, using the topology PDB alongside the XTC file.
traj = md.load(xtc_path, top=pdb_path)

# Build one viewer, seed it with the first frame and render it a single time;
# later frame changes reuse this same view object.
view = py3Dmol.view(width=600, height=400)
pdb_str = frame_to_pdb_string(traj[:1])
view.addModel(pdb_str, "pdb")
view.setStyle({"cartoon": {"color": "spectrum"}})
view.zoomTo()
view.show()

# Frame selector covering every index of the loaded trajectory.
frame_slider = widgets.IntSlider(
    description='Frame',
    min=0,
    max=len(traj) - 1,
    value=0,
    step=1,
)

def on_frame_change(change):
    """Slider callback: show the newly selected frame in the shared viewer.

    ``change["new"]`` is taken as the frame index; that single frame is
    converted to PDB text and replaces whatever model the viewer held.
    """
    idx = change["new"]
    selected = traj[idx:idx + 1]
    frame_pdb = frame_to_pdb_string(selected)

    # Swap the model inside the existing view rather than building a new one.
    view.removeAllModels()
    view.addModel(frame_pdb, "pdb")
    view.setStyle({"cartoon": {"color": "spectrum"}})
    view.zoomTo()
    # Push the changes to the already displayed output instead of emitting
    # a fresh cell output.
    view.update()

# React only to changes of the slider's value, then render the control.
frame_slider.observe(on_frame_change, names="value")
display(frame_slider)


In [None]:
#@title Zip and Download All Results
from google.colab import files

zip_name = "bioemu_samples.zip"
!zip -r $zip_name $output_dir
files.download(zip_name)