<a href="https://colab.research.google.com/github/WaymentSteeleLab/Dyna-1/blob/main/colab/Dyna_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<img src="" height="200" align="right" style="height:240px">

## Dyna-1
Easy-to-use dynamics prediction with [Dyna-1](https://www.biorxiv.org/content/10.1101/2025.03.19.642801v1). Given a sequence and/or structure, Dyna-1 will predict the probability that each residue experiences millisecond motions. Inference uses a pre-trained version of [ESM-3](https://www.biorxiv.org/content/10.1101/2024.07.01.600583v1), is subject to the license terms of ESM-3, and requires read permission to the [esm3-sm-open-v1](https://huggingface.co/EvolutionaryScale/esm3-sm-open-v1) weights.
<br><br>
A version of Dyna-1 that runs on pre-trained [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) is available under a Non-Commercial License Agreement on [Google Colab](https://colab.research.google.com/drive/1K3rWLd6I3tCe57jV-l-UbSmbc7zkQvSK?usp).
<br><br>
<i>This colab was written by Gina El Nesr (gelnesr@stanford.edu)</i>

In [None]:
#@title **IMPORTANT:** run this cell first before doing 'Runtime → Run all'
#@markdown - The latest update to Google Colab broke numpy; this is a temporary patch.
#@markdown - Note after running this cell, the session will crash (this is normal).

import os, numpy, signal

# Pin numpy to 1.26.4. When another version is loaded, reinstall the pinned
# one and kill the current process so the runtime restarts and picks up the
# new package on the next import.
if numpy.__version__ != '1.26.4':
  print(f"Current numpy version {numpy.__version__} is incorrect. Installing 1.26.4...")
  os.system("pip uninstall -y numpy")
  install_status = os.system("pip install numpy==1.26.4")
  if install_status != 0:
    # Do not kill the runtime after a failed install: the restart would only
    # hide the pip error and leave the session without a usable numpy.
    raise RuntimeError("Installing numpy==1.26.4 failed; see the pip output above.")
  # Restart the runtime using os.kill
  os.kill(os.getpid(), signal.SIGKILL)
else:
  print("Numpy version is correct (1.26.4)")

In [None]:
#@title Install dependencies & download model weights (~3 min)
!git clone https://github.com/WaymentSteeleLab/Dyna-1.git --depth 1

print('installing requirements for Dyna-1...')
import os
os.system('pip install -r Dyna-1/requirements.txt')
os.system('pip install gdown, py3Dmol, torcheval')
print('requirements installed!')

import gdown
if not os.path.isfile('Dyna-1/model/weights/dyna1.pt'):
  print('downloading model weights...')
  gdown.download('https://drive.google.com/uc?id=1UJWpPKPgJH9AYADMIqL0MzyU772CrP9t', 'Dyna-1/model/weights/dyna1.pt', quiet=False)
print('model weights downloaded!')
if not os.path.exists('/content/outputs'):
  os.mkdir('/content/outputs')
if not os.path.exists('/content/inputs'):
  os.mkdir('/content/inputs')

In [None]:
#@title Log into HuggingFace

# Open the interactive HuggingFace login prompt. The notebook intro states
# that read permission to the esm3-sm-open-v1 weights is required.
import huggingface_hub

login = huggingface_hub.login  # keep `login` bound at notebook scope
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
#@title Prepare Inputs
from google.colab import files
import os
import re
import hashlib
import random
import sys

def add_hash(x,y):
  """Return `x` followed by "_" and the first five hex digits of the SHA-1 of `y`."""
  digest = hashlib.sha1(y.encode()).hexdigest()
  return "_".join((x, digest[:5]))

def get_pdb(pdb_code="", inputs_dir='/content/inputs'):
  """Resolve the user's PDB input to either a 4-character code or a local file path.

  Args:
    pdb_code: "" to open an upload prompt, a 4-character PDB code (returned
      unchanged), or the name of a file inside `inputs_dir` (the '.pdb'
      extension is appended when missing).
    inputs_dir: directory that holds uploaded/input PDB files. The working
      directory is switched to it before anything else happens.

  Returns:
    The 4-character code as given, or the path of the PDB file.

  Raises:
    SystemExit: when nothing was uploaded, the upload is not a '.pdb' file,
      or the file is missing or empty.
  """
  if os.getcwd() != inputs_dir:
    os.chdir(inputs_dir)

  # upload a pdb
  if pdb_code == "":
    # presumably files.upload() stores the file in the current directory,
    # which is why the chdir above happens first -- TODO confirm
    upload_dict = files.upload()
    if not upload_dict:
      sys.exit('No file was uploaded.')
    pdb_path = os.path.join(inputs_dir, list(upload_dict.keys())[0])
    if not pdb_path.endswith('.pdb'):
      sys.exit(f"{pdb_path} does not look like a PDB file.")
    if not os.path.isfile(pdb_path) or not os.path.getsize(pdb_path):
      sys.exit(f'{pdb_path} is empty or does not exist.')
    return pdb_path

  # pdb code (4 letter)
  if len(pdb_code) == 4:
    return pdb_code

  # check if uploaded file is pdb and non-empty
  pdb_path = os.path.join(inputs_dir, pdb_code)
  if not pdb_path.endswith('.pdb'):
    pdb_path += '.pdb'
  # getsize() raises on a missing file, so test for existence explicitly.
  if not os.path.isfile(pdb_path):
    sys.exit(f'{pdb_path} does not exist.')
  if not os.path.getsize(pdb_path):
    sys.exit(f'{pdb_path} is empty.')
  return pdb_path

#@markdown ####Dyna-1 takes in either sequence, structure, or both. Please specify at least one.

#@markdown Leave blank to get an upload prompt, upload your pdb in the 'inputs' folder, or N/A if no input.
#@markdown To run inference using only the structure (no sequence input), check 'only_backbone'.
#@markdown This may be useful for evaluating designed backbones, but note that this mode is not as accurate.
pdb='1YOB' #@param {type:"string"}
chain='A' #@param {type:"string"}
only_backbone = False #@param {type:"boolean"}
#@markdown By default, the model will use the sequence from the pdb on the specified chain.
#@markdown Inputting a sequence below will overwrite the sequence on the pdb if a pdb was given.
#@markdown Otherwise, it will run as sequence-only inference which may not be as accurate.
sequence = '' #@param {type:"string"}

# Form fields are free text: drop stray surrounding whitespace so that
# e.g. ' ' is not mistaken for a real sequence or chain id.
pdb, chain, sequence = pdb.strip(), chain.strip(), sequence.strip()

if pdb.upper() != 'N/A':
  # get_pdb() either returns a PDB code / file path or exits, so the result
  # can replace the raw form value directly.
  pdb_path = get_pdb(pdb)
  pdb = pdb_path
elif sequence == '':
  sys.exit('Must specify an input sequence or PDB.')
else:
  # sequence-only inference: no structure available
  pdb, pdb_path = None, None

if pdb is not None and chain in ('', 'N/A'):
  sys.exit('Must specify a chain.')
# only_backbone discards the sequence input; without a structure the model
# would be left with no input at all.
if only_backbone and pdb is None:
  sys.exit('only_backbone requires a PDB structure.')

In [None]:
#@title Model Parameters
job_name = '' #@param {type:"string"}
write_pred_to_struct = True #@param {type:"boolean"}
# Writing predictions onto a structure needs an input structure; turn the
# option off (and say so) when only a sequence was supplied.
if write_pred_to_struct and pdb is None:
  print('Cannot write prediction to structure since no structure was given. No PDB file will be outputted.')
  write_pred_to_struct = False

In [None]:
#@title Run Dyna-1 (~30s to 2 min)
import os
# Work from the cloned repository: the config and weights paths used later in
# this cell are relative, and the `utils` / `model.model` imports below are
# presumably resolved from this directory as well.
if os.getcwd() != '/content/Dyna-1':
    os.chdir('/content/Dyna-1')
import warnings
warnings.filterwarnings("ignore")
from collections import OrderedDict
import utils
import torch
import random
import argparse
import numpy as np
import pandas as pd
import MDAnalysis as mda

# NOTE(review): the star import supplies ESM_model used later in this cell;
# an explicit import would make the dependency visible.
from model.model import *
from esm.sdk.api import ESMProtein
from esm.utils.structure.protein_chain import ProteinChain
from transformers import AutoTokenizer

# Run on the first CUDA device when one is available, otherwise on the CPU.
DEVICE = torch.device("cpu")

if torch.cuda.is_available():
   DEVICE = torch.device("cuda:0")

def handle_name(name, pdb):
  """Build the base name for output files.

  A non-empty `name` wins. Otherwise the name comes from `pdb`: a
  4-character value is used as-is, anything else is treated as a path whose
  last component has its final four characters removed. With neither given,
  a random integer in [0, 100000] is used. '-Dyna1' is always appended.
  """
  if name:
    return f'{name}-Dyna1'

  if not pdb:
    stem = random.randint(0, 100000)
  elif len(pdb) == 4:
    stem = pdb
  else:
    file_name = pdb.rsplit('/', 1)[-1]
    stem = file_name[:-4]
  return f'{stem}-Dyna1'

import sys  # sys.exit is used below; import it here so the cell does not rely on earlier cells

config, config_dict = utils.load_config('configs/esm3.yml', return_dict=True)
output_base = handle_name(job_name, pdb)

# Build the model, load the Dyna-1 weights (non-strict) and switch to eval mode.
model = ESM_model(method='esm3').to(DEVICE)
model.load_state_dict(torch.load('model/weights/dyna1.pt', map_location=DEVICE), strict=False)
model.eval()
seq_input, struct_input = None, None

# Structure input: a 4-character value is fetched from RCSB, anything else
# must be an existing local file.
if pdb is not None:
  if len(pdb) == 4:
    protein_chain = ProteinChain.from_rcsb(pdb.upper(), chain_id=chain)
    protein = ESMProtein.from_protein_chain(protein_chain)
  elif os.path.isfile(pdb):
    pdb_id = pdb.split('/')[-1]
    protein_chain = ProteinChain.from_pdb(pdb, chain_id=chain, id=pdb_id)
    protein = ESMProtein.from_protein_chain(protein_chain)
  else:
    sys.exit('Invalid PDB ID or path.')
  encoder = model.model.encode(protein)
  # [1:-1] drops the first and last token (presumably BOS/EOS -- TODO confirm);
  # unsqueeze(0) adds a leading batch dimension.
  struct_input = encoder.structure[1:-1].unsqueeze(0)
  seq = protein.sequence
  seq_input = encoder.sequence[1:-1].unsqueeze(0)

# A user-supplied sequence replaces the one read from the structure.
if sequence != '':
  if pdb and len(seq) != len(sequence):
    sys.exit('Length of sequence does not match length of structure input.')
  tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t6_8M_UR50D")
  seq = sequence
  token_seq = tokenizer.encode(sequence, add_special_tokens=False, return_tensors='np')
  seq_input = torch.from_numpy(token_seq).to(DEVICE)

# NOTE(review): 4099 is presumably a padding token id -- confirm against the model code.
sequence_id = seq_input != 4099
if only_backbone:
  seq_input = None

# Inference only: no_grad avoids building an autograd graph for the forward pass.
with torch.no_grad():
  logits = model((seq_input, struct_input), sequence_id)
p = utils.prob_adjusted(logits).cpu().detach().numpy()

if write_pred_to_struct:
  # Write the structure out, then reload it and store each residue's
  # probability in the temperature-factor field of all of its atoms.
  out_pdb = f'/content/outputs/{output_base}.pdb'
  protein.to_pdb(out_pdb)
  curr = mda.Universe(out_pdb)
  curr.add_TopologyAttr('bfactors')
  protein_out = curr.select_atoms("protein")
  for residue, p_i in zip(protein_out.residues, p):
    for atom in residue.atoms:
      atom.tempfactor = p_i
  protein_out.write(out_pdb)
# Per-residue table: 1-based position, residue letter, predicted probability.
out_df = pd.DataFrame({'position': np.arange(1,len(p)+1), 'residue': np.array(list(seq)), 'p_exchange': p,})
out_df.to_csv(f'/content/outputs/{output_base}.csv', index=False)

In [None]:
#@title Plot probabilities
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read back the per-residue table written by the run cell.
csv_path = f'/content/outputs/{output_base}.csv'
if os.path.isfile(csv_path):
  data = pd.read_csv(csv_path)
else:
  sys.exit('No files outputted.')
probs = data['p_exchange']

# Plot the array
plt.figure(figsize=(10,5), dpi=200)
plt.plot(np.arange(1, len(probs)+1, 1), probs)
plt.ylim(0, 1)
plt.xlabel('Residue position')
plt.ylabel('Dyna-1 P(exchange)')
# job_name may be left blank in the form; fall back to the output base name
# so the figure is never untitled.
plt.title(job_name or output_base)
plt.show()