# Diffuse Proteins, Design Their Sequences, and Predict Their Structures

### Setup

In [1]:
import protslurm
from protslurm import jobstarters
from protslurm.poses import Poses
from protslurm.tools.rfdiffusion import RFdiffusion

# Development aid: pick up local edits to protslurm without restarting the kernel.
import importlib

importlib.reload(protslurm)
importlib.reload(protslurm.poses)
importlib.reload(protslurm.tools.rfdiffusion)

# reload() re-executes a module but does not update names that were bound earlier
# via `from ... import ...`; re-import the classes so the notebook uses the
# reloaded definitions instead of the stale pre-reload ones.
from protslurm.poses import Poses
from protslurm.tools.rfdiffusion import RFdiffusion

# `jobstarters` and `protslurm.jobstarters` are the same module object,
# so a single reload is sufficient.
importlib.reload(jobstarters)

<module 'protslurm.jobstarters' from '/home/markus/projects/ProtSLURM/protslurm/jobstarters.py'>

In [2]:
# Jobstarters used by every step of this notebook. Everything runs locally here;
# replace them with SLURM jobstarters when running on a cluster.
cpu_jobstarter = jobstarters.LocalJobStarter(max_cores=7)
gpu_jobstarter = jobstarters.LocalJobStarter(max_cores=1)

# Example of what a GPU jobstarter looks like on a cluster
# (shown for illustration; the cells visible here do not use it).
sbatch_gpu_jobstarter = jobstarters.SbatchArrayJobstarter(max_cores=10, gpus=1)

# Start from an empty Poses object that works inside the output directory below.
proteins = Poses(
    poses=None, work_dir="./rfdiffusion_mpnn_esm_output/",
    storage_format="pickle", jobstarter=cpu_jobstarter,
)

print(proteins.df)

Empty DataFrame
Columns: [input_poses, poses, poses_description]
Index: []


# Creating De Novo Proteins from Scratch with RFdiffusion

In [3]:
# Create the RFdiffusion runner on the GPU jobstarter.
rfdiffusion_runner = RFdiffusion(jobstarter=gpu_jobstarter)

# Request 2 diffusions with the contig option below, starting from the (empty) poses.
# The result columns printed in the next cell carry the "diffusion" prefix.
diffused_proteins = rfdiffusion_runner.run(
    poses=proteins, prefix="diffusion",
    num_diffusions=2, options="'contigmap.contigs=[30-30]'",
)

In [4]:
# Print the full results table first, then description and pLDDT for each pose.
print(diffused_proteins.df)

summary_columns = ["poses_description", "diffusion_plddt"]
for pose in diffused_proteins:
    print(pose[summary_columns])

   input_poses                                              poses  \
0          NaN  /home/markus/projects/ProtSLURM/examples/rfdif...   
1          NaN  /home/markus/projects/ProtSLURM/examples/rfdif...   

  poses_description  diffusion_plddt  \
0    diff_0001_0002         0.984567   
1    diff_0001_0001         0.985791   

                              diffusion_perres_plddt  \
0  [0.9473180175, 0.9793127775, 0.9843268991, 0.9...   
1  [0.9527550340000001, 0.9807837605, 0.986410498...   

  diffusion_con_hal_pdb_idx diffusion_con_ref_pdb_idx diffusion_sampled_mask  \
0                        []                        []                [30-30]   
1                        []                        []                [30-30]   

                                 diffusion_input_pdb diffusion_description  \
0  /home/markus/RFdiffusion/rfdiffusion/inference...        diff_0001_0002   
1  /home/markus/RFdiffusion/rfdiffusion/inference...        diff_0001_0001   

                          

# Sequence Design for Diffused Poses with LigandMPNN

In [15]:
# Import the ligandmpnn module so it can be reloaded below.
from protslurm.tools import ligandmpnn

# Development aid: re-execute the modules to pick up local edits.
# `importlib` is expected to be imported already by the setup cell.
# NOTE(review): runners is reloaded *after* ligandmpnn; if ligandmpnn imports names
# from protslurm.runners, it keeps the pre-reload objects — confirm the intended order.
importlib.reload(ligandmpnn)
importlib.reload(protslurm.runners)

# Import the class only after the reload so the name is bound to the reloaded definition.
from protslurm.tools.ligandmpnn import LigandMPNN


# set up the LigandMPNN runner on the CPU jobstarter
ligandmpnn_runner = LigandMPNN(jobstarter=cpu_jobstarter)

In [16]:
# Design 8 sequences per pose (nseq=8) with the soluble_mpnn model.
# Note: this rebinds `proteins`, which previously held the empty Poses object.
proteins = ligandmpnn_runner.run(
    poses=diffused_proteins, prefix="mpnn_design", jobstarter=cpu_jobstarter,
    nseq=8, model_type="soluble_mpnn",
)

[['/home/markus/anaconda3/envs/ligandmpnn/bin/python3 /home/markus/LigandMPNN/run.py --checkpoint_soluble_mpnn=/home/markus/LigandMPNN/model_params/solublempnn_v_48_020.pt --out_folder /home/markus/projects/ProtSLURM/examples/rfdiffusion_mpnn_esm_output/mpnn_design/ --pdb_path /home/markus/projects/ProtSLURM/examples/rfdiffusion_mpnn_esm_output/diffusion/output_pdbs/diff_0001_0002.pdb --model_type=soluble_mpnn --number_of_batches=8 --parse_atoms_with_zero_occupancy=1'], ['/home/markus/anaconda3/envs/ligandmpnn/bin/python3 /home/markus/LigandMPNN/run.py --checkpoint_soluble_mpnn=/home/markus/LigandMPNN/model_params/solublempnn_v_48_020.pt --out_folder /home/markus/projects/ProtSLURM/examples/rfdiffusion_mpnn_esm_output/mpnn_design/ --pdb_path /home/markus/projects/ProtSLURM/examples/rfdiffusion_mpnn_esm_output/diffusion/output_pdbs/diff_0001_0001.pdb --model_type=soluble_mpnn --number_of_batches=8 --parse_atoms_with_zero_occupancy=1']]


# Predicting the Structures of Designed Sequences with ESMFold

In [20]:
# Import the esmfold module so it can be reloaded below.
from protslurm.tools import esmfold

# Development aid: re-execute the module to pick up local edits.
# `importlib` is expected to be imported already by the setup cell.
importlib.reload(esmfold)

# Import the class only after the reload so the name is bound to the reloaded definition.
from protslurm.tools.esmfold import ESMFold

In [21]:
# Create the ESMFold runner on the GPU jobstarter.
# Prerequisite: ESMFOLD_SCRIPT_PATH must be set in protslurm's config.py; without it
# this cell fails with "ValueError: No path is set for esmfold.py".
esmfold_runner = ESMFold(jobstarter=gpu_jobstarter)

# Run the prediction on the LigandMPNN-designed poses, using the "esm" prefix.
predicted_proteins = esmfold_runner.run(poses=proteins, prefix="esm")

ValueError: No path is set for esmfold.py. Set the path in the config.py file under ESMFOLD_SCRIPT_PATH.