# Diffuse Proteins, Design Their Sequences, and Predict Their Structures

### Setup

In [2]:
import protflow
from protflow import jobstarters
from protflow.poses import Poses
from protflow.tools.rfdiffusion import RFdiffusion

In [3]:
# Jobstarters control where the jobs of each tool are executed. Two local
# jobstarters are defined here for running on a workstation; note that the
# runners further down in this notebook are all handed the SLURM-based
# `sbatch_gpu_jobstarter` instead, so swap it out there to run locally.
cpu_jobstarter = jobstarters.LocalJobStarter(max_cores=7)
gpu_jobstarter = jobstarters.LocalJobStarter(max_cores=1)

# cluster variant: an sbatch array jobstarter created with gpus=1
sbatch_gpu_jobstarter = jobstarters.SbatchArrayJobstarter(gpus=1, max_cores=10)

# start from a Poses object that holds no input structures yet
proteins = Poses(
    jobstarter=sbatch_gpu_jobstarter,
    storage_format="pickle",
    work_dir="./rfdiffusion_mpnn_esm_output/",
    poses=None,
)

# the poses DataFrame is expected to be empty at this point
print(proteins.df)

Empty DataFrame
Columns: [input_poses, poses, poses_description]
Index: []


# Creating De Novo Proteins from Scratch with RFdiffusion

In [4]:
# create the RFdiffusion runner; its jobs go through the sbatch GPU jobstarter
rfdiffusion_runner = RFdiffusion(jobstarter=sbatch_gpu_jobstarter)

# Run 5 diffusions starting from the empty Poses object. The options string is
# handed to RFdiffusion as given; in RFdiffusion's contig syntax, [70-70]
# should request a single chain of exactly 70 residues (see RFdiffusion docs).
diffused_proteins = rfdiffusion_runner.run(
    options="'contigmap.contigs=[70-70]'",
    num_diffusions=5,
    prefix="diffusion",
    poses=proteins,
)

In [5]:
# Show the complete poses DataFrame, including the scores added by the diffusion step.
print(diffused_proteins.df)
# Iterate over the Poses object and print, for each item, its description and
# its "diffusion_plddt" score (presumably each item is one row of the
# DataFrame printed above; confirm against Poses.__iter__).
for pose in diffused_proteins:
    print(pose[["poses_description", "diffusion_plddt"]])

   input_poses                                              poses  \
0          NaN  /home/mabr3112/ProtFlow/examples/rfdiffusion_m...   
1          NaN  /home/mabr3112/ProtFlow/examples/rfdiffusion_m...   
2          NaN  /home/mabr3112/ProtFlow/examples/rfdiffusion_m...   
3          NaN  /home/mabr3112/ProtFlow/examples/rfdiffusion_m...   
4          NaN  /home/mabr3112/ProtFlow/examples/rfdiffusion_m...   

  poses_description  diffusion_plddt  \
0    diff_0001_0002         0.956843   
1    diff_0001_0001         0.941326   
2    diff_0001_0005         0.943400   
3    diff_0001_0004         0.943294   
4    diff_0001_0003         0.958685   

                              diffusion_perres_plddt  \
0  [0.9466934, 0.97783107, 0.9815314, 0.9839835, ...   
1  [0.89204675, 0.9132011, 0.9310032, 0.9333822, ...   
2  [0.90986276, 0.9491161, 0.93919754, 0.93431914...   
3  [0.9064126, 0.9262741, 0.923507, 0.9394405, 0....   
4  [0.8997437, 0.94538975, 0.93646216, 0.92597026...   

  diffu

# Sequence Design for diffused poses with LigandMPNN

In [6]:
from protflow.tools import ligandmpnn
from protflow.tools.ligandmpnn import LigandMPNN

# set up the LigandMPNN runner; it is constructed with the sbatch GPU jobstarter
ligandmpnn_runner = LigandMPNN(jobstarter=sbatch_gpu_jobstarter)

In [7]:
# Sequence design on the diffused poses with LigandMPNN: nseq=8 sequences are
# requested per pose, using the "soluble_mpnn" model type.
proteins = ligandmpnn_runner.run(
    model_type="soluble_mpnn",
    nseq=8,
    jobstarter=sbatch_gpu_jobstarter,
    prefix="mpnn_design",
    poses=diffused_proteins,
)

# Predict Structures of Designed Sequences using ESMFold

In [8]:
from protflow.tools import esmfold
from protflow.tools.esmfold import ESMFold

In [9]:
# create the ESMFold runner; it is constructed with the sbatch GPU jobstarter
esmfold_runner = ESMFold(jobstarter=sbatch_gpu_jobstarter)

# run ESMFold on the poses returned by the LigandMPNN step, using the prefix "esm"
predicted_proteins = esmfold_runner.run(prefix="esm", poses=proteins)