In [1]:
working_dimers = ['CSD20', 'CSD23', 'CSD25', 'CSD26']

# Load the model paths listed in good_models.list, one path per line.
with open('../af2/good_models.list', 'r') as f:
    good_models = f.read().splitlines()

# For every dimer, bucket its model paths by the state encoded in the file name
# (<dimer>A_, <dimer>B_, <dimer>AB_). The AB bucket is listed twice on purpose:
# a later cell takes itertools.product over the four groups, so two AB slots
# pair every AB model with every AB model.
working_models = []

for working_dimer in working_dimers:
    # Keep only paths whose file name begins with this dimer's 5-character id.
    dimer_models = [path for path in good_models if path.split('/')[-1][:5] == working_dimer]

    by_state = {}
    for state in ('A', 'B', 'AB'):
        prefix = working_dimer + state + '_'
        by_state[state] = [path for path in dimer_models if path.split('/')[-1].startswith(prefix)]

    working_models.append([by_state['A'], by_state['B'], by_state['AB'], by_state['AB']])

In [2]:
import pyrosetta

# Initialise PyRosetta with the beta_nov16 corrections flag and an output level of 100.
# NOTE(review): going by its name, maybe_init presumably skips initialisation when
# PyRosetta is already running — confirm in the PyRosetta docs.
pyrosetta.distributed.maybe_init(
    options="-corrections::beta_nov16 true",
    extra_options={"-out:level": "100"},
)

In [4]:
# Make all combinations of backbones from different AF2 models for each SEC-validated heterodimer.
# This will provide MPNN with a good set of backbone diversity for MSD.
#
# For every dimer, one pose is picked from each of four groups (A, B, AB, AB), the
# pieces are appended into a single pose, and the result is written to
# 01_backbone_combos/<dimer>_<model numbers>.pdb.

import os
import sys
from copy import deepcopy
from itertools import product

sys.path.insert(0, '/home/broerman/projects/crispy_shifty/')
from crispy_shifty.protocols.states import yeet_pose_xyz

sw = pyrosetta.rosetta.protocols.simple_moves.SwitchChainOrderMover()
sw.chain_order('123456')

# dump_pdb does not create missing directories, so make sure the output folder
# exists before any combination is written; otherwise a fresh checkout produces
# no PDBs at all.
combo_dir = '01_backbone_combos'
os.makedirs(combo_dir, exist_ok=True)

for models_A_B_AB, working_dimer in zip(working_models, working_dimers):

    # One entry per group, in the order A, B, AB, AB. Each entry is a list of
    # (pose, model_num) tuples; for the two AB groups "pose" is the result of
    # split_by_chain(), which is indexed from 1 below.
    poses_A_B_AB = []
    # Each group gets its own offset vector. NOTE(review): yeet_pose_xyz presumably
    # translates the pose along that vector so the states do not overlap — confirm
    # against crispy_shifty.protocols.states.
    for model_group, yeet_coords, split in zip(models_A_B_AB, [(1,0,0), (0,1,0), (-1,0,0), (0,-1,0)], [False,False,True,True]):
        pose_group = []
        for pdb_fname in model_group:
            pose = pyrosetta.pose_from_pdb(pdb_fname)
            # Third underscore-separated token of the file name identifies the model.
            model_num = pdb_fname.split('/')[-1].split('_')[2]

            # Always true for the non-empty offset tuples above; kept as a guard.
            if yeet_coords:
                pose = yeet_pose_xyz(pose, yeet_coords)

            if split:
                pose = pose.split_by_chain()

            pose_group.append((pose, model_num))
        poses_A_B_AB.append(pose_group)

    for (pose_A, model_A), (pose_B, model_B), (poses_AB_1, model_AB_1), (poses_AB_2, model_AB_2) in product(*poses_A_B_AB):
        # deepcopy everything that is appended so the cached poses are reused
        # untouched across combinations.
        combined_pose = deepcopy(pose_A)
        # NOTE(review): the trailing True presumably starts a new chain for the
        # appended pose — confirm against core.pose.append_pose_to_pose.
        pyrosetta.rosetta.core.pose.append_pose_to_pose(combined_pose, deepcopy(pose_B), True)
        pyrosetta.rosetta.core.pose.append_pose_to_pose(combined_pose, deepcopy(poses_AB_1[1]), True)
        pyrosetta.rosetta.core.pose.append_pose_to_pose(combined_pose, deepcopy(poses_AB_1[2]), True)
        pyrosetta.rosetta.core.pose.append_pose_to_pose(combined_pose, deepcopy(poses_AB_2[1]), True)
        pyrosetta.rosetta.core.pose.append_pose_to_pose(combined_pose, deepcopy(poses_AB_2[2]), True)
        sw.apply(combined_pose)
        # Model numbers are concatenated without separators, as in the original naming.
        combo_name = working_dimer + '_' + model_A + model_B + model_AB_1 + model_AB_2 + '.pdb'
        combined_pose.dump_pdb(os.path.join(combo_dir, combo_name))

In [2]:
from glob import glob

# Write the path of every combined-backbone PDB to 01_backbone_combos.list, one per line.
combo_pattern = '/home/broerman/projects/CSD/round_2/mpnn_upweight_int/01_backbone_combos/*.pdb'
with open('01_backbone_combos.list', 'w') as f:
    f.writelines(combo_fname + '\n' for combo_fname in glob(combo_pattern))

#### Run MPNN on the state combinations ####

In [4]:
import sys
sys.path.insert(0, '/home/broerman/projects/crispy_shifty/')

from crispy_shifty.utils.io import gen_array_tasks

# Generate the array-job tasks that run crispy_shifty.protocols.mpnn.mpnn_dimers
# on every backbone combination listed in 01_backbone_combos.list.
simulation_name = "02_mpnn_upweight_int"
design_list_file = "/home/broerman/projects/CSD/round_2/mpnn_upweight_int/01_backbone_combos.list"
output_path = f"/home/broerman/projects/CSD/round_2/mpnn_upweight_int/{simulation_name}"

# Space-joined option string handed to gen_array_tasks.
options = " ".join(
    [
        "out:level 200",
    ]
)

# Keyword arguments forwarded to the distributed function.
# NOTE(review): mpnn_temperature is a float while the other values are strings —
# confirm gen_array_tasks accepts non-string values.
extra_kwargs = {
    "num_sequences": "10",
    "batch_size": "10",  # was `"batch_size" = "10"`, a SyntaxError inside a dict literal
    "mpnn_temperature": 0.2,
    "mpnn_design_area": "scan",
}

gen_array_tasks(
    distribute_func="crispy_shifty.protocols.mpnn.mpnn_dimers",
    design_list_file=design_list_file,
    output_path=output_path,
    queue="short",
    memory="12G",
    nstruct=1,
    nstruct_per_task=1,
    options=options,
    extra_kwargs=extra_kwargs,
    simulation_name=simulation_name,
)

Run the following command with your desired environment active:
sbatch -a 1-287 /home/broerman/projects/CSD/round_2/mpnn_upweight_int/02_mpnn_upweight_int/run.sh


#### Collect scorefiles of the MPNN designed paired states and concatenate ####

In [5]:
import os
from crispy_shifty.utils.io import collect_score_file

simulation_name = "02_mpnn_upweight_int"
output_path = f"/home/broerman/projects/CSD/round_2/mpnn_upweight_int/{simulation_name}"

# Only collect when scores.json is not already present in the output folder,
# so re-running this cell does not repeat the collection.
scores_json_exists = os.path.exists(os.path.join(output_path, "scores.json"))
if not scores_json_exists:
    collect_score_file(output_path, "scores")

#### Load resulting concatenated scorefile ####

In [6]:
sys.path.insert(0, "/projects/crispy_shifty")
from crispy_shifty.utils.io import parse_scorefile_linear

# Parse the concatenated scorefile and let pandas pick the best nullable dtypes.
scores_df = parse_scorefile_linear(
    os.path.join(output_path, "scores.json")
).convert_dtypes()

100%|██████████| 861/861 [00:03<00:00, 229.05it/s]


#### Save individual fastas ####

In [7]:
from crispy_shifty.utils.io import df_to_fastas

# Pass the score table through df_to_fastas with the "mpnn_seq" prefix and keep the
# returned frame under the same name.
# NOTE(review): presumably this writes one fasta per row from the columns starting
# with "mpnn_seq" — confirm against crispy_shifty.utils.io. Re-running this cell
# feeds the already-processed frame back in.
scores_df = df_to_fastas(scores_df, prefix="mpnn_seq")

100%|██████████| 861/861 [00:02<00:00, 374.08it/s]


#### Save a list of outputs ####

In [9]:
from tqdm.auto import tqdm

# Write every index entry of scores_df to mpnn_paired_states.list, one per line.
paired_states_list = os.path.join(output_path, "mpnn_paired_states.list")
with open(paired_states_list, "w") as f:
    for path in tqdm(scores_df.index):
        f.write(f"{path}\n")

100%|██████████| 861/861 [00:00<00:00, 1119434.51it/s]


#### Pair each pdb.bz2 path with its fasta path, one "____"-joined pair per line, for Superfold ####

In [10]:
# For every index entry of scores_df, write "<decoy path>____<fasta path>" on its own line.
# The fasta path is derived from the decoy path by swapping "decoys" -> "fastas"
# and "pdb.bz2" -> "fa".
with open(os.path.join(output_path, "mpnn_paired_states.pair"), "w") as f:
    for path in tqdm(scores_df.index):
        fasta_path = path.replace("decoys", "fastas").replace("pdb.bz2", "fa")
        print("____".join((path, fasta_path)), file=f)

100%|██████████| 861/861 [00:00<00:00, 745057.92it/s]
