# Resurface the peptides and their interactions with the hinge for the designs that pass AlphaFold

### Imports

In [1]:
# %load_ext lab_black
# Python standard library
from glob import glob
import os
import socket
import sys

# 3rd party library imports
import dask
import matplotlib.pyplot as plt
import pandas as pd
import pyrosetta
import numpy as np
import scipy
import seaborn as sns
from tqdm.auto import tqdm  # jupyter compatible progress bar

# Register tqdm's pandas integration so pandas operations can show progress bars.
tqdm.pandas()  # link tqdm to pandas
# Notebook magic
# (the magics below are left commented out in this export, so they do not run as-is)
# save plots in the notebook
# %matplotlib inline
# reloads modules automatically before executing cells
# %load_ext autoreload
# %autoreload 2
# Record the execution context alongside the results.
print(f"running in directory: {os.getcwd()}")  # where are we?
print(f"running on node: {socket.gethostname()}")  # what node are we on?

running in directory: /global/cfs/cdirs/m4129/projects/crispy_shifty_adam/projects/OPS/round_1/design
running on node: nid001908


### Resurface the peptides of the predicted states

In [2]:
# Put the crispy_shifty checkout at the front of sys.path before importing from it.
sys.path.insert(0, "/global/cfs/cdirs/m4129/projects/crispy_shifty_adam")
from crispy_shifty.utils.io import gen_array_tasks

# Inputs: the list of folded paired states; outputs go under a folder named
# after the simulation.
simulation_name = "12_resurface_with_hinge"
design_list_file = "/pscratch/sd/b/broerman/11_fold_paired_states_X/folded_paired_states.list"
output_path = f"/pscratch/sd/b/broerman/{simulation_name}"

# Space-separated option string; a single entry for now.
options = " ".join(("out:level 200",))

# Extra keyword arguments forwarded through gen_array_tasks; all values are
# given as strings. This cell requests the "full_surf" hinge redesign.
extra_kwargs = dict(
    models="1",
    clean_disulfides="true",
    redesign_hinge="full_surf",
    path_to_model_weights="/global/u2/b/broerman/databases/mpnn/vanilla_model_weights/",
)

# Scheduler settings that are currently disabled (kept for reference):
#   queue="gpu-remote", cores=2, gres="--gres=gpu:a4000:1", memory="10G"
gen_array_tasks(
    # what to run and on which inputs
    distribute_func="crispy_shifty.protocols.cleaning.finalize_peptide",
    design_list_file=design_list_file,
    options=options,
    extra_kwargs=extra_kwargs,
    # where results go
    simulation_name=simulation_name,
    output_path=output_path,
    # batching and scheduler settings
    nstruct=1,
    nstruct_per_task=4,
    perlmutter_mode=True,
    time="1:29:30",
)

  from distributed.utils import tmpfile


Run the following command with your desired environment active:
sbatch -a 1-489 /pscratch/sd/b/broerman/12_resurface_with_hinge/run.sh


In [None]:
# Second task set: a much more minimal hinge surface redesign
# (redesign_hinge="int_surf" rather than "full_surf").

# Put the crispy_shifty checkout at the front of sys.path before importing from it.
sys.path.insert(0, "/global/cfs/cdirs/m4129/projects/crispy_shifty_adam")
from crispy_shifty.utils.io import gen_array_tasks

# NOTE(review): simulation_name and output_path are identical to the
# "full_surf" cell in this notebook, so both task sets target the same
# folder -- confirm this is intended before running both.
simulation_name = "12_resurface_with_hinge"
design_list_file = "/pscratch/sd/b/broerman/11_fold_paired_states_X/folded_paired_states.list"
output_path = f"/pscratch/sd/b/broerman/{simulation_name}"

# Space-separated option string; a single entry for now.
options = " ".join(("out:level 200",))

# Extra keyword arguments forwarded through gen_array_tasks; all values are
# given as strings.
extra_kwargs = dict(
    models="1",
    clean_disulfides="true",
    redesign_hinge="int_surf",
    path_to_model_weights="/global/u2/b/broerman/databases/mpnn/vanilla_model_weights/",
)

# Scheduler settings that are currently disabled (kept for reference):
#   queue="gpu-remote", cores=2, gres="--gres=gpu:a4000:1", memory="10G"
gen_array_tasks(
    # what to run and on which inputs
    distribute_func="crispy_shifty.protocols.cleaning.finalize_peptide",
    design_list_file=design_list_file,
    options=options,
    extra_kwargs=extra_kwargs,
    # where results go
    simulation_name=simulation_name,
    output_path=output_path,
    # batching and scheduler settings
    nstruct=1,
    nstruct_per_task=4,
    perlmutter_mode=True,
    time="1:29:30",
)

In [None]:
# Gather every compressed decoy, sorted so the list file has a stable order.
decoy_paths = sorted(
    glob("/pscratch/sd/b/broerman/12_resurface_with_hinge/resurfaced_with_hinge/decoys/*/*.pdb.bz2")
)

# One path per line; mode "w" replaces any previously written list.
with open("/pscratch/sd/b/broerman/12_resurface_with_hinge/resurfaced_with_hinge.list", "w") as list_file:
    list_file.writelines(f"{decoy}\n" for decoy in decoy_paths)

In [None]:
# Peptide-only pass: takes the designs listed in resurfaced_with_hinge.list as
# input and passes no redesign_hinge setting.

# Put the crispy_shifty checkout at the front of sys.path before importing from it.
sys.path.insert(0, "/global/cfs/cdirs/m4129/projects/crispy_shifty_adam")
from crispy_shifty.utils.io import gen_array_tasks

simulation_name = "12_resurface_only_peptide"
design_list_file = "/pscratch/sd/b/broerman/12_resurface_with_hinge/resurfaced_with_hinge.list"
output_path = f"/pscratch/sd/b/broerman/{simulation_name}"

# Space-separated option string; a single entry for now.
options = " ".join(("out:level 200",))

# Extra keyword arguments forwarded through gen_array_tasks; all values are
# given as strings.
extra_kwargs = dict(
    models="1",
    clean_disulfides="true",
    path_to_model_weights="/global/u2/b/broerman/databases/mpnn/vanilla_model_weights/",
)

# Scheduler settings that are currently disabled (kept for reference):
#   queue="gpu-remote", cores=2, gres="--gres=gpu:a4000:1", memory="10G"
gen_array_tasks(
    # what to run and on which inputs
    distribute_func="crispy_shifty.protocols.cleaning.finalize_peptide",
    design_list_file=design_list_file,
    options=options,
    extra_kwargs=extra_kwargs,
    # where results go
    simulation_name=simulation_name,
    output_path=output_path,
    # batching and scheduler settings
    nstruct=1,
    nstruct_per_task=4,
    perlmutter_mode=True,
    time="1:29:30",
)

Rsync the outputs back to digs.

In [2]:
# Use the crispy_shifty checkout under /home/broerman for the remaining cells.
sys.path.insert(0, "/home/broerman/crispy_shifty")
from crispy_shifty.utils.io import collect_score_file

simulation_name = "12_resurface"
output_path = os.path.join("/home/broerman/crispy_shifty/projects/OPS/round_1/design/", simulation_name)

# Only collect when scores.json is not already present in output_path.
scores_json_present = os.path.exists(os.path.join(output_path, "scores.json"))
if not scores_json_present:
    collect_score_file(output_path, "scores")

  from distributed.utils import tmpfile


### Load resulting concatenated scorefile

In [3]:
from crispy_shifty.utils.io import parse_scorefile_linear

simulation_name = "12_resurface"
output_path = os.path.join("/home/broerman/crispy_shifty/projects/OPS/round_1/design/", simulation_name)

# Parse scores.json only when no scores.csv exists yet; when the CSV is
# already there, scores_df is not assigned by this cell.
csv_already_written = os.path.exists(os.path.join(output_path, "scores.csv"))
if not csv_already_written:
    scores_df = parse_scorefile_linear(os.path.join(output_path, "scores.json"))

  0%|          | 0/4317 [00:00<?, ?it/s]

### Dump scores_df as a CSV and then reload, for performance reasons

In [4]:
# Write the CSV once, then always reload scores_df from it.
scores_csv = os.path.join(output_path, "scores.csv")

if not os.path.exists(scores_csv):
    scores_df.to_csv(scores_csv)

# The index was written without a header, so it is read back via the
# "Unnamed: 0" column.
scores_df = pd.read_csv(scores_csv, index_col="Unnamed: 0")

### Save a list of outputs

In [5]:
# Write each scores_df index entry on its own line, with a progress bar;
# mode "w" replaces any previously written list.
resurfaced_list = os.path.join(output_path, "resurfaced_states.list")
with open(resurfaced_list, "w") as list_file:
    list_file.writelines(f"{design}\n" for design in tqdm(scores_df.index))

  0%|          | 0/4317 [00:00<?, ?it/s]