# Resurface the peptides and their interactions with the hinge for the designs that pass AlphaFold

### Imports

In [1]:
# %load_ext lab_black
# Python standard library
from glob import glob
import os
import socket
import sys

# 3rd party library imports
import dask
import matplotlib.pyplot as plt
import pandas as pd
import pyrosetta
import numpy as np
import scipy
import seaborn as sns
from tqdm.auto import tqdm  # jupyter compatible progress bar

# Link tqdm to pandas so pandas operations can report progress.
tqdm.pandas()
# Notebook magics -- all currently disabled (left commented out):
# save plots in the notebook
# %matplotlib inline
# reloads modules automatically before executing cells
# %load_ext autoreload
# %autoreload 2
# Record the working directory and host name in the cell output for provenance.
print(f"running in directory: {os.getcwd()}")  # where are we?
print(f"running on node: {socket.gethostname()}")  # what node are we on?

running in directory: /mnt/home/broerman/crispy_shifty/projects/OPS/round_1/design
running on node: jojo


### Resurface the predicted states peptides

In [4]:
# Put the crispy_shifty checkout on the import path before importing from it.
sys.path.insert(0, "/global/cfs/cdirs/m4129/projects/crispy_shifty_adam")
from crispy_shifty.utils.io import gen_array_tasks

# Name of this step; also used as the name of the output directory.
simulation_name = "12_resurface"
output_path = f"/pscratch/sd/b/broerman/{simulation_name}"

# List of input designs produced by step 11.
design_list_file = (
    "/pscratch/sd/b/broerman/11_fold_paired_states_X/folded_paired_states.list"
)

# Options are passed as a single space-separated string.
options = " ".join(["out:level 200"])

# Keyword arguments forwarded to the distributed function (all values are strings).
extra_kwargs = dict(
    models="1",
    clean_disulfides="true",
    redesign_hinge="int_surf",
    path_to_model_weights="/global/u2/b/broerman/databases/mpnn/vanilla_model_weights/",
)

# Generate the array-task files for the finalize_peptide protocol.
gen_array_tasks(
    simulation_name=simulation_name,
    distribute_func="crispy_shifty.protocols.cleaning.finalize_peptide",
    design_list_file=design_list_file,
    output_path=output_path,
    options=options,
    extra_kwargs=extra_kwargs,
    nstruct=1,
    nstruct_per_task=4,
    time="1:29:30",
    perlmutter_mode=True,
    # Alternative scheduler settings, currently disabled:
    # queue="gpu-remote",
    # cores=2,
    # gres="--gres=gpu:a4000:1",
    # memory="10G",
)

Run the following command with your desired environment active:
sbatch -a 1-489 /pscratch/sd/b/broerman/12_resurface/run.sh


In [7]:
from more_itertools import chunked

simulation_name = "12_resurface"
output_path = os.path.join(
    "/home/broerman/crispy_shifty/projects/OPS/round_1/design/", simulation_name
)

# Every task line starts with the rethreading script (note the trailing space),
# followed by the decoy files it should process.
rethread_script = "/home/broerman/crispy_shifty/projects/OPS/round_1/design/12_resurface/rethread_chC/12_resurface_rethread_chC.py "
# Sorted so the batching below is reproducible between runs.
decoy_paths = sorted(glob(f"{output_path}/decoys_old_chC/*/*.pdb.bz2"))

# One command per line, each covering at most 50 decoys.
with open(f"{output_path}/rethread_chC/tasks.cmds", "w") as f:
    for fnames in chunked(decoy_paths, 50):
        f.write(rethread_script + " ".join(fnames) + "\n")

In [None]:
# # Forgot to thread the chain A sequence on chain C. Do this here.

# import bz2
# import json
# import pyrosetta.distributed.io as io
# from pyrosetta.distributed import cluster

# sys.path.insert(0, "/home/broerman/crispy_shifty")

# import pyrosetta

# pyrosetta.distributed.maybe_init(
#     **{
#         "options": "-corrections::beta_nov16 true",
#         "extra_options": {"-out:level": "100"},
#     }
# )

# simulation_name = "12_resurface"
# output_path = os.path.join(
#     "/home/broerman/crispy_shifty/projects/OPS/round_1/design/", simulation_name
# )

# for fname in tqdm(glob(f"{output_path}/decoys_old_chC/*/*.pdb.bz2")):
#     with open(fname, "rb") as f:  # read bz2 bytestream, decompress and decode
#         decoy = io.to_pose(io.pose_from_pdbstring(bz2.decompress(f.read()).decode()))
#     scores = pyrosetta.distributed.cluster.get_scores_dict(fname)
#     for key, value in scores["scores"].items():
#         pyrosetta.rosetta.core.pose.setPoseExtraScore(decoy, key, value)

#     # get the chA sequence
#     chA_seq = decoy.chain_sequence(1)
#     # setup SimpleThreadingMover
#     stm = pyrosetta.rosetta.protocols.simple_moves.SimpleThreadingMover()
#     # thread the sequence from chA onto chC
#     stm.set_sequence(chA_seq, start_position=decoy.chain_begin(3))
#     stm.apply(decoy)

#     out_fname = fname.replace("decoys_old_chC", "decoys")

#     pdbfile_data = json.dumps(scores)
#     # Write full .pdb record
#     pdbstring_data = (
#         io.to_pdbstring(decoy) + os.linesep + "REMARK PyRosettaCluster: " + pdbfile_data
#     )
#     with open(out_fname, "wb") as f:
#         f.write(bz2.compress(str.encode(pdbstring_data)))

In [2]:
# Put the crispy_shifty checkout on the import path before importing from it.
sys.path.insert(0, "/home/broerman/crispy_shifty")
from crispy_shifty.utils.io import collect_score_file

simulation_name = "12_resurface"
output_path = os.path.join(
    "/home/broerman/crispy_shifty/projects/OPS/round_1/design/", simulation_name
)

# Skip collection when a scores.json is already present in the output directory.
# NOTE(review): presumably collect_score_file writes <output_path>/scores.json -- confirm.
scores_json_present = os.path.exists(os.path.join(output_path, "scores.json"))
if not scores_json_present:
    collect_score_file(output_path, "scores")

  from distributed.utils import tmpfile


### Load resulting concatenated scorefile

In [3]:
from crispy_shifty.utils.io import parse_scorefile_linear

simulation_name = "12_resurface"
output_path = os.path.join(
    "/home/broerman/crispy_shifty/projects/OPS/round_1/design/", simulation_name
)

# Only parse the JSON scorefile when no CSV cache exists yet; otherwise
# scores_df is left for the CSV-loading cell to define.
csv_cache_present = os.path.exists(os.path.join(output_path, "scores.csv"))
if not csv_cache_present:
    scores_json = os.path.join(output_path, "scores.json")
    scores_df = parse_scorefile_linear(scores_json)

  0%|          | 0/4317 [00:00<?, ?it/s]

### Dump scores_df as a CSV and then reload, for performance reasons

In [4]:
# Location of the CSV cache for the parsed scores.
scores_csv = os.path.join(output_path, "scores.csv")

# Write the cache only on the first run; later runs reuse the existing file.
if not os.path.exists(scores_csv):
    scores_df.to_csv(scores_csv)

# Always reload from the CSV so scores_df is the same on fresh and cached runs.
# The index is selected by position rather than by the "Unnamed: 0" placeholder
# that pandas assigns to a blank header, so this also works if the dumped index
# carries a name.
scores_df = pd.read_csv(scores_csv, index_col=0)

### Save a list of outputs

In [5]:
# Write one index entry of scores_df per line.
# NOTE(review): index entries are presumably output file paths -- confirm.
with open(os.path.join(output_path, "resurfaced_states.list"), "w") as f:
    for path in tqdm(scores_df.index):
        f.write(str(path) + "\n")

  0%|          | 0/4317 [00:00<?, ?it/s]