# Run MPNN interface design on the bound states

### Imports

In [1]:
%load_ext lab_black
# Python standard library
from glob import glob
import os
import socket
import sys

# 3rd party library imports
import dask
import matplotlib.pyplot as plt
import pandas as pd
import pyrosetta
import numpy as np
import scipy
import seaborn as sns
from tqdm.auto import tqdm  # jupyter compatible progress bar

tqdm.pandas()  # register tqdm's progress_* methods (e.g. progress_apply) on pandas objects
# Notebook magics
# render matplotlib figures inline so they are saved with the notebook
%matplotlib inline
# autoreload mode 2: re-import modules before each cell runs, so edits to the
# crispy_shifty sources are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2
# Record where this notebook was run, for provenance.
print(f"running in directory: {os.getcwd()}")  # current working directory
print(f"running on node: {socket.gethostname()}")  # hostname of the compute node

running in directory: /mnt/home/pleung/projects/crispy_shifty/projects/crispy_shifties
running on node: dig42


### Set working directory to the root of the crispy_shifty repo
TODO: set this to the shared projects directory (`/projects/crispy_shifty`) instead of a user home directory.

In [2]:
# Root of the crispy_shifty checkout; later cells build their paths from
# os.getcwd(), so this must run before them.
repo_root = "/home/pleung/projects/crispy_shifty"
os.chdir(repo_root)
# os.chdir("/projects/crispy_shifty")

### Run MPNN on the interfaces
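Generate the array tasks that run `mpnn_bound_state` on every looped state listed in `looped_states.list`. TODO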

In [3]:
from crispy_shifty.utils.io import gen_array_tasks

simulation_name = "02_mpnn_bound_states"

# All paths are anchored at the current working directory (the repo root).
cwd = os.getcwd()
design_list_file = os.path.join(
    cwd, "projects/crispy_shifties/01_loop_bound_states/looped_states.list"
)
output_path = os.path.join(cwd, f"projects/crispy_shifties/{simulation_name}")

# Flags are kept in a list so more can be appended; they are joined into a
# single space-separated string.
option_flags = ["out:level 200"]
options = " ".join(option_flags)

# Collect the call arguments in one place, then hand them to gen_array_tasks.
task_kwargs = dict(
    distribute_func="crispy_shifty.protocols.mpnn.mpnn_bound_state",
    design_list_file=design_list_file,
    output_path=output_path,
    queue="medium",
    memory="4G",
    nstruct=1,
    nstruct_per_task=1,
    options=options,
    simulation_name=simulation_name,
)
gen_array_tasks(**task_kwargs)

  from distributed.utils import tmpfile


https://docs.anaconda.com/anaconda/install

Run the following command with your desired environment active:
sbatch -a 1-$(cat /mnt/home/pleung/projects/crispy_shifty/projects/crispy_shifties/02_mpnn_bound_states/tasks.cmds | wc -l) /mnt/home/pleung/projects/crispy_shifty/projects/crispy_shifties/02_mpnn_bound_states/run.sh


In [4]:
# Submit run.sh as a SLURM array job with one array index per line of
# tasks.cmds (the range is 1..N, where N comes from `wc -l`).
# NOTE(review): re-running this cell submits another batch job.
!sbatch -a 1-$(cat /mnt/home/pleung/projects/crispy_shifty/projects/crispy_shifties/02_mpnn_bound_states/tasks.cmds | wc -l) /mnt/home/pleung/projects/crispy_shifty/projects/crispy_shifties/02_mpnn_bound_states/run.sh

Submitted batch job 5826055


### Collect scorefiles of designed bound states and concatenate
TODO: change the hard-coded `sys.path` entry to the shared projects directory.

In [3]:
# The import system does not expand "~", so a literal "~/..." entry on
# sys.path never matches a real directory. Expand it to an absolute path first.
sys.path.insert(0, os.path.expanduser("~/projects/crispy_shifty"))  # TODO
from crispy_shifty.utils.io import collect_score_file

simulation_name = "02_mpnn_bound_states"
output_path = os.path.join(os.getcwd(), f"projects/crispy_shifties/{simulation_name}")

# Skip collection when scores.json already exists, so re-running the notebook
# does not redo the work.
# NOTE(review): presumably collect_score_file writes <output_path>/scores.json
# when given the name "scores" - confirm against crispy_shifty.utils.io.
scores_json = os.path.join(output_path, "scores.json")
if not os.path.exists(scores_json):
    collect_score_file(output_path, "scores")

  from distributed.utils import tmpfile


https://docs.anaconda.com/anaconda/install



### Load resulting concatenated scorefile
TODO change to projects dir

In [4]:
# The import system does not expand "~", so a literal "~/..." entry on
# sys.path never matches a real directory. Expand it to an absolute path first.
sys.path.insert(0, os.path.expanduser("~/projects/crispy_shifty"))  # TODO
from crispy_shifty.utils.io import parse_scorefile_linear

# `simulation_name` comes from the scorefile-collection cell above.
output_path = os.path.join(os.getcwd(), f"projects/crispy_shifties/{simulation_name}")

# Parse the concatenated scorefile, then let pandas choose the best nullable
# dtype for every column.
scores_df = parse_scorefile_linear(os.path.join(output_path, "scores.json"))
scores_df = scores_df.convert_dtypes()

  0%|          | 0/1305 [00:00<?, ?it/s]

### Setup for plotting

In [5]:
# Global seaborn theme for every plot in this notebook.
plot_theme = {
    "context": "talk",  # the "talk" plotting context
    "font_scale": 1,  # NOTE(review): 1 looks like "no extra scaling" - confirm
    "style": "ticks",  # white background with black axis lines and ticks
    "palette": "colorblind",  # colorblind-friendly color palette
}
sns.set(**plot_theme)

### Data exploration
Remove the Rosetta score function (sfxn) score terms for now.

In [6]:
from crispy_shifty.protocols.design import beta_nov16_terms

# Keep only the columns whose names are not listed in beta_nov16_terms.
non_sfxn_columns = [
    column for column in scores_df.columns if column not in beta_nov16_terms
]
scores_df = scores_df[non_sfxn_columns]

# Sanity check: row count and the remaining column names.
print(len(scores_df))
print(list(scores_df.columns))

1305
['bb_clash', 'best_average_DAN_plddts', 'best_average_plddts', 'best_model', 'best_ptm', 'best_rmsd_to_input', 'buns_parent', 'cms_AcB', 'cms_AnAc', 'cms_AnAcB', 'cms_AnB', 'docked_helix', 'dssp', 'exposed_hydrophobics_parent', 'geometry_parent', 'holes_all_parent', 'holes_core_parent', 'loop_dist', 'loop_sc', 'looped_length', 'mismatch_probability_parent', 'mpnn_seq_0000', 'mpnn_seq_0001', 'mpnn_seq_0002', 'mpnn_seq_0003', 'mpnn_seq_0004', 'mpnn_seq_0005', 'mpnn_seq_0006', 'mpnn_seq_0007', 'mpnn_seq_0008', 'mpnn_seq_0009', 'mpnn_seq_0010', 'mpnn_seq_0011', 'mpnn_seq_0012', 'mpnn_seq_0013', 'mpnn_seq_0014', 'mpnn_seq_0015', 'mpnn_seq_0016', 'mpnn_seq_0017', 'mpnn_seq_0018', 'mpnn_seq_0019', 'mpnn_seq_0020', 'mpnn_seq_0021', 'mpnn_seq_0022', 'mpnn_seq_0023', 'mpnn_seq_0024', 'mpnn_seq_0025', 'mpnn_seq_0026', 'mpnn_seq_0027', 'mpnn_seq_0028', 'mpnn_seq_0029', 'mpnn_seq_0030', 'mpnn_seq_0031', 'mpnn_seq_0032', 'mpnn_seq_0033', 'mpnn_seq_0034', 'mpnn_seq_0035', 'mpnn_seq_0036', 'mpnn_

### Save individual fastas
TODO change to projects dir

In [7]:
sys.path.insert(0, "~/projects/crispy_shifty")  # TODO
from crispy_shifty.utils.io import df_to_fastas

output_path = os.path.join(os.getcwd(), f"projects/crispy_shifties/{simulation_name}")

scores_df = df_to_fastas(scores_df, prefix="mpnn_seq")

  0%|          | 0/1305 [00:00<?, ?it/s]

  df["fasta_path"] = df.progress_apply(mask, args=(out_path), axis=1)


### Save a list of outputs

In [8]:
simulation_name = "02_mpnn_bound_states"
output_path = os.path.join(os.getcwd(), f"projects/crispy_shifties/{simulation_name}")

# Write one index entry of scores_df per line.
list_path = os.path.join(output_path, "mpnn_states.list")
with open(list_path, "w") as list_file:
    for path in tqdm(scores_df.index):
        print(path, file=list_file)

  0%|          | 0/1305 [00:00<?, ?it/s]

### Prototyping blocks

Test `mpnn_bound_state` on a single looped bound state.

In [3]:
%%time 
import pyrosetta

pyrosetta.init()


sys.path.insert(0, "~/projects/crispy_shifty/") # TODO projects
from crispy_shifty.protocols.mpnn import mpnn_bound_state

t = mpnn_bound_state(
        None,
        **{
            'pdb_path': '/mnt/home/pleung/projects/crispy_shifty/projects/crispy_shifties/01_loop_bound_states/decoys/0001/01_loop_bound_states_17f57e75865441a78a0057fb8081b4de.pdb.bz2',
        }
)

PyRosetta-4 2021 [Rosetta PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python38.Release 2021.50+release.4ff291ed8257ec2d0cd4d96ae4289e1d39ce1007 2021-12-16T00:25:15] retrieved from: http://www.pyrosetta.org
(C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team.
core.init: Checking for fconfig files in pwd and ./rosetta/flags
core.init: Rosetta version: PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python38.Release r306 2021.50+release.4ff291e 4ff291ed8257ec2d0cd4d96ae4289e1d39ce1007 http://www.pyrosetta.org 2021-12-16T00:25:15
core.init: command: PyRosetta -ex1 -ex2aro -database /projects/crispy_shifty/envs/crispy/lib/python3.8/site-packages/pyrosetta/database
basic.random.init_random_generator: 'RNG device' seed mode, using '/dev/urandom', seed=-1028250697 seed_offset=0 real_seed=-1028250697 thread_index=0
basic.random.init_random_generator: RandomGenerator:init: Normal mode, seed=-1028250697 RG_type=mt19937
CPU time

In [4]:
# Write every result from `t` to "<index>.pdb" in the current directory.
# `tppose` stays bound to the last result and is inspected in the next cell.
for i, tppose in enumerate(t):
    pdb_name = f"{i}.pdb"
    tppose.pose.dump_pdb(pdb_name)

  from distributed.utils import tmpfile


https://docs.anaconda.com/anaconda/install

core.chemical.GlobalResidueTypeSet: Finished initializing fa_standard residue type set.  Created 983 residue types
core.chemical.GlobalResidueTypeSet: Total time to initialize 1.04848 seconds.
0.05 min: Setting up design selector
0.05 min: Designing interface with MPNN
Discarded {'bad_chars': 0, 'too_long': 0, 'bad_seq_length': 0}
Fixed positions dictionary is loaded
PSSM is NOT loaded, or NOT provided
Omit AA dictionary is NOT loaded, or NOT provided
AA bias dictionary is not loaded, or not provided
Generating sequences...
Generating sequences for: tmp
64 sequences of length 264 generated in 254.5058 seconds

4.68 min: MPNN design complete, updating pose datacache


In [5]:
# Show the scores attached to the last pose left over from the loop over `t`
# in the previous cell.
tppose.pose.scores

{'bb_clash': '181.11883544921875',
 'best_average_DAN_plddts': '0.9458007812',
 'best_average_plddts': '93.5457403353',
 'best_model': '2',
 'best_ptm': '0.9095557015',
 'best_rmsd_to_input': '0.7205311737',
 'buns_parent': '0.0',
 'cms_AcB': '349.0079040527344',
 'cms_AnAc': '565.113037109375',
 'cms_AnAcB': '651.3552856445312',
 'cms_AnB': '323.6772155761719',
 'docked_helix': '4',
 'dssp': 'LHHHHHHHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHHHHLLLHHHHHHHHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHHHHLLLHHHHHHHHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHL',
 'exposed_hydrophobics_parent': '101.03326416015625',
 'geometry_parent': '1.0',
 'holes_all_parent': '-1.4798530340194702',
 'holes_core_parent': '-1.9513436555862427',
 'loop_dist': '12.737789269491895',
 'loop_sc': '0.6821925849346034',
 'looped_length': '230.0',
 'mismatch_probability_parent': '0.2606607973575592',
 'mpnn_seq_0000

In [None]:
# Import the io submodule explicitly: `pyrosetta.distributed.io` is only
# reachable as an attribute once something has imported it, and this cell
# should not depend on another module doing so.
import pyrosetta.distributed.io
import pyrosetta.distributed.viewer as viewer

# NOTE(review): the prototyping cells above dump "0.pdb", "1.pdb", ... and not
# "test.pdb"; confirm which file this should load.
ppose = pyrosetta.distributed.io.pose_from_file("test.pdb")

# Build the viewer and add the visualization layers in order.
view = viewer.init(ppose, window_size=(1600, 1200))
view.add(viewer.setStyle())
view.add(viewer.setStyle(colorscheme="whiteCarbon", radius=0.10))
view.add(viewer.setHydrogenBonds())
view.add(viewer.setHydrogens(polar_only=True))
view.add(viewer.setDisulfides(radius=0.25))
view()