# Run MPNN multistate design on the paired states

### Imports

In [1]:
%load_ext lab_black
# Python standard library
from glob import glob
import os
import socket
import sys

# 3rd party library imports
import dask
import matplotlib.pyplot as plt
import pandas as pd
import pyrosetta
import numpy as np
import scipy
import seaborn as sns
from tqdm.auto import tqdm  # jupyter compatible progress bar

tqdm.pandas()  # link tqdm to pandas
# Notebook magic
# save plots in the notebook
%matplotlib inline
# reloads modules automatically before executing cells
%load_ext autoreload
%autoreload 2
print(f"running in directory: {os.getcwd()}")  # where are we?
print(f"running on node: {socket.gethostname()}")  # what node are we on?

running in directory: /mnt/home/pleung/projects/crispy_shifty/projects/crispy_shifties
running on node: dig201


### Set working directory to the root of the crispy_shifty repo
TODO: point this at the shared projects directory (`/projects/crispy_shifty`) instead of a user-specific home directory.

In [2]:
# Move the kernel into the repository root: later cells build their input and
# output paths as os.path.join(os.getcwd(), "projects/crispy_shifties/...").
# NOTE(review): this is a user-specific absolute path; the commented-out line
# below looks like the intended shared location -- confirm before switching.
os.chdir("/home/pleung/projects/crispy_shifty")
# os.chdir("/projects/crispy_shifty")

### Run MPNN on the paired states
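Generate the array tasks that run `mpnn_paired_state` on every design listed in `designed_paired_states.list` from step 05. TODO: expand this description.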

In [None]:
from crispy_shifty.utils.io import gen_array_tasks

# Name of this step; reused below for the output directory and passed on to
# gen_array_tasks as `simulation_name`.
simulation_name = "06_mpnn_paired_states"

# Both paths are resolved against the current working directory, so this cell
# must run after the working directory has been set to the repo root.
output_path = os.path.join(os.getcwd(), f"projects/crispy_shifties/{simulation_name}")
design_list_file = os.path.join(
    os.getcwd(),
    "projects/crispy_shifties/05_design_paired_states/designed_paired_states.list",
)

# Space-joined option string (a single entry for now).
# NOTE(review): presumably PyRosetta init flags -- confirm against gen_array_tasks.
options = " ".join(("out:level 200",))

# Passed through as `extra_kwargs`; the value is deliberately kept as a string.
extra_kwargs = dict(num_sequences="100")

gen_array_tasks(
    simulation_name=simulation_name,
    distribute_func="crispy_shifty.protocols.mpnn.mpnn_paired_state",
    design_list_file=design_list_file,
    output_path=output_path,
    options=options,
    extra_kwargs=extra_kwargs,
    nstruct=1,
    nstruct_per_task=1,
    queue="medium",
    memory="8G",
)

### Collect scorefiles of designed paired states and concatenate
TODO change to projects dir

In [4]:
# sys.path.insert(0, "~/projects/crispy_shifty")  # TODO
# from crispy_shifty.utils.io import collect_score_file

# simulation_name = "06_mpnn_paired_states"
# output_path = os.path.join(os.getcwd(), f"projects/crispy_shifties/{simulation_name}")

# if not os.path.exists(os.path.join(output_path, "scores.json")):
#     collect_score_file(output_path, "scores")

### Load resulting concatenated scorefile
TODO change to projects dir

In [5]:
# sys.path.insert(0, "~/projects/crispy_shifty")  # TODO
# from crispy_shifty.utils.io import parse_scorefile_linear

# output_path = os.path.join(os.getcwd(), f"projects/crispy_shifties/{simulation_name}")

# scores_df = parse_scorefile_linear(os.path.join(output_path, "scores.json"))
# scores_df = scores_df.convert_dtypes()

### Setup for plotting

In [6]:
# sns.set(
#     context="talk",
#     font_scale=1,  # make the font larger; default is pretty small
#     style="ticks",  # make the background white with black lines
#     palette="colorblind",  # a color palette that is colorblind friendly!
# )

### Data exploration
Drop the Rosetta score function (`beta_nov16`) score terms from the scores table for now.

In [7]:
# from crispy_shifty.protocols.design import beta_nov16_terms

# scores_df = scores_df[
#     [term for term in scores_df.columns if term not in beta_nov16_terms]
# ]
# print(len(scores_df))

In [8]:
# print(list(scores_df.columns))

In [9]:
# from crispy_shifty.plotting.utils import histplot_df, pairplot_df

### Save individual fastas
TODO change to projects dir

In [10]:
# sys.path.insert(0, "~/projects/crispy_shifty")  # TODO
# from crispy_shifty.utils.io import df_to_fastas

# output_path = os.path.join(os.getcwd(), f"projects/crispy_shifties/{simulation_name}")

# scores_df = df_to_fastas(scores_df, prefix="mpnn_seq")

### Save a list of outputs

In [11]:
# simulation_name = "06_mpnn_paired_states"
# output_path = os.path.join(os.getcwd(), f"projects/crispy_shifties/{simulation_name}")

# with open(os.path.join(output_path, "mpnn_states.list"), "w") as f:
#     for path in tqdm(scores_df.index):
#         print(path, file=f)

### Prototyping blocks

Test `mpnn_paired_state` directly on a single designed paired state.

In [12]:
%%time 
import pyrosetta

pyrosetta.init()


sys.path.insert(0, "~/projects/crispy_shifty/") # TODO projects
from crispy_shifty.protocols.mpnn import mpnn_paired_state

t = mpnn_paired_state(
        None,
        **{
            'pdb_path': '/mnt/home/pleung/projects/crispy_shifty/projects/crispy_shifties/05_design_paired_states/decoys/0000/05_design_paired_states_c6be6ebc8a3146e2960cb45360a8a202.pdb.bz2',
            'num_sequences': 100,
        }
)
for i, tppose in enumerate(t):
    tppose.pose.dump_pdb(f"{i}.pdb")

PyRosetta-4 2021 [Rosetta PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python38.Release 2021.50+release.4ff291ed8257ec2d0cd4d96ae4289e1d39ce1007 2021-12-16T00:25:15] retrieved from: http://www.pyrosetta.org
(C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team.
core.init: Checking for fconfig files in pwd and ./rosetta/flags
core.init: Rosetta version: PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python38.Release r306 2021.50+release.4ff291e 4ff291ed8257ec2d0cd4d96ae4289e1d39ce1007 http://www.pyrosetta.org 2021-12-16T00:25:15
core.init: command: PyRosetta -ex1 -ex2aro -database /projects/crispy_shifty/envs/crispy/lib/python3.8/site-packages/pyrosetta/database
basic.random.init_random_generator: 'RNG device' seed mode, using '/dev/urandom', seed=965614043 seed_offset=0 real_seed=965614043 thread_index=0
basic.random.init_random_generator: RandomGenerator:init: Normal mode, seed=965614043 RG_type=mt19937


  from distributed.utils import tmpfile


https://docs.anaconda.com/anaconda/install

core.chemical.GlobalResidueTypeSet: Finished initializing fa_standard residue type set.  Created 983 residue types
core.chemical.GlobalResidueTypeSet: Total time to initialize 0.936086 seconds.
0.04 min: Setting up design selector
0.04 min: Multistate design with MPNN
----------------------------------------
pssm_jsonl is NOT loaded
----------------------------------------
omit_AA_jsonl is NOT loaded
----------------------------------------
bias_AA_jsonl is NOT loaded
----------------------------------------
discarded {'bad_chars': 0, 'too_long': 0, 'bad_seq_length': 0}
----------------------------------------
Generating sequences...
Generating sequences for: tmp
96 sequences of length 271 generated in 245.2654 seconds

4.36 min: MPNN design complete, updating pose datacache
CPU times: user 2.48 s, sys: 133 ms, total: 2.62 s
Wall time: 4min 21s


In [13]:
# Copy the scores attached to the last pose from the prototype loop in the
# previous cell (`tppose` is that loop's variable) into a plain dict.
d = dict(tppose.pose.scores)
# Show the dict as this cell's output.
d

{'bb_clash': '354.65374755859375',
 'bb_clash_delta_x': '4.282957553863525',
 'best_average_DAN_plddts': '0.8857421875',
 'best_average_plddts': '93.4143695816',
 'best_model': '1',
 'best_ptm': '0.7924102078',
 'best_rmsd_to_input': '1.1456646473',
 'buns_parent': '2.0',
 'cms_AcB': '283.65777587890625',
 'cms_AnAc': '379.62530517578125',
 'cms_AnAcB': '676.42626953125',
 'cms_AnB': '415.84246826171875',
 'designed_by': 'mpnn',
 'docked_helix': '3',
 'dslf_fa13': '0.0',
 'dssp': 'LHHHHHHHHHHHHHHHHHHHHHHLLLHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHLLLLLHHHHHHHHHHHHHHHHHHHHHHHHHLLLHHHHHHHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHL',
 'elapsed_time': '4.584493637084961',
 'exposed_hydrophobics_parent': '393.2328186035156',
 'fa_atr': '-1651.9054956825883',
 'fa_dun': '308.7316428960589',
 'fa_dun_dev': '44.33693400182033',
 'fa_dun_rot': '240.5004192452915',
 'fa_dun_semi': '333.3086706701911',
 'fa_elec': '-612.752887610362',
 'fa_intra_atr_xover4': '-104.92247422454138',
 'fa_intra_el