# Run hacked AlphaFold2 on the designed bound states

### Imports

In [1]:
%load_ext lab_black
# Python standard library
from glob import glob
import os
import socket
import sys

# 3rd party library imports
import dask
import matplotlib.pyplot as plt
import pandas as pd
import pyrosetta
import numpy as np
import scipy
import seaborn as sns
from tqdm.auto import tqdm  # jupyter compatible progress bar

tqdm.pandas()  # link tqdm to pandas
# Notebook magic
# save plots in the notebook
%matplotlib inline
# reloads modules automatically before executing cells
%load_ext autoreload
%autoreload 2
# Record provenance for this run: the kernel's current working directory and the
# hostname of the machine executing the notebook.
print(f"running in directory: {os.getcwd()}")  # where are we?
print(f"running on node: {socket.gethostname()}")  # what node are we on?

running in directory: /mnt/home/pleung/projects/crispy_shifty/projects/crispy_shifties
running on node: gpu25


### Set working directory to the root of the crispy_shifty repo
TODO set to projects dir

In [2]:
# Make the repo root the working directory; later cells build their input and
# output paths from os.getcwd().
# NOTE(review): hardcoded, user-specific absolute path. The recorded output of the
# imports cell shows the kernel started under /mnt/home/pleung/..., so confirm that
# /home/pleung/... resolves to the same checkout on the node in use.
os.chdir("/home/pleung/projects/crispy_shifty")
# os.chdir("/projects/crispy_shifty")

### Run AF2 on the designed bound states
TODO

In [3]:
from crispy_shifty.utils.io import gen_array_tasks

# Name of this step; it is also the name of the output subdirectory.
simulation_name = "03_fold_bound_states"

# Resolve the working directory once and derive both paths from it.
repo_root = os.getcwd()
design_list_file = os.path.join(
    repo_root,
    "projects/crispy_shifties/02_mpnn_bound_states/test_mpnn_states.pair",  # TODO
)
output_path = os.path.join(repo_root, f"projects/crispy_shifties/{simulation_name}")

# Flags are joined into one space-separated string for the `options` argument.
rosetta_flags = ["out:level 200"]
options = " ".join(rosetta_flags)

# Extra keyword arguments passed through as `extra_kwargs`.
extra_kwargs = dict(models="1")

# Collect every setting in one mapping so the call below stays a single line.
task_settings = dict(
    distribute_func="crispy_shifty.protocols.folding.fold_bound_state",
    design_list_file=design_list_file,
    output_path=output_path,
    queue="gpu",  # TODO
    cores=2,
    memory="16G",  # TODO
    gres="--gres=gpu:rtx2080:1",  # TODO
    # TODO perlmutter_mode=True,
    nstruct=1,
    nstruct_per_task=1,
    options=options,
    extra_kwargs=extra_kwargs,
    simulation_name=simulation_name,
)

gen_array_tasks(**task_settings)

  from distributed.utils import tmpfile


https://docs.anaconda.com/anaconda/install



InvalidGitRepositoryError: The working directory is dirty! Commit local changes to ensure reproducibility.

In [None]:
# NOTE(review): both paths below point at 02_mpnn_bound_states, but this notebook sets
# simulation_name = "03_fold_bound_states" -- confirm the intended tasks.cmds / run.sh
# location before uncommenting and submitting.
# !sbatch -a 1-$(cat /mnt/home/pleung/projects/crispy_shifty/projects/crispy_shifties/02_mpnn_bound_states/tasks.cmds | wc -l) /mnt/home/pleung/projects/crispy_shifty/projects/crispy_shifties/02_mpnn_bound_states/run.sh

### Collect scorefiles of designed bound states and concatenate
TODO change to projects dir

In [None]:
# Disabled cell: collects the per-task score files under output_path into a single
# "scores" file, skipping the work when scores.json already exists.
# NOTE(review): "~" is not expanded in sys.path entries; wrap the path in
# os.path.expanduser(...) if the line below is re-enabled.
# sys.path.insert(0, "~/projects/crispy_shifty")  # TODO
# from crispy_shifty.utils.io import collect_score_file

# simulation_name = "03_fold_bound_states"
# output_path = os.path.join(os.getcwd(), f"projects/crispy_shifties/{simulation_name}")

# if not os.path.exists(os.path.join(output_path, "scores.json")):
#     collect_score_file(output_path, "scores")

### Load resulting concatenated scorefile
TODO change to projects dir

In [None]:
# Disabled cell: parses scores.json from output_path into scores_df and converts its
# columns with DataFrame.convert_dtypes().
# NOTE(review): "~" is not expanded in sys.path entries; wrap the path in
# os.path.expanduser(...) if the line below is re-enabled.
# sys.path.insert(0, "~/projects/crispy_shifty")  # TODO
# from crispy_shifty.utils.io import parse_scorefile_linear

# output_path = os.path.join(os.getcwd(), f"projects/crispy_shifties/{simulation_name}")

# scores_df = parse_scorefile_linear(os.path.join(output_path, "scores.json"))
# scores_df = scores_df.convert_dtypes()

### Setup for plotting

In [None]:
# sns.set(
#     context="talk",
#     font_scale=1,  # make the font larger; default is pretty small
#     style="ticks",  # make the background white with black lines
#     palette="colorblind",  # a color palette that is colorblind friendly!
# )

### Data exploration
Remove the Rosetta score function (sfxn) score terms for now.

In [None]:
# from crispy_shifty.protocols.design import beta_nov16_terms

# scores_df = scores_df[
#     [term for term in scores_df.columns if term not in beta_nov16_terms]
# ]
# print(len(scores_df))
# scores_df.columns

### Save a list of outputs

In [None]:
# simulation_name = "03_fold_bound_states"
# output_path = os.path.join(os.getcwd(), f"projects/crispy_shifties/{simulation_name}")

# with open(os.path.join(output_path, "folded_states.list"), "w") as f:
#     for path in tqdm(scores_df.index):
#         print(path, file=f)

### Prototyping blocks

test `fold_bound_state`

In [None]:
%%time 
from operator import gt, lt
import pyrosetta

filter_dict = {
    "mean_plddt": (gt, 85.0),
    "rmsd_to_reference": (lt, 2.2),
    "mean_pae_interaction": (lt, 10.0),
}

rank_on = "mean_plddt"
prefix = "mpnn_seq"

pyrosetta.init()


sys.path.insert(0, "~/projects/crispy_shifty/") # TODO projects
from crispy_shifty.protocols.folding import fold_bound_state

t = fold_bound_state(
        None,
        **{
            'fasta_path': '/mnt/home/pleung/projects/crispy_shifty/projects/crispy_shifties/02_mpnn_bound_states/fastas/0000/02_mpnn_bound_states_25a76fae39514121922e2b477b5b9813.fa',
            "filter_dict": filter_dict,
            "models": [1], # TODO
            'pdb_path': '/mnt/home/pleung/projects/crispy_shifty/projects/crispy_shifties/02_mpnn_bound_states/decoys/0000/02_mpnn_bound_states_25a76fae39514121922e2b477b5b9813.pdb.bz2',
            'prefix': prefix,
            'rank_on': rank_on,
#             'fasta_path': 'bar.fa',
#             "models": [1, 2], # TODO
#             'pdb_path': 'foo.pdb.bz2',
            
        }
)
for i, tppose in enumerate(t):
    tppose.pose.dump_pdb(f"{i}.pdb")

In [None]:
# Display the scores attached to the last result from the previous cell's loop.
# `tppose` is that loop's leftover variable, so this raises NameError on a fresh
# kernel or if the loop produced no results.
tppose.pose.scores

test `generate_decoys_from_pose`

In [None]:
from operator import gt, lt
from crispy_shifty.protocols.folding import generate_decoys_from_pose

# Same thresholds as the fold_bound_state prototype:
# score name -> (comparison operator, cutoff).
filter_dict = dict(
    mean_plddt=(gt, 85.0),
    rmsd_to_reference=(lt, 2.2),
    mean_pae_interaction=(lt, 10.0),
)
rank_on, prefix = "mean_plddt", "mpnn_seq"

# Work on a copy of the last pose produced by the fold_bound_state prototype cell.
tpose = tppose.pose.clone()

genr = generate_decoys_from_pose(
    tpose,
    prefix=prefix,
    rank_on=rank_on,
    filter_dict=filter_dict,
)

# Print the sequence of each decoy the generator yields.
for d in genr:
    print(d.sequence())