# Make bound and free states from the prepped inputs

### Imports

In [1]:
%load_ext lab_black
# Python standard library
from glob import glob
import os
import socket
import sys

# 3rd party library imports
import dask
import matplotlib.pyplot as plt
import pandas as pd
import pyrosetta
import numpy as np
import scipy
import seaborn as sns
from tqdm.auto import tqdm  # jupyter compatible progress bar

tqdm.pandas()  # link tqdm to pandas
# Notebook magic
# save plots in the notebook
%matplotlib inline
# reloads modules automatically before executing cells
%load_ext autoreload
%autoreload 2
# Record the kernel's working directory and host name in the saved cell output,
# so a reader of the notebook can tell where this run was executed.
print(f"running in directory: {os.getcwd()}")  # where are we?
print(f"running on node: {socket.gethostname()}")  # what node are we on?

running in directory: /mnt/home/pleung/projects/crispy_shifty/notebooks
running on node: dig42


### Set working directory to the root of the crispy_shifty repo

In [2]:
# Work from the repository root: later cells build their paths from os.getcwd()
# (e.g. "scaffolds/01_prep_inputs/scores.json", "crispy_shifty/protocols/states.py").
# NOTE(review): this is a hard-coded, user-specific absolute path; it has to be
# edited before anyone else can run the notebook.
os.chdir("/home/pleung/projects/crispy_shifty")  # TODO projects dir

### Load the scorefile containing the cleaned input scaffolds
These scaffolds had their disulfides removed and have some standard Rosetta metrics, AF2 scores and various metadata written to them.  
We will also make the task generator here.

In [None]:
sys.path.insert(0, os.getcwd())
from crispy_shifty.utils.io import parse_scorefile_linear


def create_tasks(scaffolds, options):
    """
    Lazily generate the task dicts for every scaffold in the table.

    The helix count of a scaffold is taken to be the length of its "topo"
    entry. An even count produces one task with ``pre_break_helix`` equal to
    half the count; an odd count produces two tasks, for the two helices
    around the midpoint (count // 2 and count // 2 + 1).

    :param scaffolds: table whose index values are used as ``pdb_path`` and
        which has "topo" and "pdb" columns (presumably the DataFrame returned
        by parse_scorefile_linear -- verify against that helper).
    :param options: object passed through unchanged as "extra_options" of
        every task (the same object is shared by all tasks).
    :return: generator of dicts with the keys "clash_cutoff", "extra_options",
        "int_cutoff", "name", "pdb_path" and "pre_break_helix".
    """
    for pdb_path in scaffolds.index:
        record = scaffolds.loc[pdb_path]
        helix_count = len(record["topo"])
        midpoint = helix_count // 2  # rounds down for odd counts
        if helix_count % 2:
            # odd number of helices: try both helices around the middle
            candidates = (midpoint, midpoint + 1)
        else:
            # even number of helices: split exactly in half
            candidates = (midpoint,)
        # name of the original design: file name of the "pdb" entry with the
        # first ".pdb" occurrence removed
        design_name = record["pdb"].split("/")[-1].replace(".pdb", "", 1)
        for helix in candidates:
            yield {
                "clash_cutoff": 5000,
                "extra_options": options,
                # interfaces must be a ratio of 1:3 or 3:1 between the n and c
                # term halves and the bound helix
                "int_cutoff": 0.33,
                "name": design_name,
                "pdb_path": pdb_path,
                "pre_break_helix": helix,
            }


# Load the scorefile written to scaffolds/01_prep_inputs, relative to the
# current working directory.
# `create_tasks` iterates `scaffolds.index` as the pdb paths and reads the
# "topo" and "pdb" columns, so the result is presumably a pandas DataFrame
# indexed by decoy path -- verify against parse_scorefile_linear.
scaffolds = parse_scorefile_linear(
    os.path.join(os.getcwd(), "scaffolds/01_prep_inputs/scores.json")
)

### Make helix-bound states from the scaffolds

In [None]:
# Python standard library
import os
import pwd
import socket
import sys

# 3rd party library imports
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Rosetta library imports
from pyrosetta.distributed.cluster.core import PyRosettaCluster

# Custom library imports
sys.path.insert(0, os.getcwd())
from crispy_shifty.protocols.states import make_bound_states  # the functions we will distribute


# Print instructions for reaching the dask dashboard: an ssh tunnel from local
# port 8000 to port 8787 on this host, built from the current user name and
# host name.
print(
    "run the following from your local terminal to port forward the dashboard to localhost"
)
print(
    f"ssh -L 8000:localhost:8787 {pwd.getpwuid(os.getuid()).pw_name}@{socket.gethostname()}"
)
print("dashboard is now visible at localhost:8000")
print(f"can also view dashboard at {socket.gethostname()}:8787 without port forwarding")
# Options handed to every task as "extra_options" by create_tasks.
options = {
    "-out:level": "200",  # warning outputs only
}
# Results go under the repo root; create the directory if it does not exist yet.
output_path = os.path.join(os.getcwd(), "scaffolds/02_make_states")
os.makedirs(output_path, exist_ok=True)

# Distribute make_bound_states over SLURM-backed dask workers, one task per
# dict yielded by create_tasks(scaffolds, options).
if __name__ == "__main__":
    # configure SLURM cluster as a context manager
    with SLURMCluster(
        cores=1,
        processes=1,
        job_cpu=1,
        memory="8GB",
        queue="short",
        walltime="3:00:00",
        death_timeout=120,
        local_directory="$TMPDIR",  # spill worker litter on local node temp storage
        log_directory="/mnt/home/pleung/logs/slurm_logs",
        # Extra flags appended to the dask worker command line (visible in the
        # printed job script).
        # NOTE(review): --lifetime (3h) equals the SLURM walltime (3:00:00), so
        # workers may be killed by the scheduler before they retire on their
        # own -- consider a lifetime shorter than the walltime; confirm against
        # the dask-jobqueue documentation.
        extra=["--lifetime", "3h", "--lifetime-stagger", "5m"],
    ) as cluster:
        # show the generated sbatch script for the record
        print(cluster.job_script())
        # scale adaptively between 1 and 150 workers
        cluster.adapt(
            minimum=1,
            maximum=150,
            wait_count=999,  # Number of consecutive times that a worker should be suggested for removal before it is removed
            interval="5s",  # Time between checks
            target_duration="60s",  # presumably the desired time for the computation to take -- TODO confirm against the dask Adaptive docs
        )
        # setup a client to interact with the cluster as a context manager
        with Client(cluster) as client:
            print(client)
            client.upload_file(
                os.path.join(os.getcwd(), "crispy_shifty/protocols/states.py")
            )  # upload the script that contains the functions to distribute
            # output_path is used both as the output directory and as the scratch directory
            PyRosettaCluster(
                client=client,
                logging_level="WARNING",
                output_path=output_path,
                project_name="crispy_shifty",
                scratch_dir=output_path,
                simulation_name="notebooks_02_make_states",
                tasks=create_tasks(scaffolds, options),
            ).distribute(protocols=[make_bound_states])
            # NOTE(review): the explicit close calls below are presumably
            # redundant with the enclosing `with` blocks -- confirm before removing.
            client.close()
        cluster.scale(0)
        cluster.close()
    print("distributed run complete")

https://docs.anaconda.com/anaconda/install

run the following from your local terminal to port forward the dashboard to localhost
ssh -L 8000:localhost:8787 pleung@dig71
dashboard is now visible at localhost:8000
can also view dashboard at dig71:8787 without port forwarding
#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -e /mnt/home/pleung/logs/slurm_logs/dask-worker-%J.err
#SBATCH -o /mnt/home/pleung/logs/slurm_logs/dask-worker-%J.out
#SBATCH -p short
#SBATCH -n 1
#SBATCH --cpus-per-task=1
#SBATCH --mem=8G
#SBATCH -t 3:00:00

/home/pleung/.conda/envs/crispy/bin/python -m distributed.cli.dask_worker tcp://172.16.131.101:43227 --nthreads 1 --memory-limit 7.45GiB --name dummy-name --nanny --death-timeout 120 --local-directory $TMPDIR --lifetime 3h --lifetime-stagger 5m --protocol tcp://

<Client: 'tcp://172.16.131.101:43227' processes=0 threads=0, memory=0 B>


`conda env export --prefix /home/pleung/.conda/envs/crispy > environment.yml`
to reproduce this simulation later.


### Unused blocks

In [5]:
%%time 
import pyrosetta

pyrosetta.init()

# sys.path entries are used verbatim by the import system; "~" is only expanded
# by shells, so the literal "~/..." string never pointed at the home directory.
# Resolve it explicitly so the entry actually refers to the repo checkout.
sys.path.insert(0, os.path.expanduser("~/projects/crispy_shifty/"))  # TODO projects
from crispy_shifty.protocols.states import make_bound_states

# Call the protocol directly in this kernel (no cluster) on a single decoy from
# 01_prep_inputs, with the same cutoffs that create_tasks uses.
# The first positional argument is None; presumably the input pose slot that
# PyRosettaCluster would normally fill -- verify against make_bound_states.
t = make_bound_states(
    None,
    pdb_path="/mnt/projects/crispy_shifty/scaffolds/01_prep_inputs/decoys/0007/notebooks_01_prep_inputs_e415bdb161304f5bacaafab4572b14aa.pdb.bz2",
    name="DHR77_DHR5_l2_t2_t1_1_v4c",
    pre_break_helix=4,
    clash_cutoff=5000,
    int_cutoff=0.33,
)

PyRosetta-4 2021 [Rosetta PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python38.Release 2021.31+release.c7009b3115c22daa9efe2805d9d1ebba08426a54 2021-08-07T10:04:12] retrieved from: http://www.pyrosetta.org
(C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team.
core.init: {0} Checking for fconfig files in pwd and ./rosetta/flags
core.init: {0} Rosetta version: PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python38.Release r292 2021.31+release.c7009b3 c7009b3115c22daa9efe2805d9d1ebba08426a54 http://www.pyrosetta.org 2021-08-07T10:04:12
core.init: {0} command: PyRosetta -ex1 -ex2aro -database /home/pleung/.conda/envs/crispy/lib/python3.8/site-packages/pyrosetta/database
basic.random.init_random_generator: {0} 'RNG device' seed mode, using '/dev/urandom', seed=-1505151078 seed_offset=0 real_seed=-1505151078 thread_index=0
basic.random.init_random_generator: {0} RandomGenerator:init: Normal mode, seed=-1505151078 RG_type=