# Make Dimer States

In [1]:
# %load_ext lab_black
# Python standard library
from glob import glob
import os
import socket
import sys

# 3rd party library imports
import dask
import matplotlib.pyplot as plt
import pandas as pd
import pyrosetta
import numpy as np
import scipy
import seaborn as sns
from tqdm.auto import tqdm  # jupyter compatible progress bar

tqdm.pandas()  # hook tqdm into pandas so pandas operations can show progress bars
# Notebook magics
# render matplotlib figures inline so the plots are saved inside the notebook
%matplotlib inline
# load the autoreload extension; mode 2 re-imports modules automatically
# before each cell executes, so edits to imported .py files are picked up
%load_ext autoreload
%autoreload 2
# Record the execution context in the cell output for provenance.
print(f"running in directory: {os.getcwd()}")  # where are we?
print(f"running on node: {socket.gethostname()}")  # what node are we on?

running in directory: /home/broerman/projects/crispy_shifty/projects/crispy_shifty_dimers
running on node: sofia


In [2]:
# Work from the repository root: later cells use relative "projects/..." paths,
# build absolute paths from os.getcwd(), and put os.getcwd() on sys.path to
# import the crispy_shifty package.
# NOTE(review): this is a user-specific absolute path; adjust it when running
# the notebook from another checkout.
os.chdir("/home/broerman/projects/crispy_shifty")

In [3]:
def write_pdb_list(list_path, pdb_dirs):
    """Write the paths of all ``*.pdb`` files found in ``pdb_dirs`` to a list file.

    Args:
        list_path: Path of the list file to create. An existing file is
            overwritten; the parent directory is created when it is missing.
        pdb_dirs: Iterable of directories that are searched (non-recursively)
            for ``*.pdb`` files. Directories are processed in the given order.

    The file receives one path per line. Paths are written exactly as returned
    by ``glob``, i.e. relative when the directories are relative.
    """
    parent_dir = os.path.dirname(list_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    pdb_paths = []
    for pdb_dir in pdb_dirs:
        # glob() returns matches in arbitrary, filesystem-dependent order; sort
        # them so the list file is identical between runs and machines.
        pdb_paths.extend(sorted(glob(os.path.join(pdb_dir, "*.pdb"))))

    with open(list_path, "w") as list_file:
        list_file.writelines(f"{pdb_path}\n" for pdb_path in pdb_paths)


# Directory holding the scaffold PDB folders.
INPUTS_DIR = "projects/crispy_shifty_dimers/00_inputs"
# The lists are written to 00_input_lists because that is where the
# task-generation cell (input_dhrs / input_thrs) reads them from; writing them
# into 00_inputs left a fresh "Restart & Run All" unable to find the lists.
INPUT_LISTS_DIR = "projects/crispy_shifty_dimers/00_input_lists"

write_pdb_list(
    os.path.join(INPUT_LISTS_DIR, "original_dhr.list"),
    [
        os.path.join(INPUTS_DIR, scaffold_dir)
        for scaffold_dir in ("dhrsgoodnocys", "dhrs_nocys", "relax_xtal", "th_dhrs")
    ],
)
write_pdb_list(
    os.path.join(INPUT_LISTS_DIR, "original_thr.list"),
    [os.path.join(INPUTS_DIR, scaffold_dir) for scaffold_dir in ("harley_THR", "thrs")],
)

In [3]:
# List files (one scaffold PDB path per line) for each scaffold family.
input_dhrs = [
    os.path.join(os.getcwd(), "projects/crispy_shifty_dimers/00_input_lists/original_dhr.list"),
    os.path.join(os.getcwd(), "projects/crispy_shifty_dimers/00_input_lists/tj_DHRs_filtered_filtered.list"),
]
input_thrs = [
    os.path.join(os.getcwd(), "projects/crispy_shifty_dimers/00_input_lists/original_thr.list"),
]


def _tasks_from_lists(list_files, pre_break_helix, options):
    """Yield one task dict per non-empty line of every file in ``list_files``.

    Args:
        list_files: Iterable of list-file paths; each line is a scaffold path.
        pre_break_helix: Value stored under ``"pre_break_helix"`` in each task.
        options: Object stored (not copied) under ``"extra_options"``.
    """
    for list_file in list_files:
        with open(list_file, "r") as f:
            for line in f:
                pdb_path = line.rstrip()
                if not pdb_path:
                    # Blank lines (e.g. a trailing newline at the end of the
                    # list) would otherwise produce a task without an input.
                    continue
                yield {
                    "extra_options": options,
                    "-s": pdb_path,
                    "pre_break_helix": pre_break_helix,
                    # file name without directory and extension
                    "name": os.path.splitext(os.path.basename(pdb_path))[0],
                    "bb_clash_cutoff": 5000,
                    # set lenient loop distance cutoff since chains will move during design
                    "loop_dist_cutoff": 5,
                    "dhr_int_frac_cutoff": 0.3,
                }


def create_tasks(options):
    """Generate the task dictionaries for every input scaffold.

    Scaffolds listed in ``input_dhrs`` are yielded first with
    ``pre_break_helix=4``, followed by those in ``input_thrs`` with
    ``pre_break_helix=6``, so each scaffold type gets its matching value.
    The list files are read lazily while the generator is consumed.

    Args:
        options: Extra options attached to every task under
            ``"extra_options"``; the same object is shared by all tasks.

    Yields:
        dict: keys ``"extra_options"``, ``"-s"`` (scaffold path), ``"pre_break_helix"``,
        ``"name"`` (scaffold file name without extension), ``"bb_clash_cutoff"``,
        ``"loop_dist_cutoff"`` and ``"dhr_int_frac_cutoff"``.
    """
    yield from _tasks_from_lists(input_dhrs, 4, options)
    yield from _tasks_from_lists(input_thrs, 6, options)

In [None]:

# Python standard library
import os
import pwd
import socket
import sys

# 3rd party library imports
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Rosetta library imports
from pyrosetta.distributed.cluster.core import PyRosettaCluster

# Custom library imports
sys.path.insert(0, os.getcwd())
from crispy_shifty.protocols.states import make_dimer_states  # the functions we will distribute

print(f"View dashboard at {socket.gethostname()}:8787")

# Extra options attached to every task by create_tasks().
options = {"-out:level": "200"}  # warning outputs only

# Results, scratch files and SLURM logs all live below this directory.
output_path = os.path.join(os.getcwd(), "projects/crispy_shifty_dimers/01_make_states")
os.makedirs(output_path, exist_ok=True)

if __name__ == "__main__":
    # Resources requested for each SLURM worker job.
    slurm_settings = dict(
        cores=1,
        processes=1,
        job_cpu=1,
        memory="4GB",
        queue="short",
        walltime="3:00:00",
        death_timeout=120,
        local_directory="$TMPDIR",  # spill worker litter on local node temp storage
        log_directory=os.path.join(output_path, "slurm_logs"),
        extra=["--lifetime", "3h", "--lifetime-stagger", "5m"],
    )
    # Adaptive scaling: keep between 1 and 20 workers alive.
    adaptive_settings = dict(
        minimum=1,
        maximum=20,
        # number of consecutive times a worker must be suggested for removal
        # before it is actually removed
        wait_count=999,
        interval="5s",  # time between checks
        target_duration="60s",
    )

    # The cluster and the client are both used as context managers.
    with SLURMCluster(**slurm_settings) as cluster:
        print(cluster.job_script())
        cluster.adapt(**adaptive_settings)
        with Client(cluster) as client:
            print(client)
            # upload the script that contains the functions to distribute
            states_module = os.path.join(os.getcwd(), "crispy_shifty/protocols/states.py")
            client.upload_file(states_module)
            PyRosettaCluster(
                client=client,
                logging_level="WARNING",
                output_path=output_path,
                project_name="crispy_shifty_dimers",
                scratch_dir=output_path,
                simulation_name="CSD_01_make_states",
                tasks=create_tasks(options),
            ).distribute(protocols=[make_dimer_states])
            client.close()
        # explicitly release the workers before leaving the cluster context
        cluster.scale(0)
        cluster.close()
    print("distributed run complete")

## Everything below here is testing ##

In [13]:
# Put the repository root first on sys.path so the protocol is imported from
# the working copy.
sys.path.insert(0, os.getcwd())
from crispy_shifty.protocols.states import make_dimer_states

# NOTE(review): the dalphaball path contains a directory spelled "DAlpahBall";
# confirm it against the filesystem before "correcting" it.
pyrosetta.distributed.maybe_init(
    options="-corrections::beta_nov16 true",
    extra_options={
        "-out:level": "200",
        "-holes:dalphaball": "/home/bcov/ppi/tutorial_build/main/source/external/DAlpahBall/DAlphaBall.gcc",
        "-indexed_structure_store:fragment_store": "/home/bcov/sc/scaffold_comparison/data/ss_grouped_vall_all.h5",
    },
)

import pyrosetta.distributed.io as io

# Single-scaffold smoke test: load the structure here and hand the pose to the
# protocol directly.
file = "/net/shared/scaffolds/pre_scaffold_DB/tj_DHRs_filtered/X17_3_13_2_ct6_fa.pdb"
ppose = io.pose_from_file(file)
states = make_dimer_states(
    ppose,
    pre_break_helix=4,
    name=file.rsplit("/", 1)[-1][:-4],  # file name without the ".pdb" suffix
    bb_clash_cutoff=5000,
    loop_dist_cutoff=5,
    dhr_int_frac_cutoff=0,
)

# Dump every generated state to disk for manual inspection.
for raw_state in states:
    state = io.to_pose(raw_state)
    state.dump_pdb(f"/home/broerman/projects/dimeric_hinges/dimeric_hinges_2/testing/01_make_states/int_check/{state.pdb_info().name()}.pdb")

0.00 min: Generating states from X17_3_13_2_ct6_fa
0.00 min: Generating state A -7 0...failed due to insufficient overlap for alignment.
0.00 min: Generating state A -7 1...failed due to insufficient overlap for alignment.
0.00 min: Generating state B -7 0...failed due to difference in loop length.
0.00 min: Generating state B -7 1...failed due to difference in loop length.
0.00 min: Generating state A -6 0...failed due to insufficient overlap for alignment.
0.00 min: Generating state A -6 1...failed due to insufficient overlap for alignment.
0.00 min: Generating state B -6 0...failed due to difference in loop length.
0.00 min: Generating state B -6 1...failed due to difference in loop length.
0.00 min: Generating state A -5 0...failed due to insufficient overlap for alignment.
0.00 min: Generating state A -5 1...failed due to backbone clashes.
0.00 min: Generating state B -5 0...failed due to difference in loop length.
0.00 min: Generating state B -5 1...failed due to difference in lo

In [9]:
# Put the repository root first on sys.path so the protocol is imported from
# the working copy.
sys.path.insert(0, os.getcwd())
from crispy_shifty.protocols.states import make_dimer_states

# NOTE(review): the dalphaball path contains a directory spelled "DAlpahBall";
# confirm it against the filesystem before "correcting" it.
pyrosetta.distributed.maybe_init(
    options="-corrections::beta_nov16 true",
    extra_options={
        "-out:level": "200",
        "-holes:dalphaball": "/home/bcov/ppi/tutorial_build/main/source/external/DAlpahBall/DAlphaBall.gcc",
        "-indexed_structure_store:fragment_store": "/home/bcov/sc/scaffold_comparison/data/ss_grouped_vall_all.h5",
    },
)

import pyrosetta.distributed.io as io

# Single-scaffold smoke test: no pose is passed (first argument is None); the
# structure location is supplied through the pdb_path keyword instead.
file = "/home/broerman/projects/dimeric_hinges/dimeric_hinges_2/00_inputs/dhrsgoodnocys/DHR62.pdb"
states = make_dimer_states(
    None,
    pdb_path=file,
    pre_break_helix=4,
    name=file.rsplit("/", 1)[-1][:-4],  # file name without the ".pdb" suffix
    bb_clash_cutoff=5000,
    loop_dist_cutoff=5,
    dhr_int_frac_cutoff=0,
)

# Dump every generated state to disk for manual inspection.
for raw_state in states:
    state = io.to_pose(raw_state)
    state.dump_pdb(f"/home/broerman/projects/dimeric_hinges/dimeric_hinges_2/testing/01_make_states/DHR62/append_pose_to_pose_2/{state.pdb_info().name()}.pdb")

0.01 min: Generating states from DHR62
0.01 min: Generating state A -7 0...success.
0.02 min: Generating state A -7 1...success.
0.02 min: Generating state B -7 0...failed due to difference in loop length.
0.02 min: Generating state B -7 1...failed due to difference in loop length.
0.02 min: Generating state A -6 0...failed due to difference in loop length.
0.02 min: Generating state A -6 1...failed due to difference in loop length.
0.02 min: Generating state B -6 0...failed due to backbone clashes.
0.02 min: Generating state B -6 1...failed due to backbone clashes.
0.02 min: Generating state A -5 0...failed due to backbone clashes.
0.02 min: Generating state A -5 1...failed due to backbone clashes.
0.03 min: Generating state B -5 0...failed due to difference in loop length.
0.03 min: Generating state B -5 1...failed due to difference in loop length.
0.03 min: Generating state A -4 0...failed due to backbone clashes.
0.03 min: Generating state A -4 1...failed due to backbone clashes.
0