# Boilerplate

In [1]:
# python internal 
import collections
import copy
import gc
from glob import glob
import h5py
import itertools
import os
print(os.getcwd())
import random
import re
import socket
print(socket.gethostname())
import shutil
import subprocess
import sys
# conda/pip
import dask
import graphviz
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy
import seaborn as sns
import tensorflow as tf
from tqdm import tqdm
# special packages on the DIGS
import py3Dmol
import pymol
import pyrosetta
# notebook magic
%matplotlib inline
%load_ext autoreload
%autoreload 2

/mnt/home/pleung/projects/bistable_bundle/r3/hinges
dig100


# Flo's original approach:
5. I design the loops (incl. neighborhood): 
`/home/flop/switch/5thround/DHRs/loops/des*`
(path names kinda match the ones from loop closure)
6. After cleaning up loop neighborhoods with fragment constrained design, stapled using the scripts here `/home/flop/scripts/disulfides/`


# I will follow Flo's suggestion to do a different order of operations.
I will use the serialization build of PyRosetta to enable recording user defined info about the designs.  
This enables downstream inline filtering and data analysis, as well as clustering by lineage.

I will rule out designs by stapling prior to loop design cleanup. The resulting designs I will pair.


For matching, will try to always find matches for State A as the initial, parent DHR, as this is the gold standard for a reference state.  
Depending on how many matches there are, this might already be enough. But if not, can search through states and find matching length as well as match DSSP and ABEGO  

TODO sequence recovery of parent as a final metric in 05?

# Make functions for looping and labeling to assist downstream penultimate design step

In [12]:
from pyrosetta.distributed.packed_pose.core import PackedPose
from pyrosetta.distributed import requires_init
from typing import *

@requires_init
def load(silent: str, **kwargs) -> Generator[str, PackedPose, None]:
    """
    Requires init. Because of some issues with silent energies, if your energy 
    is supposed to be a string but is an empty string  (""),
    it will be set to a float value of 0.0
    @pleung
    """
    import pyrosetta
    import pyrosetta.distributed.io as io
    from pyrosetta.rosetta.core.io.silent import SilentFileOptions, SilentFileData
    from pyrosetta.rosetta.core.pose import Pose
    if silent == None:
        silent = kwargs["-in:file:silent"]
    else:
        pass
    options = SilentFileOptions() 
    sfd = SilentFileData(options) # the part that requires init
    sfd.read_file(silent)
    for tag in sfd.tags():
        ss = sfd.get_structure(tag)
        ss_energies = list(ss.energies())
        pose = Pose()
        ss.fill_pose(pose)
        pose.pdb_info().name(tag)
        for energy in ss_energies:
            key = energy.name() 
            # takes advantage of string_value returning "" for true floats
            if energy.string_value() is not "":
                value = energy.string_value() 
            else: # if your energy is supposed to be an empty string it will be set to 0.0 here
                value = energy.value()
            pyrosetta.rosetta.core.pose.setPoseExtraScore(pose, key, value)
        ppose = io.to_packed(pose)
        yield ppose
        
def pack(ppose: PackedPose, **kwargs) -> Generator[str, PackedPose, None]:
    """
    TODO    <OperateOnResidueSubset name="restrict_PRO_GLY" selector="pro_and_gly_positions">
                <PreventRepackingRLT/>
            </OperateOnResidueSubset>
    TODO add csts ?
    TODO play around with neighborhood
    """
    from copy import deepcopy
    import pyrosetta
    import pyrosetta.distributed.io as io
    from pyrosetta.distributed.tasks.rosetta_scripts import SingleoutputRosettaScriptsTask
    scores = deepcopy(ppose.pose.scores)
    new_loop_resis = scores["new_loop_resis"]
    xml = """
    <ROSETTASCRIPTS>
        <SCOREFXNS>
            <ScoreFunction name="sfxn" weights="beta_nov16" >
                <Reweight scoretype="res_type_constraint" weight="1.0" />
            </ScoreFunction> 
        </SCOREFXNS>
        <RESIDUE_SELECTORS>
            <Index name="new_loop_resis" resnums="{new_loop_resis}" />
            <Neighborhood name="around_new_loop" selector="new_loop_resis" distance="6.0" />
            <Or name="designable" selectors="new_loop_resis,around_new_loop" />
            <Neighborhood name="packable" selector="designable" distance="6.0"/>
            <Not name="not_designable" selector="designable" />
            <Not name="not_packable" selector="packable" />
            <ResidueName name="pro_and_gly_positions" residue_name3="PRO,GLY" />
            <Layer name="surface" select_core="false" select_boundary="false" select_surface="true"
                use_sidechain_neighbors="true"/>
            <Layer name="boundary" select_core="false" select_boundary="true" select_surface="false" 
                use_sidechain_neighbors="true"/>
            <Layer name="core" select_core="true" select_boundary="false" select_surface="false" 
                use_sidechain_neighbors="true"/>
            <SecondaryStructure name="sheet" overlap="0" minH="3" minE="2" include_terminal_loops="false" 
                use_dssp="true" ss="E"/>
            <SecondaryStructure name="entire_loop" overlap="0" minH="3" minE="2" include_terminal_loops="true" 
                use_dssp="true" ss="L"/>
            <SecondaryStructure name="entire_helix" overlap="0" minH="3" minE="2" include_terminal_loops="false"
                use_dssp="true" ss="H"/>
            <And name="helix_cap" selectors="entire_loop">
                <PrimarySequenceNeighborhood lower="1" upper="0" selector="entire_helix"/>
            </And>
            <And name="helix_start" selectors="entire_helix">
                <PrimarySequenceNeighborhood lower="0" upper="1" selector="helix_cap"/>
            </And>
            <And name="helix" selectors="entire_helix">
                <Not selector="helix_start"/>
            </And>
            <And name="loop" selectors="entire_loop">
                <Not selector="helix_cap"/>
            </And>
        </RESIDUE_SELECTORS>
        <TASKOPERATIONS>
            <RestrictAbsentCanonicalAAS name="design" keep_aas="ADEFGHIKLMNPQRSTVWY" />
            <OperateOnResidueSubset name="pack" selector="not_designable">
                <RestrictToRepackingRLT/>
            </OperateOnResidueSubset>
            <OperateOnResidueSubset name="lock" selector="not_packable">
                <PreventRepackingRLT/>
            </OperateOnResidueSubset>
            <DesignRestrictions name="layer_design">
                <Action selector_logic="surface AND helix_start"  aas="DEHKPQR"/>
                <Action selector_logic="surface AND helix"        aas="EHKQR"/>
                <Action selector_logic="surface AND sheet"        aas="EHKNQRST"/>
                <Action selector_logic="surface AND loop"         aas="DEGHKNPQRST"/>
                <Action selector_logic="boundary AND helix_start" aas="ADEHIKLNPQRSTVWY"/>
                <Action selector_logic="boundary AND helix"       aas="ADEHIKLNQRSTVWYM"/>
                <Action selector_logic="boundary AND sheet"       aas="DEFHIKLNQRSTVWY"/>
                <Action selector_logic="boundary AND loop"        aas="ADEFGHIKLNPQRSTVWY"/>
                <Action selector_logic="core AND helix_start"     aas="AFILVWYNQSTHP"/>
                <Action selector_logic="core AND helix"           aas="AFILVWM"/>
                <Action selector_logic="core AND sheet"           aas="FILVWY"/>
                <Action selector_logic="core AND loop"            aas="AFGILPVWYSM"/>
                <Action selector_logic="helix_cap"                aas="DNSTP"/>
            </DesignRestrictions>
            <PruneBuriedUnsats name="prune" allow_even_trades="false" 
                atomic_depth_cutoff="3.5" minimum_hbond_energy="-1.0"/>
            <LimitAromaChi2 name="arochi" chi2max="110" chi2min="70" include_trp="True" />
            <ExtraRotamersGeneric name="ex1_ex2" ex1="1" ex2="1" />
        </TASKOPERATIONS>
        <MOVERS>
            <StructProfileMover name="gen_profile" add_csts_to_pose="1" consider_topN_frags="100" 
                eliminate_background="0" ignore_terminal_residue="1" only_loops="0"
                residue_selector="designable" burialWt="0" RMSthreshold="0.6" />
            <ClearConstraintsMover name="clear_constraints" />
            <PackRotamersMover name="pack"
            task_operations="arochi,prune,ex1_ex2,design,pack,lock,layer_design"/>
        </MOVERS>
        <PROTOCOLS>
            <Add mover="gen_profile"/>
            <Add mover="pack"/>
            <Add mover="clear_constraints"/>
        </PROTOCOLS>
    </ROSETTASCRIPTS>
    """.format(new_loop_resis=new_loop_resis)
    pack = SingleoutputRosettaScriptsTask(xml)
    pack_ppose = pack(ppose.pose.clone())
    pose = io.to_pose(pack_ppose)
    for key, value in scores.items():
        pyrosetta.rosetta.core.pose.setPoseExtraScore(pose, key, value)
    final_ppose = io.to_packed(pose)
    yield final_ppose
        
def staple(ppose: PackedPose, **kwargs) -> list:
    """
    TODO ~90% of all native disulfides have an energy of less than -0.8 given the standard 1.25 weight (Ref2015).
    """
    from copy import deepcopy
    import pyrosetta
    import pyrosetta.distributed.io as io
    from pyrosetta.rosetta.protocols.rosetta_scripts import XmlObjects
    from stapler import NativeDisulfideStapler
    scores = deepcopy(ppose.pose.scores)
    pose = io.to_pose(ppose.pose.clone())
    new_loop_resis = [int(x) for x in scores["new_loop_resis"].split(",")]
    new_loop_resis.sort()
    print("new loop found at indices", new_loop_resis)
    upper_A = new_loop_resis[0] - 4
    lower_B = new_loop_resis[-1] + 4
    try:
        assert upper_A > 0
        assert lower_B < len(pose.residues)
    except AssertionError:
        print("the new loop resis are unexpectedly close to the termini")
    selectors = XmlObjects.create_from_string(
        """
        <RESIDUE_SELECTORS>
            <SSElement name="first" selection="n_term" to_selection="{pre},H,E" chain="A" reassign_short_terminal_loop="2" />
            <SSElement name="second" selection="{post},H,S" to_selection="c_term" chain="A" reassign_short_terminal_loop="2" />
        </RESIDUE_SELECTORS>
        """.format(
            pre = int(scores["pre_break_helix"]),
            post = int(scores["pre_break_helix"]+1)
        )
    )
    first = selectors.get_residue_selector("first")
    second = selectors.get_residue_selector("second")
    first_resis = list(first.apply(pose))
    second_resis = list(second.apply(pose))
    chunk_A = pyrosetta.rosetta.core.select.residue_selector.ResidueIndexSelector()
    chunk_B = pyrosetta.rosetta.core.select.residue_selector.ResidueIndexSelector()
    offset = 3 # distance we want to stay away from new loop
    length = len(pose.residues)
    assert length == scores["total_length"] 
    for i, index_bool in enumerate(first_resis, start=1):
        # don't add residues within offset distance before new loop
        if index_bool and i < (sum(first_resis) - offset):
            chunk_A.append_index(i)
        else:
            pass
    for i, index_bool in enumerate(second_resis, start=1):
        first = 0
        # don't add residues within offset distance after new loop
        if index_bool and i > (sum(1 for x in second_resis if not x) + offset):
            chunk_B.append_index(i)
        else:
            pass
    not_too_close = [chunk_B, chunk_A]
    sfxn = pyrosetta.rosetta.protocols.loops.get_fa_scorefxn() 
    # Initialize the native disulfide stapler with defaults.
    stapler = NativeDisulfideStapler(
        residue_selectors = not_too_close, # TODO other params like cutoff?
    )
    name = pose.pdb_info().name()
    poses = []
    for i, crosslinked_pose in enumerate(stapler.apply(pose)):
        suffix = "_" + str(i)
        crosslinked_pose.pdb_info().name(name+suffix)
        sfxn.score(crosslinked_pose)
        dslf_fa13 = crosslinked_pose.scores["dslf_fa13"]
        disulfide = []
        for i, aa in enumerate(crosslinked_pose.sequence(), start=1):
            if aa == "C":
                disulfide.append(str(i))
            else:
                pass
        assert len(disulfide) == 2
        disulfide_str = ",".join(disulfide)
        pyrosetta.rosetta.core.pose.setPoseExtraScore(crosslinked_pose, "disulfide_at", disulfide_str)
        for key, value in scores.items():
            pyrosetta.rosetta.core.pose.setPoseExtraScore(crosslinked_pose, key, value)
        pyrosetta.rosetta.core.pose.setPoseExtraScore(crosslinked_pose, "dslf_fa13", dslf_fa13)
        poses.append(crosslinked_pose)
    if len(poses) > 0:
        final_pposes = io.to_packed(set(poses))
    else:
        final_pposes = {}
    return final_pposes

def check_disulf_energy(ppose: PackedPose, **kwargs) -> PackedPose:
    """
    relax the pose in cartesian space, but don't save it, keep the original pose,
    get the cartesian-relaxed disulfide scores and rmsd, 
    and add them the original pose with the original scores
    TODO rmsd_cart <MoveBeforeFilter name="rmsd" mover="relax" filter="rmsd_cart" confidence="0" />
    @pleung
    """
    from copy import deepcopy
    import pyrosetta
    import pyrosetta.distributed.io as io
    from pyrosetta.distributed.tasks.rosetta_scripts import SingleoutputRosettaScriptsTask
    scores = deepcopy(ppose.pose.scores)
    xml = """
    <ROSETTASCRIPTS>
        <SCOREFXNS>
            <ScoreFunction name="sfxn_cart" weights="ref2015_cart" /> 
            <ScoreFunction name="sfxn" weights="ref2015" /> 
        </SCOREFXNS>
        <RESIDUE_SELECTORS>
        </RESIDUE_SELECTORS>
        <TASKOPERATIONS>
            <PruneBuriedUnsats name="prune" allow_even_trades="false" 
                atomic_depth_cutoff="3.5" minimum_hbond_energy="-1.0"/>
            <LimitAromaChi2 name="arochi" chi2max="110" chi2min="70" include_trp="True" />
            <ExtraRotamersGeneric name="ex1_ex2" ex1="1" ex2="1" />
        </TASKOPERATIONS>
        <FILTERS>
            <Rmsd name="rmsd_cart" reference_name="before_relax" chains="A" superimpose="0" threshold="5" by_aln="0" confidence="0" />
        </FILTERS>
        <MOVERS>
            <SavePoseMover name="save_before_relax" restore_pose="0" reference_name="before_relax"/>
            <FastRelax name="relax" scorefxn="sfxn_cart"
            task_operations="arochi,prune,ex1_ex2"
            repeats="2" 
            relaxscript="MonomerRelax2019"
            cartesian="true"
            bondangle="true"
            bondlength="true" >
                <MoveMap name="mm" bb="true" chi="true" />
            </FastRelax>
        </MOVERS>
        <PROTOCOLS>
            <Add mover="save_before_relax" />
            <Add mover="relax" />            
            <Add filter="rmsd_cart" />            
        </PROTOCOLS>
        <OUTPUT scorefxn="sfxn" />
    </ROSETTASCRIPTS>
    """
    relax = SingleoutputRosettaScriptsTask(xml)
    # relax the pose to get scores
    relax_ppose = relax(ppose.pose.clone())
    # keep the original pose...
    pose = io.to_pose(ppose)
    # ... get the cartesian-relaxed disulfide scores and rmsd...
    update = deepcopy(relax_ppose.pose.scores)
    dslf_fa13_cart = update["dslf_fa13"]
    rmsd_cart = update["rmsd_cart"]
    # ... add the original scores ...
    for key, value in scores.items():
        pyrosetta.rosetta.core.pose.setPoseExtraScore(pose, key, value)
    # ... and finally add the cartesian-relaxed disulfide scores and rmsd
    pyrosetta.rosetta.core.pose.setPoseExtraScore(pose, "dslf_fa13_cart", dslf_fa13_cart)
    pyrosetta.rosetta.core.pose.setPoseExtraScore(pose, "rmsd_cart", rmsd_cart)
    final_ppose = io.to_packed(pose)
    return final_ppose

def relax(ppose: PackedPose, **kwargs) -> PackedPose:
    """
    dualspace relax, check rmsd and fragment quality and return scored ppose
    TODO count_bad_residues="true" ?
    @pleung
    """
    from copy import deepcopy
    import pyrosetta
    import pyrosetta.distributed.io as io
    from pyrosetta.distributed.tasks.rosetta_scripts import SingleoutputRosettaScriptsTask
    scores = deepcopy(ppose.pose.scores)
    new_loop_resis = scores["new_loop_resis"]
    xml = """
    <ROSETTASCRIPTS>
        <SCOREFXNS>
            <ScoreFunction name="sfxn" weights="beta_nov16" /> 
            <ScoreFunction name="sfxn_cart" weights="beta_nov16_cart" /> 
        </SCOREFXNS>
        <RESIDUE_SELECTORS>
            <ResidueName name="CYS" residue_name3="CYS,CYD" />
            <Not name="not_CYS" selector="CYS" />
            <Index name="new_loop_resis" resnums="{new_loop_resis}" />
            <Neighborhood name="around_new_loop" selector="new_loop_resis" distance="8.0" />
        </RESIDUE_SELECTORS>
        <TASKOPERATIONS>
            <PruneBuriedUnsats name="prune" allow_even_trades="false" 
                atomic_depth_cutoff="3.5" minimum_hbond_energy="-1.0"/>
            <LimitAromaChi2 name="arochi" chi2max="110" chi2min="70" include_trp="True" />
            <ExtraRotamersGeneric name="ex1_ex2" ex1="1" ex2="1" />
        </TASKOPERATIONS>
        <FILTERS>
            FragQual name="frag_qual" confidence="0" />
            <Geometry name="geometry"
            confidence="0"
            count_bad_residues="true"
            />
            <Geometry name="geometry_loop" 
            residue_selector="around_new_loop" 
            confidence="0"
            count_bad_residues="true"
            />
            <Rmsd name="rmsd_dual" reference_name="before_relax" chains="A" superimpose="0" threshold="5" by_aln="0" confidence="0" />
            <worst9mer name="9mer" rmsd_lookup_threshold="0.4" confidence="0" />
        </FILTERS>
        <MOVERS>
            <SavePoseMover name="save_before_relax" restore_pose="0" reference_name="before_relax"/>
            <FastRelax name="relax" scorefxn="sfxn_cart"
            task_operations="arochi,prune,ex1_ex2"
            repeats="2" 
            relaxscript="MonomerRelax2019"
            dualspace="true"
            bondangle="true"
            bondlength="true" >
                <MoveMap name="mm" bb="true" chi="true" >
                    <ResidueSelector selector="CYS" 
                    chi="true" 
                    bb="true"
                    bondangle="true" 
                    bondlength="true" />
                    <ResidueSelector selector="not_CYS" 
                    chi="true" 
                    bb="true"
                    bondangle="false" 
                    bondlength="false" />
                </MoveMap>
            </FastRelax>
        </MOVERS>
        <PROTOCOLS>
            <Add mover="relax" />
            <Add mover="save_before_relax" />
            <Add filter="geometry" />
            <Add filter="geometry_loop" />
            <Add filter="rmsd_dual" />
            <Add filter="9mer" />
        </PROTOCOLS>
        <OUTPUT scorefxn="sfxn" />
    </ROSETTASCRIPTS>
    """.format(new_loop_resis=new_loop_resis)
    relax = SingleoutputRosettaScriptsTask(xml)
    relax_ppose = relax(ppose.pose.clone())
    pose = io.to_pose(relax_ppose)
    update = deepcopy(pose.scores)
    # update scores for values that were updated
    for key, value in scores.items():
        if key in update.keys():
            pyrosetta.rosetta.core.pose.setPoseExtraScore(pose, key, update[key])
        else:
            pyrosetta.rosetta.core.pose.setPoseExtraScore(pose, key, value)
    final_ppose = io.to_packed(pose)
    return final_ppose

In [6]:
pyrosetta.init("-corrections::beta_nov16 true -indexed_structure_store:fragment_store /net/databases/VALL_clustered/connect_chains/ss_grouped_vall_helix_shortLoop.h5")
stapled = []
for i, tppose in enumerate(load("03_silents/closed.silent")):
    if i > 10: 
        break
    else:
        pass
    test = next(pack(tppose))
    stapled.append(staple(test))

protocols.task_operations.PruneBuriedUnsatsOperation: {0} Hbond graph has: 27935 edges requiring: 8295720 bytes
core.scoring.atomic_depth.AtomicDepth: {0} actual boxlength 177, box[122*171*177], resolution  0.504
protocols.task_operations.PruneBuriedUnsatsOperation: {0} Pruned 1566 / 11784 rotamers.
core.pack.pack_rotamers: {0} built 10218 rotamers at 40 positions.
core.pack.pack_rotamers: {0} Requesting all available threads for interaction graph computation.
core.pack.interaction_graph.interaction_graph_factory: {0} Instantiating LinearMemoryInteractionGraph
core.pack.rotamer_set.RotamerSets: {0} Completed interaction graph pre-calculation in 1 available threads (1 had been requested).
protocols.rosetta_scripts.ParsedProtocol: {0} setting status to success
new loop found at indices [240, 241, 242, 243, 244, 245, 246, 250]
protocols.rosetta_scripts.RosettaScriptsParser: {0} Generating XML Schema for rosetta_scripts...
protocols.rosetta_scripts.RosettaScriptsParser: {0} ...done
protoco

In [10]:
stapled_flat = [item for sublist in stapled for item in sublist]
for i, ppose in enumerate(stapled_flat):
    ppose.pose.dump_pdb("st{i}.pdb".format(i=i))


In [None]:
sts = []
for i, st in enumerate(stapled_flat):
    print(st.pose.scores)
    st = check_disulf_energy(st)
    st = relax(st)
    sts.append(st)
#     st.pose.dump_pdb("r{i}.pdb".format(i=i))
#     print(st.pose.pdb_info().name())
#     print(st.pose.scores)
#     print(st.pose.sequence())

{'abego_str': 'XBAAAAAAAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAAAAAABAABAAAAAAAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAAAAABAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAAAAAABAABAAAAAAAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAAAAAABX', 'closure_type': 'strict_remodel', 'disulfide_at': '77,192', 'dssp': 'LLHHHHHHHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHHLLLLLLHHHHHHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHHHLL', 'new_loop_resis': '111,112,113,114,117', 'parent': 'hDHR79_5CWP', 'scaffold': 'DHR', 'sfxn_used': 'beta_nov16', 'bb_clash': 16754.9609375, 'ddg': -102.7239990234375, 'dslf_fa13': 8.781081957150853, 'fa_atr': -1432.6243916129654, 'fa_dun_dev': 48.369768054756776, 'fa_dun_rot': 177.3428419163667, 'fa_dun_semi': 271.5680518867537, 'fa_elec': -477.3627652488801, 'fa_intra_atr_xover4': -93.08458968629012, 'fa_intra_elec': -40.81447397736922, 'fa_intra_rep_xover4'

# Setup Dask
Trying a adaptive SLURMCluster. to see the dashboard, forward port `8787` to `8000`:  
`local$ ssh -L 8000:localhost:8787 $USER@$HOSTNAME`  
now, the web UI is visible at `localhost:8000`  
if you're using a local cluster make sure the node this notebook is on has the same 
number of workers as cores

In [5]:
!echo $HOSTNAME
!echo $USER

dig64
pleung


In [6]:
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

cluster = SLURMCluster(cores=1,
                       processes=1,
                       job_cpu=1,
                       memory="6GB",
                       queue="medium",
                       walltime="11:30:00",
                      )
print(cluster.job_script())
# scale between 0 and 500 workers as needed
cluster.adapt(minimum=0, maximum=500) 
client = Client(cluster)
client

#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -e /home/pleung/logs/slurm_logs/dask-worker-%J.err
#SBATCH -o /home/pleung/logs/slurm_logs/dask-worker-%J.out
#SBATCH -p short
#SBATCH -n 1
#SBATCH --cpus-per-task=1
#SBATCH --mem=4G
#SBATCH -t 30:00

/home/pleung/.conda/envs/cereal/bin/python -m distributed.cli.dask_worker tcp://172.16.131.94:45583 --nthreads 1 --memory-limit 4.00GB --name name --nanny --death-timeout 60 --local-directory $TMPDIR/dask --no-nanny --no-dashboard



0,1
Client  Scheduler: tcp://172.16.131.94:45583  Dashboard: http://172.16.131.94:8787/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [7]:
# client.close(); cluster.close()

# Set command line options, make tasks and submit to client
previously used `-indexed_structure_store:fragment_store /home/bcov/sc/scaffold_comparison/data/ss_grouped_vall_all.h5`

In [8]:
import logging
import pyrosetta.distributed.io as io
from pyrosetta.distributed.cluster.core import PyRosettaCluster

logging.basicConfig(level=logging.INFO)
silents = glob(os.path.join(os.getcwd(), "02_silents/states.silent"))

options = { 
    "-out:level": "300",
    "-in:file:silent_struct_type": "binary",
    "-holes:dalphaball": "/home/bcov/ppi/tutorial_build/main/source/external/DAlpahBall/DAlphaBall.gcc",
    "-indexed_structure_store:fragment_store": "/net/databases/VALL_clustered/connect_chains/ss_grouped_vall_helix_shortLoop.h5",
    "-dunbrack_prob_buried": "0.8",
    "-dunbrack_prob_nonburied": "0.8", 
    "-dunbrack_prob_buried_semi": "0.8", 
    "-dunbrack_prob_nonburied_semi": "0.8",
}

def create_tasks(silents, options):
    for silent in silents:
        tasks = {"options": "-corrections::beta_nov16 true"}
        tasks["extra_options"] = options
        tasks["set_logging_handler"] = "interactive"
        tasks["-in:file:silent"] = silent
        yield tasks
        
if not os.getenv("DEBUG"):
    output_path = os.path.join(os.getcwd(), "04_staple_pair")
    PyRosettaCluster(
        tasks=create_tasks(silents, options),
        client=client,
        scratch_dir=output_path,
        output_path=output_path,
    ).distribute(protocols=[load, pack, staple, relax])

`conda env export --prefix /home/pleung/.conda/envs/cereal > environment.yml`
to reproduce this simulation later.
INFO:pyrosetta.distributed:maybe_init performing pyrosetta initialization: {'options': '-run:constant_seed 1 -multithreading:total_threads 1', 'extra_options': '-mute all', 'set_logging_handler': 'interactive', 'silent': True}
INFO:pyrosetta.rosetta:Found rosetta database at: /home/pleung/.conda/envs/cereal/lib/python3.7/site-packages/pyrosetta/database; using it....
INFO:pyrosetta.rosetta:PyRosetta-4 2020 [Rosetta PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python37.Release 2020.50+release.1295438cd4bd2be39c9dbbfab8db669ab62415ab 2020-12-12T00:30:01] retrieved from: http://www.pyrosetta.org
(C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team.


https://docs.anaconda.com/anaconda/install



# Look at scores
There is certainly a less embarrassing way to do this but at least this way is vectorized, so it should scale very well

In [9]:
def read_scorefile(scores):
    import pandas as pd
    scores = pd.read_json(scores, orient="records", typ="frame", lines=True)
    scores = scores.T
    mat = scores.values
    n = mat.shape[0]
    dicts = list(mat[range(n), range(n)])
    index = scores.index
    tabulated_scores = pd.DataFrame(dicts, index=index)
    return tabulated_scores
    
output_path = os.path.join(os.getcwd(), "04_staple_pair")
scores = os.path.join(output_path, "scores.json")
scores_df = read_scorefile(scores)
scores_df.head()

Unnamed: 0,bb_clash,closure_type,ddg,disulfide_at,dslf_fa13,dssp,fa_atr,fa_dun,fa_dun_dev,fa_dun_rot,...,scaffold,score,score_A,score_B,score_per_res,sfxn_used,shift,total_length,total_score,yhh_planarity
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_staple_pair/decoys/0000/2021.02.05.15.33.11.462101_ecefb8013b6c47ddaf330aa5bdf5b7f4.pdb.bz2,1225.391968,lenient,-66.462997,56114,5.438535,LHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHHH...,-1246.568641,556.873673,36.574001,141.787003,...,TH_DHR,0.0,-128.449005,-132.048004,-2.666,beta_nov16,7.0,202.0,2632.416732,0.0
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_staple_pair/decoys/0000/2021.02.05.15.33.11.462101_a0598d585d9441e195999be74b2f70f1.pdb.bz2,1225.391968,lenient,-66.462997,67124,17.581094,LHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHHH...,-1239.281709,552.260353,36.574001,141.787003,...,TH_DHR,0.0,-128.449005,-132.048004,-2.666,beta_nov16,7.0,202.0,2486.437685,0.0
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_staple_pair/decoys/0000/2021.02.05.15.33.11.462101_4dc8835625834aa3ac12650ee23dd635.pdb.bz2,1225.391968,lenient,-66.462997,60117,18.340962,LHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHHH...,-1234.614434,551.274022,36.574001,141.787003,...,TH_DHR,0.0,-128.449005,-132.048004,-2.666,beta_nov16,7.0,202.0,2485.949566,0.0
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_staple_pair/decoys/0000/2021.02.05.15.33.11.462101_63f0db7651cf48b6a892c1a328016566.pdb.bz2,70.338997,lenient,-108.647003,103162,11.959568,LHHHHHHHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHH...,-1542.308456,636.919266,42.985001,189.647995,...,DHR,0.0,-171.576004,-186.817993,-3.1,beta_nov16,0.0,231.0,1485.253443,0.025595
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_staple_pair/decoys/0000/2021.02.05.15.33.11.462101_58fd04b979a34029adb449f646e655c1.pdb.bz2,88.934998,lenient,-64.790001,32171,7.301336,LHHHHHHHHHHHHHHHHHHLLLLLHHHHHHHHHHHHHHHHHHHHLL...,-1894.323595,868.082748,45.569,250.201996,...,THR,0.0,-218.315994,-237.815002,-2.716,beta_nov16,6.0,298.0,2520.511947,0.017626


In [10]:
len(scores_df)

144

In [11]:
scores_df.columns

Index(['bb_clash', 'closure_type', 'ddg', 'disulfide_at', 'dslf_fa13', 'dssp',
       'fa_atr', 'fa_dun', 'fa_dun_dev', 'fa_dun_rot', 'fa_dun_semi',
       'fa_elec', 'fa_intra_atr_xover4', 'fa_intra_elec', 'fa_intra_rep',
       'fa_intra_rep_xover4', 'fa_intra_sol_xover4', 'fa_rep', 'fa_sol',
       'hbond_bb_sc', 'hbond_lr_bb', 'hbond_sc', 'hbond_sr_bb', 'hxl_tors',
       'interface_buried_sasa', 'lk_ball', 'lk_ball_bridge',
       'lk_ball_bridge_uncpl', 'lk_ball_iso', 'lk_ball_wtd', 'new_loop_resis',
       'omega', 'p_aa_pp', 'parent', 'parent_length', 'pivot_helix',
       'pre_break_helix', 'pro_close', 'rama_prepro', 'ref', 'sc', 'sc_int',
       'scaffold', 'score', 'score_A', 'score_B', 'score_per_res', 'sfxn_used',
       'shift', 'total_length', 'total_score', 'yhh_planarity'],
      dtype='object')

In [14]:
list(scores_df["dslf_fa13"])

[5.438534978012019,
 17.58109422264533,
 18.340961881827035,
 11.959567959218871,
 7.30133579310764,
 23.349278344440627,
 1.3722084201027092,
 20.61465137216387,
 10.45333559825284,
 25.379314292521027,
 4.200967940277531,
 41.526047025789936,
 18.151717708539785,
 24.19243737381412,
 0.5983479913218901,
 4.761877010226436,
 16.48297454693737,
 7.30133579310764,
 1.3722084201027092,
 23.349278344440627,
 24.192437373814105,
 4.200967940277531,
 10.45333559825284,
 25.379314292521027,
 41.526047025789936,
 16.48297454693737,
 0.5983479913219231,
 20.61465137216385,
 2.652480598943569,
 4.761877010226436,
 18.151717708539795,
 18.340961881827035,
 17.58109422264533,
 5.438534978012019,
 23.349278344440627,
 1.3722084201027092,
 7.30133579310764,
 5.7272861340190335,
 9.02791455460266,
 6.926120357955823,
 9.027914554602866,
 5.727286134019123,
 6.926120357955944,
 0.5983479913219231,
 16.48297454693737,
 18.151717708539795,
 10.45333559825284,
 4.761877010226436,
 4.200967940277531,
 24