# Boilerplate

In [None]:
# python internal 
import collections
import copy
import gc
from glob import glob
import h5py
import itertools
import os
print(os.getcwd())
import random
import re
import socket
print(socket.gethostname())
import shutil
import subprocess
import sys
# conda/pip
import dask
import graphviz
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy
import seaborn as sns
import tensorflow as tf
from tqdm import tqdm
# special packages on the DIGS
import py3Dmol
import pymol
import pyrosetta
# notebook magic
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Flo's original approach:
7. Two-state design:  
The scripts are in `/home/flop/switch/5thround/DHRs/msd7/msd_scripts/`.



# I need to add a few things to Flo's method.
I will use the serialization build of PyRosetta to enable recording user defined info about the designs.  
This enables downstream inline filtering and data analysis, as well as clustering by lineage.

TODO: add sequence recovery relative to the parent as a final metric in 05?  
TODO: explore `FavorSequenceProfile` as an alternative to `FavorNativeResidue`.

# Make functions for looping and labeling to assist downstream penultimate design step

In [2]:
from pyrosetta.distributed.packed_pose.core import PackedPose

def msd(state_X_dict: dict, state_Y_dict: dict, **kwargs) -> PackedPose:
    """
    Two-state design of a state X / state Y pair onto one shared sequence.

    Both states are loaded from bz2-compressed PDB files, driven to a common
    sequence with rounds of FastDesign + FavorNativeResidue (see the nested
    `msd_fnr`), and finally merged into one multichain pose (X first, Y
    second) that carries the combined bookkeeping scores.

    Args:
        state_X_dict: a scores.json style dict with exactly one entry,
            `{path_to_pdb_bz2: scores}`, for state X.
        state_Y_dict: same layout, for state Y (original note: it "contains a
            path to a parent state X"). Its scores must provide `sfxn_used`,
            `total_length`, `pre_break_helix` and every key listed in
            `common_keys` below.
        **kwargs: accepted but not used by this function.

    Returns:
        PackedPose: the combined X+Y pose. Its extra scores are the per-state
        `*_X` / `*_Y` labels, the pre-design score per residue of each state,
        the shared keys copied from state Y, the designed `sequence` and the
        `parent_sequence` (sequence of state Y before design).

    Raises:
        AssertionError: if an input dict does not hold exactly one entry, or
            if the two states do not end up with identical sequences.

    TODO assumes middle split
    TODO ala and np penalties
    TODO only 2G mem?
    """

    # TODO: sketch of how SAP could be added to the design score function
    # <Reweight scoretype="sap_constraint" weight="1.0" /> TODO
    # <MOVERS>
    #     <AddSapConstraintMover name="add_sap" speed="lightning" sap_goal="0" penalty_per_sap="3" score_selector="chainA" sap_calculate_selector="chainA" sasa_selector="chainA" /> # 1
    # </MOVERS>
    # <SIMPLE_METRICS>
    #     <SapScoreMetric name="sap_score" score_selector="chainA" />
    # </SIMPLE_METRICS>
    # <PROTOCOLS>
    #     <Add metrics="sap_score" labels="start_sap_score" />
    #     <Add mover="add_sap" />

    # ... Do a surface pack rotamers or something ...

    #     <Add metrics="sap_score" />
    # </PROTOCOLS>

    # Imports are kept local to the function -- presumably so that it stays
    # self-contained when shipped to distributed workers (assumption).
    import binascii
    import bz2
    from copy import deepcopy
    import os
    import pyrosetta
    from pyrosetta.rosetta.core.pose import Pose
    import pyrosetta.distributed.io as io

    poses = []
    for state_dict in state_X_dict, state_Y_dict:
        if len(state_dict) != 1:
            raise AssertionError(
                "each state dict must hold exactly one {path: scores} entry"
            )
        (path, state_scores), = state_dict.items()
        with open(path, "rb") as f:
            pdb_text = bz2.decompress(f.read()).decode()
        # round-trip through a randomly named temp file in the cwd, and make
        # sure it is removed even if loading the pose fails
        filename = str(binascii.b2a_hex(os.urandom(16)).decode("utf-8")) + ".pdb.temp"
        try:
            with open(filename, "w+") as g:
                print(pdb_text, file=g)
            ppose = io.pose_from_file(filename)
        finally:
            if os.path.exists(filename):
                os.remove(filename)
        pose = io.to_pose(ppose)
        # re-attach the recorded scores so they travel with the pose
        for key, value in state_scores.items():
            pyrosetta.rosetta.core.pose.setPoseExtraScore(pose, key, value)
        poses.append(pose)

    state_X, state_Y = poses
    scores = deepcopy(state_Y.scores)
    sfxn_used = scores["sfxn_used"]
    # sequence of state Y before any design, reported as "parent_sequence"
    parent_sequence = state_Y.sequence()

    # TODO buried_unsats, aa_comp, res_type_constraint, SAP...                 
    sfxn_obj = pyrosetta.rosetta.protocols.rosetta_scripts.XmlObjects.create_from_string(
        """
        <SCOREFXNS>
            <ScoreFunction name="sfxn" weights="{sfxn_used}" />
            <ScoreFunction name="sfxn_design" weights="{sfxn_used}" >
                <Reweight scoretype="res_type_constraint" weight="2.0" />
                <Reweight scoretype="aa_composition" weight="1.0" />
                <Set use_hb_env_dep="true" />
                <Reweight scoretype="approximate_buried_unsat_penalty" weight="17" />
                <Set approximate_buried_unsat_penalty_burial_atomic_depth="3.5" />
                <Set approximate_buried_unsat_penalty_hbond_energy_threshold="-1.0" />
                <Set approximate_buried_unsat_penalty_natural_corrections1="true" />
                <Set approximate_buried_unsat_penalty_hbond_bonus_cross_chain="-7" />
                <Set approximate_buried_unsat_penalty_hbond_bonus_ser_to_helix_bb="1"/>
                <Reweight scoretype="lk_ball" weight="0" />
                <Reweight scoretype="lk_ball_iso" weight="0" />
                <Reweight scoretype="lk_ball_bridge" weight="0" />
                <Reweight scoretype="lk_ball_bridge_uncpl" weight="0" />                
                <Reweight scoretype="aa_composition" weight="1.0" />
                
                <Reweight scoretype="sap_constraint" weight="1.0" />
            </ScoreFunction>
        </SCOREFXNS>
        """.format(sfxn_used=sfxn_used)
    )

    sfxn = sfxn_obj.get_score_function("sfxn_design")
    sfxn_clean = sfxn_obj.get_score_function("sfxn")
    res = scores["total_length"]
    # score per residue of each state before design, using the unmodified sfxn
    score_per_res_X, score_per_res_Y = sfxn_clean(state_X)/res, sfxn_clean(state_Y)/res

    # read by msd_fnr below through the closure
    ala_penalty = 1 # TODO
    np_penalty = 0.8 # TODO

    def yeet_pose_xyz(pose, xyz=(1,0,0)):
        """
        Given a pose and a cartesian 3D unit vector, translates the pose
        according to 100 * the unit vector without applying a rotation:
        @pleung @bcov @flop
        Args:
            pose (Pose): The pose to move.
            xyz (tuple): The cartesian 3D unit vector to move the pose in.

        Returns:
            pose (Pose): The moved pose (a clone; the input is left untouched).
        """
        from pyrosetta.rosetta.core.select.residue_selector import TrueResidueSelector
        from pyrosetta.rosetta.protocols.toolbox.pose_manipulation import rigid_body_move
        assert len(xyz) == 3
        pose = pose.clone()
        entire = TrueResidueSelector()
        subset = entire.apply(pose)
        # get which direction in cartesian unit vectors (xyz) to yeet pose
        unit = pyrosetta.rosetta.numeric.xyzVector_double_t(*xyz)
        scaled_xyz = tuple([100*x for x in xyz])
        far_away = pyrosetta.rosetta.numeric.xyzVector_double_t(*scaled_xyz)
        rigid_body_move(unit, 0, far_away, pose, subset)
        return pose

    def combined_pose_maker(poses=None) -> Pose:
        """
        Combine up to 7 poses in a list into one multichain pose.

        The first pose is kept where it is; each following pose is translated
        along its own axis direction (+x, +y, +z, -x, -y, -z) and appended by
        jump. The input list and its poses are not modified.

        Raises:
            RuntimeError: if no poses are passed, or more than 7.
        """
        # unit vectors, one per appended pose
        xyzs = [(1,0,0),(0,1,0),(0,0,1),(-1,0,0),(0,-1,0),(0,0,-1)]
        if not poses:
            raise RuntimeError("Empty list of poses passed")
        if len(poses) > len(xyzs) + 1:
            raise RuntimeError(
                "At most {} poses can be combined, got {}".format(len(xyzs) + 1, len(poses))
            )
        # get the first pose
        first, *rest = poses
        new_pose = first.clone()
        # go through rest of poses and add them into the first one
        for xyz, pose in zip(xyzs, rest):
            # yeet_pose_xyz returns a moved clone, so no extra clone is needed
            to_append = yeet_pose_xyz(pose, xyz)
            # NOTE(review): the original comment called the second argument the
            # "last jump"; PyRosetta documents it as the jump anchor residue --
            # confirm this is the intended anchor.
            new_pose.append_pose_by_jump(
                to_append,
                new_pose.num_jump()+1,
            )
        return new_pose

    def msd_fnr(despose, refpose, weight=0, strict_layers=0, neighbors=0, design_sel=None):
        """
        Perform multi state design (MSD) using FavorNativeResidue (FNR).

        First copies the sequence of `refpose` onto `despose` wherever the two
        disagree, then (if anything differed) runs one FastDesign repeat on
        `despose` with a FavorNativeResidue bonus of `weight`. Layer design is
        derived from the enclosing `state_X` / `state_Y`, not from the two
        arguments. `despose` is modified in place; nothing is returned.

        Args:
            despose (Pose): pose to design (modified in place).
            refpose (Pose): pose whose residues are copied onto `despose`.
            weight: value passed to FavorNativeResidue.
            strict_layers: if 1, positions whose layer differs between the two
                states are assigned to core / boundary / surface by the
                explicit combinations below; otherwise only positions that are
                core (or surface) in both states count as core (or surface)
                and everything else is boundary.
            neighbors: if 1 (and `design_sel` is None), also design residues
                within 6 of the differing positions.
            design_sel: optional residue indices to design; overrides the
                default of designing the positions that differed.
        """
        rs = pyrosetta.rosetta.core.select.residue_selector
        op = pyrosetta.rosetta.core.pack.task.operation
        get_residues = pyrosetta.rosetta.core.select.get_residues_from_subset
        new_resi_vector = pyrosetta.rosetta.utility.vector1_unsigned_long

        def index_selector_or_false(resis):
            # An index selector built from an empty list selects nothing useful
            # (presumably it errors when applied, which is why the original
            # guarded some of these -- TODO confirm); fall back to a selector
            # that selects no residues.
            if len(resis) > 0:
                return rs.ResidueIndexSelector(resis)
            return rs.FalseResidueSelector()

        true_sel = rs.TrueResidueSelector()
        allres = get_residues(true_sel.apply(despose))
        diff = new_resi_vector()
        # check each position for seq disagreement
        for i in allres:
            if despose.sequence(i,i) == "C": # maintain disulfides in despose
                continue
            elif refpose.sequence(i,i) == "C": # safely replace despose residue with CYS (not CYD)
                mut = pyrosetta.rosetta.protocols.simple_moves.MutateResidue()
                mut.set_target(i)
                mut.set_res_name(pyrosetta.rosetta.core.chemical.AA(2)) # 2 is CYS
                mut.apply(despose)
            elif despose.sequence(i,i) != refpose.sequence(i,i):
                diff.append(i)
                despose.replace_residue(i,refpose.residue(i),1)

        if design_sel is not None:
            designable = index_selector_or_false(design_sel)
        elif neighbors == 1: # design neighbors too
            designable = rs.NeighborhoodResidueSelector(
                index_selector_or_false(diff),
                6,
                True,
            )
        else: # design only diff
            designable = index_selector_or_false(diff)
        packable = rs.NeighborhoodResidueSelector(designable, 6, True)
        # everything outside `designable` may only repack ...
        pack_option = op.RestrictToRepackingRLT()
        pack = op.OperateOnResidueSubset(pack_option, designable, True)
        # ... and everything outside `packable` is frozen
        lock_option = op.PreventRepackingRLT()
        lock = op.OperateOnResidueSubset(lock_option, packable, True)
        # add standard task operations
        arochi = pyrosetta.rosetta.protocols.task_operations.LimitAromaChi2Operation()
        arochi.chi2max(110)
        arochi.chi2min(70)
        arochi.include_trp(True)
        ifcl = op.InitializeFromCommandline()
        # setup custom layer design: classify every position by the layer it
        # occupies in state X and in state Y
        surf_sel = rs.LayerSelector()
        surf_sel.set_layers(0,0,1)
        surf_sel.set_use_sc_neighbors(0)
        surf_sel.set_cutoffs(20,50)
        surf1 = set(get_residues(surf_sel.apply(state_X)))
        surf2 = set(get_residues(surf_sel.apply(state_Y)))
        core_sel = rs.LayerSelector()
        core_sel.set_layers(1,0,0)
        core_sel.set_use_sc_neighbors(0)
        core1 = set(get_residues(core_sel.apply(state_X)))
        core2 = set(get_residues(core_sel.apply(state_Y)))
        core_both = new_resi_vector()
        surf_both = new_resi_vector()
        bdry_core = new_resi_vector()
        bdry_surf = new_resi_vector()
        surf_core = new_resi_vector()
        bdry_both = new_resi_vector()
        for i in allres:
            if i in core1:
                if i in core2:
                    core_both.append(i)
                elif i in surf2:
                    surf_core.append(i)
                else:
                    bdry_core.append(i)
            elif i in surf1:
                if i in surf2:
                    surf_both.append(i)
                elif i in core2:
                    surf_core.append(i)
                else:
                    bdry_surf.append(i)
            else:
                if i in core2:
                    bdry_core.append(i)
                elif i in surf2:
                    bdry_surf.append(i)
                else:
                    bdry_both.append(i)
        sel_core_both = index_selector_or_false(core_both)
        sel_surf_both = index_selector_or_false(surf_both)
        sel_bdry_core = index_selector_or_false(bdry_core)
        sel_bdry_surf = index_selector_or_false(bdry_surf)
        sel_surf_core = index_selector_or_false(surf_core)
        sel_bdry_both = index_selector_or_false(bdry_both)
        if strict_layers == 1:
            sel_c = rs.OrResidueSelector(sel_core_both,sel_bdry_core)
            sel_b = rs.OrResidueSelector(sel_bdry_both,sel_surf_core)
            sel_s = rs.OrResidueSelector(sel_surf_both,sel_bdry_surf)
        else:
            sel_c = sel_core_both
            sel_s = sel_surf_both
            sel_c_or_s = rs.OrResidueSelector(sel_core_both,sel_surf_both)
            sel_b = rs.NotResidueSelector(sel_c_or_s)

        objs_sel = pyrosetta.rosetta.protocols.rosetta_scripts.XmlObjects.create_from_string(
            """
            <RESIDUE_SELECTORS>
                <SecondaryStructure name="sheet" overlap="0" minH="3" minE="2" include_terminal_loops="false" use_dssp="true" ss="E"/>
                <SecondaryStructure name="entire_loop" overlap="0" minH="3" minE="2" include_terminal_loops="true" use_dssp="true" ss="L"/>
                <SecondaryStructure name="entire_helix" overlap="0" minH="3" minE="2" include_terminal_loops="false" use_dssp="true" ss="H"/>
                <And name="helix_cap" selectors="entire_loop">
                    <PrimarySequenceNeighborhood lower="1" upper="0" selector="entire_helix"/>
                </And>
                <And name="helix_start" selectors="entire_helix">
                    <PrimarySequenceNeighborhood lower="0" upper="1" selector="helix_cap"/>
                </And>
                <And name="helix" selectors="entire_helix">
                    <Not selector="helix_start"/>
                </And>
                <And name="loop" selectors="entire_loop">
                    <Not selector="helix_cap"/>
                </And>
            </RESIDUE_SELECTORS>
            """
        )
        helix_sel = objs_sel.get_residue_selector("helix")
        loop_sel = objs_sel.get_residue_selector("loop")
        helix_cap_sel = objs_sel.get_residue_selector("helix_cap")

        core_hlx_sel = rs.AndResidueSelector(sel_c,helix_sel)
        bdry_hlx_sel = rs.AndResidueSelector(sel_b,helix_sel)
        surf_hlx_sel = rs.AndResidueSelector(sel_s,helix_sel)
        core_loop_sel = rs.AndResidueSelector(sel_c,loop_sel)
        bdry_loop_sel = rs.AndResidueSelector(sel_b,loop_sel)
        surf_loop_sel = rs.AndResidueSelector(sel_s,loop_sel)

        # layer design task ops, allows the current residue at a given position if it is not included
        core_hlx_task = op.RestrictAbsentCanonicalAASExceptNativeRLT()
        core_hlx_task.aas_to_keep("AFILVW")
        bdry_hlx_task = op.RestrictAbsentCanonicalAASExceptNativeRLT()
        bdry_hlx_task.aas_to_keep("ADEHIKLNQRSTVWYM")
        surf_hlx_task = op.RestrictAbsentCanonicalAASExceptNativeRLT()
        surf_hlx_task.aas_to_keep("EHKQR")
        core_loop_task = op.RestrictAbsentCanonicalAASExceptNativeRLT()
        core_loop_task.aas_to_keep("AFGILPVW")
        bdry_loop_task = op.RestrictAbsentCanonicalAASExceptNativeRLT()
        bdry_loop_task.aas_to_keep("ADEFGHIKLNPQRSTVWY")
        surf_loop_task = op.RestrictAbsentCanonicalAASExceptNativeRLT()
        surf_loop_task.aas_to_keep("DEGHKNPQRST")
        hlx_cap_task = op.RestrictAbsentCanonicalAASExceptNativeRLT()
        hlx_cap_task.aas_to_keep("DNSTP")

        hlx_cap_op   = op.OperateOnResidueSubset(hlx_cap_task  , helix_cap_sel, False)
        core_hlx_op  = op.OperateOnResidueSubset(core_hlx_task , core_hlx_sel , False)
        bdry_hlx_op  = op.OperateOnResidueSubset(bdry_hlx_task , bdry_hlx_sel , False)
        surf_hlx_op  = op.OperateOnResidueSubset(surf_hlx_task , surf_hlx_sel , False)
        core_loop_op = op.OperateOnResidueSubset(core_loop_task, core_loop_sel, False)
        bdry_loop_op = op.OperateOnResidueSubset(bdry_loop_task, bdry_loop_sel, False)
        surf_loop_op = op.OperateOnResidueSubset(surf_loop_task, surf_loop_sel, False)

        # push back all task ops, assumes no sheets
        task_factory = pyrosetta.rosetta.core.pack.task.TaskFactory()
        task_factory.push_back(pack)
        task_factory.push_back(lock)
        task_factory.push_back(arochi)
        task_factory.push_back(ifcl)
        task_factory.push_back(hlx_cap_op)
        task_factory.push_back(core_hlx_op)
        task_factory.push_back(bdry_hlx_op)
        task_factory.push_back(surf_hlx_op)
        task_factory.push_back(core_loop_op)
        task_factory.push_back(bdry_loop_op)
        task_factory.push_back(surf_loop_op)

        # add design movers; the two {} slots take np_penalty (FRACTION of the
        # surface_polar constraint) and ala_penalty (penalty of ala_pen)
        objs = pyrosetta.rosetta.protocols.rosetta_scripts.XmlObjects.create_from_string(
            """
            <MOVERS>
            <FastDesign name="fastdesign" repeats="1" relaxscript="MonomerDesign2019"
                cartesian="false" dualspace="false" ramp_down_constraints="false"
                bondangle="false" bondlength="false" min_type="lbfgs_armijo_nonmonotone">
            </FastDesign>
            <AddCompositionConstraintMover name="surface_polar" >
                <Comp entry="PENALTY_DEFINITION;TYPE ASP GLU HIS LYS ASN GLN ARG SER THR TYR;FRACT_DELTA_START -0.01;FRACT_DELTA_END 0.0;PENALTIES 0.1 0 ;FRACTION {};BEFORE_FUNCTION QUADRATIC;AFTER_FUNCTION CONSTANT;END_PENALTY_DEFINITION" />
            </AddCompositionConstraintMover>
            <AddCompositionConstraintMover name="ala_pen" >
                    <Comp entry="PENALTY_DEFINITION;TYPE ALA;ABSOLUTE 0;PENALTIES 0 {};DELTA_START 0;DELTA_END 1;BEFORE_FUNCTION CONSTANT;AFTER_FUNCTION LINEAR;END_PENALTY_DEFINITION;" />
                </AddCompositionConstraintMover>
            </MOVERS>
            """.format(np_penalty,ala_penalty))
        surfpol = objs.get_mover("surface_polar")
        surfpol.add_residue_selector(surf_sel)
        surfpol.apply(despose)
        ala_pen = objs.get_mover("ala_pen")
        ala_pen.apply(despose)
        fast_design = objs.get_mover("fastdesign")
        fast_design.set_scorefxn(sfxn)
        fast_design.set_task_factory(task_factory)
        # design only runs when at least one position was copied from refpose
        if len(diff) > 0:
            pyrosetta.rosetta.protocols.protein_interface_design.FavorNativeResidue(despose, weight)
            fast_design.apply(despose)
        # constraints are always cleared, whether or not design ran
        clear_constraints = pyrosetta.rosetta.protocols.constraint_movers.ClearConstraintsMover()
        clear_constraints.apply(despose)
        return

    # recover original interfacial residues and combine those from each state
    # NOTE: pre and post are both taken from "pre_break_helix"; presumably this
    # is the "assumes middle split" TODO from the docstring -- confirm.
    objs_sse = pyrosetta.rosetta.protocols.rosetta_scripts.XmlObjects.create_from_string(
        """
        <RESIDUE_SELECTORS>
            <SSElement name="part1" selection="n_term" to_selection="{pre},H,E" chain="A" reassign_short_terminal_loop="2" />
            <SSElement name="part2" selection="-{post},H,S" to_selection="c_term" chain="A" reassign_short_terminal_loop="2" />
        </RESIDUE_SELECTORS>
        """.format(
            pre = int(scores["pre_break_helix"]),
            post = int(scores["pre_break_helix"]),
        )
    )
    part1 = objs_sse.get_residue_selector("part1")
    part2 = objs_sse.get_residue_selector("part2")
    intsel = pyrosetta.rosetta.core.select.residue_selector.InterGroupInterfaceByVectorSelector(part1,part2)
    intdes = pyrosetta.rosetta.core.select.get_residues_from_subset(intsel.apply(state_Y))
    intref = pyrosetta.rosetta.core.select.get_residues_from_subset(intsel.apply(state_X))
    intall = pyrosetta.rosetta.utility.vector1_unsigned_long()
    # add all residues in either interface to be designed
    for i in intdes:
        intall.append(i)
    for i in intref:
        intall.append(i)

    # one round msd with no weight, lenient layers, no neighbors on all residues that are interface in either state
    msd_fnr(state_Y,state_X,0,0,0,intall)
    # one round msd with no weight, strict layers and neighbors on all residues that are different
#     msd_fnr(state_X,state_Y,0,1,1 )

#     msd_fnr(state_Y,state_X,0,1 )

#     msd_fnr(state_X,state_Y,0.2,1 )
#     msd_fnr(state_Y,state_X,0.2,1 )
#     msd_fnr(state_X,state_Y,0.5,1 )
#     msd_fnr(state_Y,state_X,0.5,1 )
#     msd_fnr(state_X,state_Y,1.0,1 )
#     msd_fnr(state_Y,state_X,1.0,1 )
#     msd_fnr(state_X,state_Y,1.5,0 )
#     msd_fnr(state_Y,state_X,1.5,0 )
#     msd_fnr(state_X,state_Y,2.0,0 )
#     msd_fnr(state_Y,state_X,2.0,0 )

#     np_penalty = 0.5

    msd_fnr(state_X,state_Y,10,0 )
    msd_fnr(state_Y,state_X,10,0 )
    # the combined pose is labeled with a single sequence, so both states must agree
    if state_X.sequence() != state_Y.sequence():
        raise AssertionError("state X and state Y did not converge on one sequence")
    scores_X = deepcopy(state_X.scores)
    scores_Y = deepcopy(state_Y.scores)
    combined_scores = {}
    combined_scores["abegos_X"] = scores_X["abego_str"]
    combined_scores["abegos_Y"] = scores_Y["abego_str"]
    combined_scores["dssp_X"] = scores_X["dssp"]
    combined_scores["dssp_Y"] = scores_Y["dssp"]
    combined_scores["closure_type_X"] = scores_X["closure_type"]
    combined_scores["closure_type_Y"] = scores_Y["closure_type"]
    combined_scores["disulfide_at_X"] = scores_X["disulfide_at"]
    combined_scores["disulfide_at_Y"] = scores_Y["disulfide_at"]
    combined_scores["score_per_res_pre_X"] = score_per_res_X
    combined_scores["score_per_res_pre_Y"] = score_per_res_Y
    # keys copied once, from state Y
    common_keys = [
        "new_loop_resis",
        "parent",
        "scaffold",
        "sfxn_used",
        "state",
        "bb_clash",
        "nmodes_attempts",
        "pivot_helix",
        "pre_break_helix",
        "shift",
        "total_length",
    ]
    for common_key in common_keys:
        combined_scores[common_key] = scores_Y[common_key]
    combined_XY = combined_pose_maker(poses)
    # clear scores and update
    pyrosetta.rosetta.core.pose.clearPoseExtraScores(combined_XY)
    for key, value in combined_scores.items():
        pyrosetta.rosetta.core.pose.setPoseExtraScore(combined_XY, key, value)
    pyrosetta.rosetta.core.pose.setPoseExtraScore(combined_XY, "sequence", state_Y.sequence())
    pyrosetta.rosetta.core.pose.setPoseExtraScore(combined_XY, "parent_sequence", parent_sequence)
    combined_XY = io.to_packed(combined_XY)
    return combined_XY

def score(ppose: PackedPose, **kwargs) -> PackedPose:
    """
    Relax and score each state of a combined two-chain pose separately.

    For state X (chain 1 kept) and state Y (chain 2 kept) a RosettaScripts
    protocol is run on a copy of the input pose stripped of its extra scores:
    the other chain is dropped, the remainder is cartesian-relaxed and a set
    of filters is evaluated. The scores of each run are suffixed with `_X` or
    `_Y`, merged with the scores the input pose already carried (which win on
    a name clash) and written onto the stripped copy of the input pose.

    Args:
        ppose: combined pose; its scores must contain `sfxn_used` and
            `new_loop_resis`.
        **kwargs: accepted but not used by this function.

    Returns:
        PackedPose: the input pose (not relaxed) carrying the merged scores.
    """
    from copy import deepcopy
    import pyrosetta
    import pyrosetta.distributed.io as io
    from pyrosetta.distributed.tasks.rosetta_scripts import SingleoutputRosettaScriptsTask

    # the protocol is identical for both states except for which chain is removed
    xml_template = """
        <ROSETTASCRIPTS>
            <SCOREFXNS>
                <ScoreFunction name="sfxn" weights="{sfxn}" />
                <ScoreFunction name="sfxn_design" weights="{sfxn}_cart" >
                    <Set use_hb_env_dep="true" />
                    <Reweight scoretype="approximate_buried_unsat_penalty" weight="17" />
                    <Set approximate_buried_unsat_penalty_burial_atomic_depth="3.5" />
                    <Set approximate_buried_unsat_penalty_hbond_energy_threshold="-1.0" />
                    <Set approximate_buried_unsat_penalty_natural_corrections1="true" />
                    <Set approximate_buried_unsat_penalty_hbond_bonus_cross_chain="-7" />
                    <Set approximate_buried_unsat_penalty_hbond_bonus_ser_to_helix_bb="1"/>                    
                </ScoreFunction>
            </SCOREFXNS>
            <RESIDUE_SELECTORS>
                <Index name="new_loop_resis" resnums="{new_loop_resis}" />
                <Neighborhood name="around_new_loop" selector="new_loop_resis" distance="8.0" />
            </RESIDUE_SELECTORS>
            <TASKOPERATIONS>
                <IncludeCurrent name="current" />
                <LimitAromaChi2 name="arochi" chi2max="110" chi2min="70" include_trp="True" />
                <ExtraRotamersGeneric name="ex1_ex2" ex1="1" ex2="1" />
                <InitializeFromCommandline name="ifcl"/>
            </TASKOPERATIONS>
            <MOVERS>
                <SavePoseMover name="save_before_relax" restore_pose="0" reference_name="before_relax"/>
                <SwitchChainOrder name="delete_Y" chain_order="1"/>
                <SwitchChainOrder name="delete_X" chain_order="2"/>
            </MOVERS>
            <FILTERS>
                <BuriedUnsatHbonds name="vbuns" use_reporter_behavior="true" report_all_heavy_atom_unsats="true" 
                    scorefxn="sfxn" ignore_surface_res="false" print_out_info_to_pdb="true" confidence="0" 
                    use_ddG_style="false" dalphaball_sasa="true" probe_radius="1.1" atomic_depth_selection="5.5" 
                    burial_cutoff="1000" burial_cutoff_apo="0.2" />
                <BuriedUnsatHbonds name="sbuns" use_reporter_behavior="true" report_all_heavy_atom_unsats="true"
                    scorefxn="sfxn" ignore_surface_res="false" print_out_info_to_pdb="true" confidence="0"
                    use_ddG_style="false" burial_cutoff="0.01" dalphaball_sasa="true" probe_radius="1.1" 
                    atomic_depth_selection="5.5" atomic_depth_deeper_than="false" />
                <BuriedUnsatHbonds name="buns" use_reporter_behavior="true" report_all_heavy_atom_unsats="true" 
                    scorefxn="sfxn" ignore_surface_res="false" print_out_info_to_pdb="true" confidence="0" 
                    use_ddG_style="false" burial_cutoff="0.01" dalphaball_sasa="true" probe_radius="1.1"
                    max_hbond_energy="1.5" burial_cutoff_apo="0.2" />
                <ExposedHydrophobics name="exposed_hydrophobics" />
                <Geometry name="geometry"
                    confidence="0"
                    count_bad_residues="true" />
                <Geometry name="geometry_loop" 
                    residue_selector="around_new_loop" 
                    confidence="0"
                    count_bad_residues="true" />
                <SSPrediction name="mismatch_probability" confidence="0" 
                    cmd="/software/psipred4/runpsipred_single" use_probability="1" 
                    mismatch_probability="1" use_svm="1" />
                <Rmsd name="rmsd_cart" reference_name="before_relax" chains="A" superimpose="1" threshold="5" by_aln="0" confidence="0" />
                <ScoreType name="total_score_pose" scorefxn="sfxn" score_type="total_score" threshold="0" confidence="0" />
                <ResidueCount name="count" />
                <CalculatorFilter name="score_per_res" equation="total_score_full / res" threshold="-2.0" confidence="0">
                    <Var name="total_score_full" filter="total_score_pose"/>
                    <Var name="res" filter="count"/>
                </CalculatorFilter>        
                <worst9mer name="wnm_all" rmsd_lookup_threshold="0.4" confidence="0" />
                <worst9mer name="wnm_hlx" rmsd_lookup_threshold="0.4" confidence="0" only_helices="true" />

            </FILTERS>
            <MOVERS>
                <FastRelax name="relax_cart" scorefxn="sfxn_design" repeats="1" batch="false" ramp_down_constraints="false"
                    cartesian="true" bondangle="true" bondlength="true" min_type="dfpmin_armijo_nonmonotone"
                    task_operations="ifcl,current,arochi,ex1_ex2" >
                </FastRelax>
            </MOVERS>
            <APPLY_TO_POSE>
            </APPLY_TO_POSE>
            <PROTOCOLS>
                <Add mover_name="{chain_to_delete}" />
                <Add mover_name="save_before_relax" />
                <Add mover_name="relax_cart"/>

                <Add filter_name="buns" />
                <Add filter_name="sbuns" />
                <Add filter_name="vbuns" />
                <Add filter_name="exposed_hydrophobics" />
                <Add filter_name="geometry"/>
                <Add filter_name="geometry_loop"/>
                <Add filter_name="mismatch_probability" />
                <Add filter_name="rmsd_cart" />
                <Add filter_name="score_per_res" />
                <Add filter_name="wnm_all" />
                <Add filter_name="wnm_hlx" />

            </PROTOCOLS>
            <OUTPUT scorefxn="sfxn" />
        </ROSETTASCRIPTS>
        """

    # remember what the pose came in with, then work on a score-free copy
    input_scores = deepcopy(ppose.pose.scores)
    work_pose = ppose.pose.clone()
    for name in input_scores:
        pyrosetta.rosetta.core.pose.clearPoseExtraScore(work_pose, name)

    weights_name = input_scores["sfxn_used"]
    loop_resis = input_scores["new_loop_resis"]

    # state label -> mover that removes the *other* chain
    merged = {}
    for state, deleter in (("X", "delete_Y"), ("Y", "delete_X")):
        protocol = xml_template.format(
            sfxn=weights_name,
            new_loop_resis=loop_resis,
            chain_to_delete=deleter,
        )
        task = SingleoutputRosettaScriptsTask(protocol)
        result = task(work_pose.clone())
        # label every score of this run with the state it belongs to
        for name, value in deepcopy(result.pose.scores).items():
            merged[f"{name}_{state}"] = value

    # TODO fancy splitting 
    # scores already on the input pose take precedence over fresh ones
    merged.update(input_scores)
    for name, value in merged.items():
        pyrosetta.rosetta.core.pose.setPoseExtraScore(work_pose, name, value)
    return io.to_packed(work_pose)

In [None]:
import pyrosetta.distributed.io as io

state_Y_dict = {"/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_pairs/decoys/0000/2021.02.23.10.05.17.623758_36134e77670045cc9735e734e64d41bd.pdb.bz2": {"9mer": 1.1399999856948853,
"abego_str": "XAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAABBBEAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGX", 
"bb_clash": 45.882999420166016, "closure_type": "strict_remodel", "ddg": -51.2130012512207, "disulfide_at": "74,112", "dslf_fa13": -1.1080000400543213, "dslf_fa13_cart": -1.0299999713897705, "dslf_fa13_swap": 1.1233665943145752,
"dssp": "LHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLL", 
"fa_atr": -1063.9110107421875, "fa_dun_dev": 20.341999053955078, "fa_dun_rot": 124.62899780273438, "fa_dun_semi": 186.6269989013672, "fa_elec": -450.3710021972656, "fa_intra_atr_xover4": -59.04199981689453,
"fa_intra_elec": -51.47999954223633, "fa_intra_rep_xover4": 36.2130012512207, "fa_intra_sol_xover4": 53.06700134277344, "fa_rep": 175.093994140625, "fa_sol": 859.4340209960938, 
"geometry": 0.0, "geometry_loop": 0.0, "hbond_bb_sc": -14.909000396728516, "hbond_lr_bb": -4.48199987411499, "hbond_sc": -54.308998107910156, "hbond_sr_bb": -170.64700317382812, "hxl_tors": 27.474000930786133,
"interface_buried_sasa": 1425.8680419921875, "lk_ball": 515.8469848632812, "lk_ball_bridge": 2.4149999618530273, "lk_ball_bridge_uncpl": 16.641000747680664, "lk_ball_iso": 1257.6309814453125, "new_loop_resis": "85,86,90", "nmodes_attempts": 0.0, "omega": 10.295999526977539,
"p_aa_pp": -67.94200134277344, "parent": "DHR54", "parent_length": 172.0, "pivot_helix": 4.0, "pre_break_helix": 4.0,
"pro_close": 0.0, "rama_prepro": -7.517000198364258, "ref": -30.895000457763672, "rmsd_cart": 0.3109999895095825, "rmsd_dual": 0.4970000088214874, "rmsd_nmp": 0.0, "rmsd_swap": 1.0787206888198853, "sc": 0.7210000157356262, "sc_int": 0.7319999933242798, 
"scaffold": "DHR", "score": 0.0, "score_A": -143.1649932861328, "score_B": -135.12100219726562, "score_per_res": -2.934999942779541, "sfxn_used": "beta_nov16", "shift": 4.0, "stapled": "True", "state": "DHR54_p_4_s_4", "total_length": 172.0, "total_score": -640.7839965820312}}

state_X_dict = {"/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_pack_staple_ref/decoys/0000/2021.02.20.23.23.39.916734_d3e600e4c7594039bf840f2b60c57939.pdb.bz2": {"9mer": 0.3723803460597992, 
"abego_str": "XAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAABABAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGX",
"bb_clash": 45.882999420166016, "closure_type": "original", "ddg": -58.28099822998047, "disulfide_at": "70,112", "dslf_fa13": -0.4200395973026856, "dslf_fa13_cart": -0.9792221188545227, 
"dssp": "LHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHLLLHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLL",
"fa_atr": -1072.5657733662192, "fa_dun_dev": 17.389961680698256, "fa_dun_rot": 117.50105001902524, "fa_dun_semi": 197.33598473646742, "fa_elec": -452.1607473387891, "fa_intra_atr_xover4": -59.5062941106155,
"fa_intra_elec": -55.9205306115322, "fa_intra_rep_xover4": 39.65616806635941, "fa_intra_sol_xover4": 57.83738582439674, "fa_rep": 183.67624290436015, "fa_sol": 843.5239487399866, 
"geometry": 0.0, "geometry_loop": 0.0, "hbond_bb_sc": -19.917059035422962, "hbond_lr_bb": -4.149534944786535, "hbond_sc": -58.17001616146253, "hbond_sr_bb": -176.21588630828703, "hxl_tors": 22.14059606747223,
"interface_buried_sasa": 1776.594970703125, "lk_ball": 523.4039099525688, "lk_ball_bridge": 3.173558791969032, "lk_ball_bridge_uncpl": 21.336423216623945, "lk_ball_iso": 1269.4594166048453, "new_loop_resis": "85,86,90", "nmodes_attempts": 0.0, "omega": 18.9150570145028,
"p_aa_pp": -66.51955005297154, "parent": "DHR54", "parent_length": 172.0, "pivot_helix": 4.0, "pre_break_helix": 4.0,
"pro_close": 0.0, "rama_prepro": 1.122541600576045, "ref": -22.75940800000001, "rmsd_cart": 0.24544990062713623, "rmsd_dual": 0.8430862426757812, "rmsd_nmp": 1.0000000116860974e-07, "sc": 0.703000009059906, "sc_int": 0.6679999828338623,
"scaffold": "DHR", "score": 0.0, "score_A": -101.48100280761719, "score_B": -113.64199829101562, "score_per_res": -2.6670000553131104, "sfxn_used": "beta_nov16", "shift": 0.0, "total_length": 172.0, "total_score": -660.2213056207834}}

# Initialise PyRosetta once for this interactive test. The flag string is
# written as adjacent literals (joined at compile time) purely for readability.
pyrosetta.init(
    "-corrections::beta_nov16 true"
    " -indexed_structure_store:fragment_store /net/databases/VALL_clustered/connect_chains/ss_grouped_vall_helix_shortLoop.h5"
    " -holes:dalphaball /home/bcov/ppi/tutorial_build/main/source/external/DAlpahBall/DAlphaBall.gcc"
    " -run::constant_seed 1"
)

# Smoke-test the two-state design on the example X/Y score dicts defined above
# and write the resulting pose to disk for inspection.
test = msd(state_Y_dict=state_Y_dict, state_X_dict=state_X_dict)
test.pose.dump_pdb("test.pdb")


In [4]:
import pyrosetta.distributed.io as io
# p = pyrosetta.io.pose_from_file("test.pdb")
# Wrap the msd result from the previous cell as a packed pose and run it
# through `score`, keeping the scored result for inspection below.
tppose = io.to_packed(test)
# test.scores
scored_test = score(tppose)



protocols.rosetta_scripts.RosettaScriptsParser: {0} Generating XML Schema for rosetta_scripts...
protocols.rosetta_scripts.RosettaScriptsParser: {0} ...done
protocols.rosetta_scripts.RosettaScriptsParser: {0} Initializing schema validator...
protocols.rosetta_scripts.RosettaScriptsParser: {0} ...done
protocols.rosetta_scripts.RosettaScriptsParser: {0} Validating input script...
protocols.rosetta_scripts.RosettaScriptsParser: {0} ...done
protocols.rosetta_scripts.RosettaScriptsParser: {0} Parsed script:
<ROSETTASCRIPTS>
	<SCOREFXNS>
		<ScoreFunction name="sfxn" weights="beta_nov16"/>
		<ScoreFunction name="sfxn_design" weights="beta_nov16_cart">
			<Set use_hb_env_dep="true"/>
			<Reweight scoretype="approximate_buried_unsat_penalty" weight="17"/>
			<Set approximate_buried_unsat_penalty_burial_atomic_depth="3.5"/>
			<Set approximate_buried_unsat_penalty_hbond_energy_threshold="-1.0"/>
			<Set approximate_buried_unsat_penalty_natural_corrections1="true"/>
			<Set approximate_buried_uns

In [6]:
scored_test.scores

{'abegos_X': 'XAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAABABAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGX',
 'abegos_Y': 'XAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAABBBEAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGX',
 'closure_type_X': 'original',
 'closure_type_Y': 'strict_remodel',
 'disulfide_at_X': '70,112',
 'disulfide_at_Y': '74,112',
 'dssp_X': 'LHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHLLLHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLL',
 'dssp_Y': 'LHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLL',
 'new_loop_resis': '85,86,90',
 'parent': 'DHR54',
 'parent_sequence': 'TEDERRELEKVARKAIEAAREGNTDEVREQLQRALEIARESGTEEAVKLA

# Setup Dask
Trying an adaptive SLURMCluster. To see the dashboard, forward port `8787` to `8000`:  
`local$ ssh -L 8000:localhost:8787 $USER@$HOSTNAME`  
Now the web UI is visible at `localhost:8000`.  
If you're using a local cluster, make sure the node this notebook is on has the same 
number of workers as cores.

In [4]:
!echo $HOSTNAME
!echo $USER

dig141
pleung


In [5]:
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Keyword arguments handed to SLURMCluster, kept in one place so they are easy
# to tweak before the cluster object is built.
_worker_kwargs = dict(
    cores=1,
    processes=1,
    job_cpu=1,
    memory="4GB",
    queue="medium",
    walltime="23:55:00",
    death_timeout=600,
    # TODO log and scratch?
)
cluster = SLURMCluster(**_worker_kwargs)
# Print the generated job script so the request can be checked by eye.
print(cluster.job_script())
# scale between 0 and 1000 workers as needed
# cluster.adapt(minimum=0, maximum=1000, wait_count=500) 
client = Client(cluster)
# Last expression of the cell: displays the client summary in the notebook.
client

#!/usr/bin/env bash

#SBATCH -J phils-dask-minion
#SBATCH -e /home/pleung/logs/slurm_logs/phils-dask-minion-%J.err
#SBATCH -o /home/pleung/logs/slurm_logs/phils-dask-minion-%J.out
#SBATCH -p medium
#SBATCH -n 1
#SBATCH --cpus-per-task=1
#SBATCH --mem=8G
#SBATCH -t 23:55:00

/home/pleung/.conda/envs/cereal/bin/python -m distributed.cli.dask_worker tcp://172.16.131.245:37017 --nthreads 1 --memory-limit 8.00GB --name name --nanny --death-timeout 600 --local-directory $TMPDIR/dask



0,1
Client  Scheduler: tcp://172.16.131.245:37017  Dashboard: http://172.16.131.245:8787/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [6]:
# client.close(); cluster.close()

# Set command line options, make tasks and submit to client
TODO

# Now do states
previously used `-indexed_structure_store:fragment_store /home/bcov/sc/scaffold_comparison/data/ss_grouped_vall_all.h5`

In [None]:
import logging
import pyrosetta.distributed.io as io
from pyrosetta.distributed.cluster.core import PyRosettaCluster
logging.basicConfig(level=logging.INFO)

# silents = glob(os.path.join(os.getcwd(), "03_silents/closed.silent"))

X = {"/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_pack_staple_pair/decoys/0002/2021.02.20.23.23.39.916734_954a2959ae0c4f75ad97e8fcf5f1f5d6.pdb.bz2": {"9mer": 0.5431199073791504, "abego_str": "XAAAAAAAAAAAAAAAABBGBAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAABBGBAAAAAAAAAAAAAAAAAAAAGAAAAAAAAAAAAAAAAAAABGBAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAABGBAAAAAAAAAAAAAAAAAAAAGX", "bb_clash": 48.902000427246094, "closure_type": "loop_match", "ddg": -89.55500030517578, "disulfide_at": "78,90", "dslf_fa13": -0.47517905943438565, "dslf_fa13_cart": -0.6857700347900391, "dssp": "LHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHHHHLLLHHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHHHHHLL", "fa_atr": -1131.9843269070648, "fa_dun_dev": 15.672077950487799, "fa_dun_rot": 121.2980151068943, "fa_dun_semi": 187.85986275555263, "fa_elec": -425.109028836243, "fa_intra_atr_xover4": -57.86834369889401, "fa_intra_elec": -50.53951781676426, "fa_intra_rep_xover4": 34.734032897313355, "fa_intra_sol_xover4": 43.46504718910495, "fa_rep": 195.2647764861525, "fa_sol": 867.0298174287864, "geometry": 0.0, "geometry_loop": 0.0, "hbond_bb_sc": -11.443073686854024, "hbond_lr_bb": -4.670637986233008, "hbond_sc": -35.36367629061941, "hbond_sr_bb": -171.67466600353632, "hxl_tors": 18.786549665883346, "interface_buried_sasa": 2205.675048828125, "lk_ball": 523.3223791056238, "lk_ball_bridge": 4.2440509080008715, "lk_ball_bridge_uncpl": 32.02622661811462, "lk_ball_iso": 1291.9311995426665, "new_loop_resis": "85,86", "nmodes_attempts": 2.0, "omega": 15.865812510099369, "p_aa_pp": -71.04002288653378, "parent": "DHR52_nocys", "parent_length": 172.0, "pivot_helix": 4.0, "pre_break_helix": 4.0, "pro_close": 0.0, "rama_prepro": -9.309119659204406, "ref": -15.663529999999994, "rmsd_cart": 0.6865320801734924, "rmsd_dual": 1.1806153059005737, "rmsd_nmp": 1.6203352212905884, "sc": 0.7599999904632568, "sc_int": 0.7870000004768372, "scaffold": "DHR", 
"score": 0.0, "score_A": -60.69900131225586, "score_B": -80.33699798583984, "score_per_res": -2.3440001010894775, "sfxn_used": "beta_nov16", "shift": 0.0, "total_length": 172.0, "total_score": -661.4189398076114}}
Y = {"/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_pack_staple_pair/decoys/0002/2021.02.20.23.23.39.916734_a5daf68243264ea4a8dbb2484f6fab2e.pdb.bz2": {"9mer": 0.9391315579414368, "abego_str": "XAAAAAAAAAAAAAAAABBGBAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAABGBAAAAAAAAAAAAAAAAAAAGBBBAAAAAAAAAAAAAAAAABGBAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAABGBAAAAAAAAAAAAAAAAAAAAGX", "bb_clash": 111.48200225830078, "closure_type": "strict_remodel", "ddg": -76.1760025024414, "disulfide_at": "72,109", "dslf_fa13": -0.06695382092641894, "dslf_fa13_cart": -0.12322807312011719, "dssp": "LHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHLLLLHHHHHHHHHHHHHHHHHHHHLL", "fa_atr": -1079.2873049761338, "fa_dun_dev": 18.912295744897747, "fa_dun_rot": 111.71575536679882, "fa_dun_semi": 184.64574386607612, "fa_elec": -422.0163939990402, "fa_intra_atr_xover4": -53.97379260146157, "fa_intra_elec": -44.691226652101356, "fa_intra_rep_xover4": 33.37675020555795, "fa_intra_sol_xover4": 40.65303384390451, "fa_rep": 185.7068697692135, "fa_sol": 822.6036036207154, "geometry": 0.0, "geometry_loop": 0.0, "hbond_bb_sc": -12.012074601538346, "hbond_lr_bb": -2.704688690753457, "hbond_sc": -45.127757257739646, "hbond_sr_bb": -170.00290400307995, "hxl_tors": 18.92320838874585, "interface_buried_sasa": 1830.885009765625, "lk_ball": 520.1542699235263, "lk_ball_bridge": 3.39963959627213, "lk_ball_bridge_uncpl": 23.85125444774613, "lk_ball_iso": 1257.6145668020388, "new_loop_resis": "85,86", "nmodes_attempts": 0.0, "omega": 18.278626409416237, "p_aa_pp": -69.51985019188471, "parent": "DHR52_nocys", "parent_length": 172.0, "pivot_helix": 4.0, "pre_break_helix": 4.0, "pro_close": 0.7658047149631776, "rama_prepro": 16.81776792010708, "ref": 0.25818200000001923, "rmsd_cart": 0.27339139580726624, "rmsd_dual": 0.5062369704246521, "rmsd_nmp": 1.0000000116860974e-07, "sc": 0.7419999837875366, "sc_int": 0.7379999756813049, 
"scaffold": "DHR", "score": 0.0, "score_A": -58.974998474121094, "score_B": -86.53700256347656, "score_per_res": -2.2709999084472656, "sfxn_used": "beta_nov16", "shift": 5.0, "total_length": 172.0, "total_score": -617.6003080366592}}

# Pair the two example states as a tuple: X first, Y second (create_tasks
# reads them back as XY[0] and XY[1]).
XY = X, Y
# Extra Rosetta command-line flags; create_tasks stores this dict under the
# "extra_options" key of each task.
# NOTE(review): the dunbrack_prob_* values are presumably rotamer-probability
# cutoffs — confirm against the Rosetta options documentation.
options = { 
    "-out:level": "300",
#     "-in:file:silent_struct_type": "binary",
    "-holes:dalphaball": "/home/bcov/ppi/tutorial_build/main/source/external/DAlpahBall/DAlphaBall.gcc",
    "-indexed_structure_store:fragment_store": "/net/databases/VALL_clustered/connect_chains/ss_grouped_vall_helix_shortLoop.h5",
    "-dunbrack_prob_buried": "0.8",
    "-dunbrack_prob_nonburied": "0.8", 
    "-dunbrack_prob_buried_semi": "0.8", 
    "-dunbrack_prob_nonburied_semi": "0.8",
}

# def create_tasks(silents, options):
def create_tasks(XY, options):
    """
    Yield the single task dict describing one X/Y state pair.

    XY : pair of score dicts, read as XY[0] (state X) and XY[1] (state Y)
    options : dict of extra Rosetta flags, stored under "extra_options"
    yields : one dict with the keys "options", "extra_options",
        "state_X_dict", "state_Y_dict" and "set_logging_handler"
    """
    # Only one task is produced for now; a per-silent-file loop was sketched
    # here earlier but is not active.
    state_x, state_y = XY[0], XY[1]
    yield {
        "options": "-corrections::beta_nov16 true",
        "extra_options": options,
        "state_X_dict": state_x,
        "state_Y_dict": state_y,
        "set_logging_handler": "interactive",
    }
        
# Only submit when the DEBUG environment variable is unset or empty.
if not os.getenv("DEBUG"):
    # The same ./05_two_state directory is used for both scratch and output.
    output_path = os.path.join(os.getcwd(), "05_two_state")
    # Protocols are listed as msd first, then score.
    # NOTE(review): sha1=None presumably bypasses the git-commit check —
    # confirm against the PyRosettaCluster documentation.
    PyRosettaCluster(
        tasks=create_tasks(XY, options),
        client=client,
        scratch_dir=output_path,
        output_path=output_path,
#         seeds=[1, 1, 1, 1, 1],        
        sha1=None,
    ).distribute(protocols=[msd, score])

# Look at scores
There is certainly a less embarrassing way to do this but at least this way is vectorized, so it should scale very well

In [12]:
def read_scorefile(scores):
    """
    Tabulate a JSON-lines scorefile into one row per decoy.

    Each non-empty line is expected to be a JSON object mapping a decoy path
    to a dict of that decoy's scores, e.g.
    ``{"/path/to/decoy.pdb.bz2": {"total_score": -1.0, ...}}``.

    scores : path to the scorefile, or an open file-like object
    returns : pandas.DataFrame indexed by decoy path with one column per score
        key; keys missing for a given decoy are filled with NaN

    Lines are parsed one at a time, so memory grows linearly with the number
    of decoys, instead of building an n x n frame and reading its diagonal.
    Blank lines are skipped; if a path occurs more than once, the last entry
    wins.
    """
    import json
    import pandas as pd

    # Accept an already-open handle as well as a filesystem path.
    if hasattr(scores, "read"):
        text = scores.read()
    else:
        with open(scores, encoding="utf-8") as handle:
            text = handle.read()

    # path -> score dict, in order of first appearance in the file
    records = {}
    for line in text.splitlines():
        if line.strip():
            records.update(json.loads(line))

    tabulated_scores = pd.DataFrame(
        list(records.values()), index=list(records.keys())
    )
    return tabulated_scores
    
# Load the scorefile found under ./04_pack_staple_pair into a DataFrame and
# preview its first rows.
output_path = os.path.join(os.getcwd(), "04_pack_staple_pair")
scores = os.path.join(output_path, "scores.json")
scores_df = read_scorefile(scores)
scores_df.head()

Unnamed: 0,9mer,abego_str,bb_clash,closure_type,ddg,disulfide_at,dslf_fa13,dslf_fa13_cart,dssp,fa_atr,...,sc_int,scaffold,score,score_A,score_B,score_per_res,sfxn_used,shift,total_length,total_score
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_pack_staple_pair/decoys/0000/2021.02.20.23.23.39.916734_4d50e1285cc24f838969da2c69e7379f.pdb.bz2,0.326431,XAAAAAAAAAAAAAAAABABAAAAAAAAAAAAAAAAABABAAAAAA...,37.347,loop_match,-38.312,72110,-0.312103,-0.178962,LHHHHHHHHHHHHHHHHLLLHHHHHHHHHHHHHHHHLLLLHHHHHH...,-947.126596,...,0.575,DHR,0.0,-91.007004,-92.731003,-2.261,beta_nov16,0.0,160.0,-554.371819
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_pack_staple_pair/decoys/0000/2021.02.20.23.23.39.916734_ccd9bb86d22647aca5d64bd8d92ec5dc.pdb.bz2,0.585618,XAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGBAAA...,45.882999,strict_remodel,-58.280998,71115,0.034506,0.226397,LHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLLHHH...,-1068.693504,...,0.668,DHR,0.0,-101.481003,-113.641998,-2.667,beta_nov16,0.0,172.0,-652.091863
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_pack_staple_pair/decoys/0000/2021.02.20.23.23.39.916734_6f580562975547d1a589eadd9948f540.pdb.bz2,0.492682,XAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGBAAA...,45.882999,strict_remodel,-58.280998,58100,-0.856059,-0.863674,LHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLLHHH...,-1073.787932,...,0.668,DHR,0.0,-101.481003,-113.641998,-2.667,beta_nov16,0.0,172.0,-658.278572
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_pack_staple_pair/decoys/0000/2021.02.20.23.23.39.916734_96223287cdcf45238e82c1a245c98144.pdb.bz2,0.5092,XAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGBAAA...,45.882999,strict_remodel,-58.280998,62112,0.31985,0.488985,LHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLLHHH...,-1070.14768,...,0.668,DHR,0.0,-101.481003,-113.641998,-2.667,beta_nov16,0.0,172.0,-659.37657
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/04_pack_staple_pair/decoys/0000/2021.02.20.23.23.39.916734_832cf846b50346c6a7007434004dbe6e.pdb.bz2,0.489375,XAAAAAAAAAAAAAAAAAAAAGBAAAAAAAAAAAAAAAAAAGBAAA...,45.882999,strict_remodel,-58.280998,70112,-0.424578,-0.277792,LHHHHHHHHHHHHHHHHHHHHLLHHHHHHHHHHHHHHHHHHLLHHH...,-1069.043643,...,0.668,DHR,0.0,-101.481003,-113.641998,-2.667,beta_nov16,0.0,172.0,-656.120368
