# Boilerplate

In [1]:
# python internal 
import collections
import copy
import gc
from glob import glob
import h5py
import itertools
import os
print(os.getcwd())
import random
import re
import socket
print(socket.gethostname())
import shutil
import subprocess
import sys
# conda/pip
import dask
import graphviz
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy
import seaborn as sns
import tensorflow as tf
from tqdm import tqdm
# special packages on the DIGS
import py3Dmol
import pymol
import pyrosetta
# notebook magic
%matplotlib inline
%load_ext autoreload
%autoreload 2

/mnt/home/pleung/projects/bistable_bundle/r3/hinges
dig64


# Flo's original approach
2. I design the states as "dimers"; an exemplary run, including the corresponding scripts, can be found in `/home/flop/switch/5thround/DHRs/des_states6_np_all_flexbb/`

This is the last round I did: nonpolar interface, flex bb, no selection before 
design. In earlier rounds I played around with polar interface design, 
fixedbb, only rigid body movement across the jump, keeping the bb fixed in one
of the two halves etc.

but I settled for the simple approach in des_states6

3. I then filtered the designed states using standard interface metrics. I made selections of different stringency; unfortunately, I did not save the exact criteria for the least stringent one that I ended up using

but I think it was probably something like interface_sc >= 0.65, score per res
<= -2.5, geometry == 1, and some kind of sasa/cms/ddG cutoff 
(or maybe not, I might have ignored that one here)

# I will follow Flo's design procedure with some changes.
I will use the serialization build of PyRosetta to enable recording user defined info about the designs.  
This enables downstream inline filtering and data analysis, as well as clustering by lineage.
I will try using Hugh's 19A refit weights/scorefunction, as well as `beta_nov16` to compare their efficacy.  
I tried fixbb followed by flexbb design for the design, and took out a lot of the filters I didn't think were informative.

# Make functions for design and scoring

In [2]:
from pyrosetta.distributed.packed_pose.core import PackedPose
from typing import * # TODO hopefully pyrosetta distributed can handle type hints

def load(silent: str, **kwargs) -> Generator[PackedPose, None, None]:
    """Yield every PackedPose stored in a silent file.

    Args:
        silent: Path of the silent file to read. When it is None, the path is
            taken from ``kwargs["-in:file:silent"]`` instead.
        **kwargs: Task dictionary entries; only ``"-in:file:silent"`` is read,
            and only when ``silent`` is None.

    Yields:
        The poses returned by ``io.poses_from_silent``, one at a time.

    Raises:
        KeyError: If ``silent`` is None and ``"-in:file:silent"`` is not in
            ``kwargs``.
    """
    # Imported inside the function, as in the other protocol functions of this
    # notebook (presumably so the function is self-contained on remote
    # workers -- TODO confirm).
    import pyrosetta.distributed.io as io
    if silent is None:
        silent = kwargs["-in:file:silent"]
    yield from io.poses_from_silent(silent)

        
def design(ppose: PackedPose, **kwargs) -> Generator[PackedPose, PackedPose, None]:
    """Design the interface of a two-chain pose and yield the designed PackedPose.

    The RosettaScripts protocol below generates a structure profile, runs one
    FastDesign repeat with a fixed backbone (``fixbb_with_jump``) followed by
    two FastDesign repeats with a flexible backbone (``flexbb_with_jump``),
    and finally clears the constraints.

    Args:
        ppose: Pose to design. When it is None, the pose is read from the file
            named by ``kwargs["s"]``.
        **kwargs: Task dictionary entries. ``"sfxn"`` (optional) is the weights
            name substituted into the XML, defaulting to ``"beta_nov16"``;
            ``"s"`` is only read when ``ppose`` is None.

    Yields:
        A single PackedPose carrying the extra score ``"sfxn_used"`` so that
        downstream steps can look up which weights were used.
    """
    import pyrosetta
    import pyrosetta.distributed.io as io
    from pyrosetta.distributed.tasks.rosetta_scripts import SingleoutputRosettaScriptsTask
    if ppose is None:
        ppose = io.pose_from_file(kwargs["s"])
    sfxn = kwargs.get("sfxn", "beta_nov16")
    xml = """
    <ROSETTASCRIPTS>
        <SCOREFXNS>
            <ScoreFunction name="sfxn" weights="{sfxn}" /> 
            <ScoreFunction name="sfxn_design" weights="{sfxn}" >
                <Reweight scoretype="res_type_constraint" weight="1.0" />
            </ScoreFunction>
        </SCOREFXNS>
        <RESIDUE_SELECTORS>
            <Chain name="chA" chains="A"/>
            <Chain name="chB" chains="B"/>
            <Neighborhood name="interface_chA" selector="chB" distance="8.0" />
            <Neighborhood name="interface_chB" selector="chA" distance="8.0" />
            <And name="interface_AB" selectors="interface_chA,interface_chB" />
            <ResidueName name="pro_and_gly_positions" residue_name3="PRO,GLY" />
            <Or name="designable" selectors="interface_AB" />
            <Not name="not_designable" selector="designable" />
            <Layer name="surface" select_core="false" select_boundary="false" select_surface="true"
                use_sidechain_neighbors="true"/>
            <Layer name="boundary" select_core="false" select_boundary="true" select_surface="false" 
                use_sidechain_neighbors="true"/>
            <Layer name="core" select_core="true" select_boundary="false" select_surface="false" 
                use_sidechain_neighbors="true"/>
            <SecondaryStructure name="sheet" overlap="0" minH="3" minE="2" include_terminal_loops="false" 
                use_dssp="true" ss="E"/>
            <SecondaryStructure name="entire_loop" overlap="0" minH="3" minE="2" include_terminal_loops="true" 
                use_dssp="true" ss="L"/>
            <SecondaryStructure name="entire_helix" overlap="0" minH="3" minE="2" include_terminal_loops="false"
                use_dssp="true" ss="H"/>
            <And name="helix_cap" selectors="entire_loop">
                <PrimarySequenceNeighborhood lower="1" upper="0" selector="entire_helix"/>
            </And>
            <And name="helix_start" selectors="entire_helix">
                <PrimarySequenceNeighborhood lower="0" upper="1" selector="helix_cap"/>
            </And>
            <And name="helix" selectors="entire_helix">
                <Not selector="helix_start"/>
            </And>
            <And name="loop" selectors="entire_loop">
                <Not selector="helix_cap"/>
            </And>
        </RESIDUE_SELECTORS>
        <TASKOPERATIONS>
            <DesignRestrictions name="layer_design">
                <Action selector_logic="surface AND helix_start"  aas="DEHKPQR"/>
                <Action selector_logic="surface AND helix"        aas="EHKQR"/>
                <Action selector_logic="surface AND sheet"        aas="EHKNQRST"/>
                <Action selector_logic="surface AND loop"         aas="DEGHKNPQRST"/>
                <Action selector_logic="boundary AND helix_start" aas="ADEHIKLNPQRSTVWY"/>
                <Action selector_logic="boundary AND helix"       aas="ADEHIKLNQRSTVWYM"/>
                <Action selector_logic="boundary AND sheet"       aas="DEFHIKLNQRSTVWY"/>
                <Action selector_logic="boundary AND loop"        aas="ADEFGHIKLNPQRSTVWY"/>
                <Action selector_logic="core AND helix_start"     aas="AFILVWYNQSTHP"/>
                <Action selector_logic="core AND helix"           aas="AFILVWM"/>
                <Action selector_logic="core AND sheet"           aas="FILVWY"/>
                <Action selector_logic="core AND loop"            aas="AFGILPVWYSM"/>
                <Action selector_logic="helix_cap"                aas="DNSTP"/>
            </DesignRestrictions>
            <PruneBuriedUnsats name="prune" allow_even_trades="false" 
                atomic_depth_cutoff="3.5" minimum_hbond_energy="-1.0"/>
            <ProteinProteinInterfaceUpweighter name="upweight_int" interface_weight="3" />
            <LimitAromaChi2 name="arochi" chi2max="110" chi2min="70" include_trp="True" />
            <ExtraRotamersGeneric name="ex1_ex2" ex1="1" ex2="1" />
            <OperateOnResidueSubset name="int_only" selector="not_designable">
                <PreventRepackingRLT/>
            </OperateOnResidueSubset>
            <OperateOnResidueSubset name="restrict_PRO_GLY" selector="pro_and_gly_positions">
                <PreventRepackingRLT/>
            </OperateOnResidueSubset>
        </TASKOPERATIONS>
        <FILTERS>
        </FILTERS>
        <MOVERS>
            <StructProfileMover name="gen_profile" add_csts_to_pose="1" consider_topN_frags="100" 
                eliminate_background="0" ignore_terminal_residue="1" only_loops="0"
                residue_selector="designable" burialWt="0" RMSthreshold="0.6" />
            <ClearConstraintsMover name="clear_constraints" />
            <FastDesign name="fixbb_with_jump" scorefxn="sfxn_design" repeats="1" 
                task_operations="arochi,ex1_ex2,upweight_int,restrict_PRO_GLY,layer_design,prune,int_only" 
                batch="false" ramp_down_constraints="false" cartesian="false" bondangle="false" 
                bondlength="false" min_type="dfpmin_armijo_nonmonotone" relaxscript="InterfaceDesign2019" >
                <MoveMap name="MM" >
                    <Chain number="1" chi="true" bb="false" />
                    <Chain number="2" chi="true" bb="false" />
                    <Jump number="1" setting="true" />
                </MoveMap>
            </FastDesign>
            <FastDesign name="flexbb_with_jump" scorefxn="sfxn_design" repeats="2"
                task_operations="arochi,ex1_ex2,upweight_int,restrict_PRO_GLY,layer_design,prune,int_only" 
                batch="false" ramp_down_constraints="false" cartesian="false" bondangle="false" 
                bondlength="false" min_type="dfpmin_armijo_nonmonotone" relaxscript="InterfaceDesign2019" >
                <MoveMap name="MM" >
                    <Chain number="1" chi="true" bb="true" />
                    <Chain number="2" chi="true" bb="true" />
                    <Jump number="1" setting="true" />
                </MoveMap>
            </FastDesign>
        </MOVERS>
        <APPLY_TO_POSE>
        </APPLY_TO_POSE>
        <PROTOCOLS>
            <Add mover="gen_profile" />
            <Add mover="fixbb_with_jump" />
            <Add mover="flexbb_with_jump" />
            <Add mover="clear_constraints" />
        </PROTOCOLS>
        <OUTPUT scorefxn="sfxn" />
    </ROSETTASCRIPTS>
    """.format(sfxn=sfxn)
    fast_design = SingleoutputRosettaScriptsTask(xml)
    # Run the protocol on a clone so the input pose is left untouched.
    designed_ppose = fast_design(ppose.pose.clone())
    # Record which weights were used; score() reads this back from the pose.
    pose = io.to_pose(designed_ppose)
    pyrosetta.rosetta.core.pose.setPoseExtraScore(pose, "sfxn_used", sfxn)
    designed_ppose = io.to_packed(pose)
    yield designed_ppose

    
def score(ppose: PackedPose, **kwargs) -> PackedPose:
    """Evaluate interface metrics on a designed pose and return the scored pose.

    The weights name is read from the pose's ``"sfxn_used"`` score entry and
    substituted into the RosettaScripts XML below, which applies the filters
    ``ddg``, ``interface_buried_sasa``, ``sc``, ``sc_int``, ``score_A``,
    ``score_B`` and ``score_per_res``.

    Args:
        ppose: Pose to score. When it is None, the pose is read from the file
            named by ``kwargs["s"]``.
        **kwargs: Task dictionary entries; only ``"s"`` is read, and only when
            ``ppose`` is None.

    Returns:
        The PackedPose produced by the RosettaScripts task.

    Raises:
        KeyError: If the pose has no ``"sfxn_used"`` score entry.
    """
    import pyrosetta
    import pyrosetta.distributed.io as io
    from pyrosetta.distributed.tasks.rosetta_scripts import SingleoutputRosettaScriptsTask
    if ppose is None:
        ppose = io.pose_from_file(kwargs["s"])
    # Score with the same weights that design() recorded on the pose.
    sfxn = ppose.pose.scores["sfxn_used"]
    xml = """
    <ROSETTASCRIPTS>
        <SCOREFXNS>
            <ScoreFunction name="sfxn" weights="{sfxn}" />            
        </SCOREFXNS>
        <RESIDUE_SELECTORS>
            <Chain name="chA" chains="A"/>
            <Chain name="chB" chains="B"/>
            <Neighborhood name="interface_chA" selector="chB" distance="8.0" />
            <Neighborhood name="interface_chB" selector="chA" distance="8.0" />
            <And name="interface_AB" selectors="interface_chA,interface_chB" />            
        </RESIDUE_SELECTORS>
        <TASKOPERATIONS>
            <ProteinInterfaceDesign name="pack_long" design_chain1="0" design_chain2="0" jump="1" interface_distance_cutoff="15"/>
        </TASKOPERATIONS>
        <MOVERS>
            <SwitchChainOrder name="cutB" chain_order="1" />
            <SwitchChainOrder name="cutA" chain_order="2" />
            <ScoreMover name="scorepose" scorefxn="sfxn" verbose="false" />
            <TaskAwareMinMover name="min" scorefxn="sfxn" bb="0" chi="1" task_operations="pack_long" />
        </MOVERS>
        <FILTERS>
            <Ddg name="ddg" threshold="-10" jump="1" repeats="5" repack="1" relax_mover="min" confidence="0" scorefxn="sfxn" />
            <Sasa name="interface_buried_sasa" confidence="0" />
            <SSShapeComplementarity name="sc" verbose="1" loops="1" helices="1" />
            <ShapeComplementarity name="sc_int" verbose="0" min_sc="0.55" write_int_area="1" write_median_dist="1" jump="1" confidence="0"/>
            <TaskAwareScoreType name="tot_score" scorefxn="sfxn" score_type="total_score" threshold="0" mode="total"  confidence="0" />
            <MoveBeforeFilter name="score_A" mover="cutB" filter="tot_score" confidence="0"/>
            <MoveBeforeFilter name="score_B" mover="cutA" filter="tot_score" confidence="0"/>
            <ScoreType name="total_score_pose" scorefxn="sfxn" score_type="total_score" threshold="0" confidence="0" />
            <ResidueCount name="count" />
            <CalculatorFilter name="score_per_res" equation="total_score_full / res" threshold="-2.0" confidence="0">
                <Var name="total_score_full" filter="total_score_pose"/>
                <Var name="res" filter="count"/>
            </CalculatorFilter>
        </FILTERS>
        <APPLY_TO_POSE>
        </APPLY_TO_POSE>
        <PROTOCOLS>
            <Add filter_name="ddg" />
            <Add filter_name="interface_buried_sasa" />
            <Add filter_name="sc" />
            <Add filter_name="sc_int" />
            <Add filter_name="score_A"/>
            <Add filter_name="score_B"/>
            <Add filter_name="score_per_res" />
        </PROTOCOLS>
        <OUTPUT scorefxn="sfxn" />
    </ROSETTASCRIPTS>
    """.format(sfxn=sfxn)
    scored = SingleoutputRosettaScriptsTask(xml)
    # Run the protocol on a clone so the input pose is left untouched.
    scored_ppose = scored(ppose.pose.clone())
    return scored_ppose

def join_flag_string(file:str) -> str:
    """Return the contents of a flags file with comment-bearing lines removed.

    Any line containing ``#`` anywhere is dropped entirely, including lines
    with a trailing inline comment. Remaining lines are concatenated as-is,
    keeping their newline characters.

    Args:
        file: Path of the flags file to read.

    Returns:
        The kept lines joined into a single string (empty if none are kept).
    """
    with open(file) as f:
        return "".join(line for line in f if "#" not in line)

# Setup Dask
Trying an adaptive SLURMCluster. To see the dashboard, forward port `8787` to `8000`:  
`local$ ssh -L 8000:localhost:8787 $USER@$HOSTNAME`  
now, the web UI is visible at `localhost:8000` 

In [3]:
!echo $HOSTNAME
!echo $USER

dig64
pleung


In [4]:
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Per-user scratch area: used as the workers' local directory and as the
# destination of the SLURM .out files (remember to delete them later).
scratch_dir = os.path.join("/net/scratch", os.environ["USER"])
cluster = SLURMCluster(
    # SLURM partition and per-job resources
    queue="medium",
    cores=1,
    processes=1,
    job_cpu=1,
    memory="8GB",
    # try to prevent death timeouts
    walltime="23:30:00",
    death_timeout=1200,
    local_directory=scratch_dir,
    job_extra=["-o " + os.path.join(scratch_dir, "slurm-%j.out")],
)
print(cluster.job_script())
# scale between 0 and 1000 workers
cluster.adapt(minimum=0, maximum=1000, wait_count=400)
client = Client(cluster)
client

#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -p medium
#SBATCH -n 1
#SBATCH --cpus-per-task=1
#SBATCH --mem=8G
#SBATCH -t 23:30:00
#SBATCH -o /net/scratch/pleung/slurm-%j.out

/home/pleung/.conda/envs/cereal/bin/python -m distributed.cli.dask_worker tcp://172.16.131.94:32943 --nthreads 1 --memory-limit 8.00GB --name name --nanny --death-timeout 1200 --local-directory /net/scratch/pleung



0,1
Client  Scheduler: tcp://172.16.131.94:32943  Dashboard: http://172.16.131.94:8787/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [5]:
# client.close(); cluster.close()

In [7]:
import logging
import pyrosetta.distributed.io as io
from pyrosetta.distributed.cluster.core import PyRosettaCluster

logging.basicConfig(level=logging.INFO)
# glob() returns a list; it is empty when the silent file does not exist, in
# which case create_tasks() below yields no tasks.
silents = glob(os.path.join(os.getcwd(), "01_silents/states.silent")) # TODO states

# Flags attached to every task under the "extra_options" key by create_tasks().
# NOTE(review): the dalphaball and fragment_store paths point into another
# user's home directory -- confirm they are readable from the workers.
options = { 
    "-out:level": "300",
    "-in:file:silent_struct_type": "binary",
    "-holes:dalphaball": "/home/bcov/ppi/tutorial_build/main/source/external/DAlpahBall/DAlphaBall.gcc",
    "-indexed_structure_store:fragment_store": "/home/bcov/sc/scaffold_comparison/data/ss_grouped_vall_all.h5",
    "-dunbrack_prob_buried": "0.8",
    "-dunbrack_prob_nonburied": "0.8", 
    "-dunbrack_prob_buried_semi": "0.8", 
    "-dunbrack_prob_nonburied_semi": "0.8",
    # NOTE(review): presumably fixes the Rosetta RNG seed for reproducibility -- confirm.
    "-run:constant_seed" : "1",
}

def create_tasks(silents, options):
    """Yield one task dictionary per (silent file, scorefunction) pair.

    Each task carries the initialisation flags under "options", the shared
    ``options`` mapping under "extra_options", the logging handler setting,
    the silent file path under "-in:file:silent", and the scorefunction name
    under "sfxn".
    """
    scorefunctions = ["beta_nov16"]  # TODO torsional
    for silent_path in silents:
        for sfxn in scorefunctions:
            if sfxn != "hh_19A_torsional":
                init_flags = "-corrections::beta_nov16 true"
            else:
                # The refit weights need the extra flags stored in a flags file.
                init_flags = "{addnl}".format(addnl=join_flag_string("hh_19A_cleaned2.flags"))
            yield {
                "options": init_flags,
                "extra_options": options,
                "set_logging_handler": "interactive",
                "-in:file:silent": silent_path,
                "sfxn": sfxn,
            }
        
# Launch the distributed run unless the DEBUG environment variable is set to a
# non-empty value.
if not os.getenv("DEBUG"):
    # The same directory is used for both scratch files and final output.
    output_path = os.path.join(os.getcwd(), "02_design_and_filter") # TODO 02_design_and_filter
    PyRosettaCluster(
        tasks=create_tasks(silents, options),
        client=client,
        scratch_dir=output_path,
        output_path=output_path,
        nstruct=5,
        # NOTE(review): the recorded run of this cell ended with
        # "InvalidGitRepositoryError: The working directory is dirty!", so this
        # setting presumably requires a clean git working tree -- confirm
        # against the PyRosettaCluster documentation.
        sha1="",
        # One seed per entry in `protocols` below -- TODO confirm that is the intent.
        seeds = [1, 1, 1],        
    ).distribute(protocols=[load, design, score])

https://docs.anaconda.com/anaconda/install



InvalidGitRepositoryError: The working directory is dirty! Commit local changes to ensure reproducibility.

In [11]:
def read_scorefile(scores):
    """Tabulate a JSON-lines scorefile into a DataFrame.

    Each non-empty line is expected to be a JSON object mapping a decoy
    identifier (here, the output file path) to a dictionary of scores. The
    lines are parsed one by one, so memory use grows linearly with the number
    of decoys instead of building an n-by-n intermediate table.

    Args:
        scores: Path of the scorefile, or an open text file-like object.

    Returns:
        A DataFrame with one row per decoy (indexed by its identifier) and one
        column per score name; empty if the file has no records.
    """
    import json
    import pandas as pd

    def _records(handle):
        # Yield (decoy identifier, score dict) pairs, skipping blank lines.
        for line in handle:
            line = line.strip()
            if line:
                yield from json.loads(line).items()

    if hasattr(scores, "read"):
        pairs = list(_records(scores))
    else:
        with open(scores) as handle:
            pairs = list(_records(handle))
    index = [decoy for decoy, _ in pairs]
    rows = [decoy_scores for _, decoy_scores in pairs]
    tabulated_scores = pd.DataFrame(rows, index=index)
    return tabulated_scores
    
# Load the scorefile from the design output directory into a DataFrame and
# preview its first rows.
output_path = os.path.join(os.getcwd(), "02_design_and_filter")
scores = os.path.join(output_path, "scores.json")
scores_df = read_scorefile(scores)
scores_df.head()

Unnamed: 0,bb_clash,dslf_fa13,fa_atr,fa_dun_dev,fa_dun_rot,fa_dun_semi,fa_elec,fa_intra_atr_xover4,fa_intra_elec,fa_intra_rep_xover4,...,pivot_helix,pre_break_helix,pro_close,rama_prepro,ref,score,score_per_res,sfxn_used,shift,total_score
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/02_design_and_filter/decoys/0000/2021.01.27.18.54.05.202933_9e0d8e9d38bd4ca592185fbc62d52734.pdb.bz2,79.254997,0.0,-1982.117179,0.09695,265.07337,349.916463,-671.439592,-125.371328,-43.525187,156.595559,...,7.0,6.0,6.824366,106.442752,-92.396114,0.0,3.468316,beta_nov16,-7.0,1092.51943
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/02_design_and_filter/decoys/0000/2021.01.27.18.54.05.202933_48eac8ef147d4f82853f5cfda55235b7.pdb.bz2,15760.061523,0.0,-1411.791941,218.664795,142.0323,244.197716,-294.906511,-69.05891,-41.741428,107.73322,...,4.0,4.0,1.487669,11.427944,1.564904,0.0,171.332443,beta_nov16,-4.0,33752.490246
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/02_design_and_filter/decoys/0000/2021.01.27.18.54.05.202933_42cfb57cbdd14a1ba71b5c134ba586cb.pdb.bz2,54.127998,0.0,-1452.848559,268.496,192.316443,312.843211,-456.18216,-78.102099,-39.469085,208.81989,...,4.0,4.0,0.0,28.798095,-84.03764,0.0,60.513203,beta_nov16,-1.0,12465.719473
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/02_design_and_filter/decoys/0000/2021.01.27.18.54.05.202933_a3e4b4f6fe234d35824ac35d55cef737.pdb.bz2,19420.082031,0.0,-2480.855817,0.03563,237.814767,380.326041,-531.711622,-113.597061,-60.246446,137.373804,...,7.0,6.0,10.488939,95.630387,-129.03536,0.0,210.30719,beta_nov16,-2.0,64564.305146
/mnt/home/pleung/projects/bistable_bundle/r3/hinges/02_design_and_filter/decoys/0000/2021.01.27.18.54.05.202933_4f03dac74d35480dabc05b42ffa9da64.pdb.bz2,45.882999,0.0,-1043.058252,13.59718,119.004367,188.651724,-409.752516,-60.756557,-39.801217,66.713475,...,5.0,4.0,0.0,38.299686,-30.833808,0.0,17.88409,beta_nov16,4.0,3040.295486


In [12]:
bz

0,1
Client  Scheduler: tcp://172.16.131.68:38455  Dashboard: http://172.16.131.68:8787/status,Cluster  Workers: 1  Cores: 1  Memory: 8.00 GB
