# Boilerplate

In [1]:
# python internal 
import collections
import copy
import gc
from glob import glob
import h5py
import itertools
import os
print(os.getcwd())
import random
import re
import socket
print(socket.gethostname())
import shutil
import subprocess
import sys
# conda/pip
import dask
import graphviz
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy
import seaborn as sns
import tensorflow as tf
from tqdm import tqdm
# special packages on the DIGS
import py3Dmol
import pymol
import pyrosetta
sys.path.insert(0, "/home/pleung/bin/")
import rosetta_utils
from rosetta_utils import utils_new
# notebook magic
%matplotlib inline
%load_ext autoreload
%autoreload 2

/mnt/home/pleung/projects/bistable_bundle/r3/hinges
dig85


# Flo's original approach
2. I design the states as "dimers"; an example run, including the corresponding scripts, can be found in `/home/flop/switch/5thround/DHRs/des_states6_np_all_flexbb/`

this is the last round I did, nonpolar interface, flex bb, no selection before 
design. In earlier rounds I played around with polar interface design, 
fixedbb, only rigid body movement across the jump, keeping the bb fixed in one
of the two halves etc.

but I settled for the simple approach in des_states6

3. I then filtered the designed states using standard interface metrics. I made selections of different stringency; unfortunately I did not save the exact criteria for the least stringent one that I ended up using

but I think it was probably something like interface_sc >= 0.65, score per res
<= -2.5, geometry == 1, and some kind of sasa/cms/ddG cutoff 
(or maybe not, I might have ignored that one here)

# I will follow Flo's design procedure with some changes.
I will use the serialization build of PyRosetta to enable recording user defined info about the designs.  
This enables downstream inline filtering and data analysis, as well as clustering by lineage.
I will try using Hugh's 19A refit weights/scorefunction, as well as `beta_nov16` to compare their efficacy.  
I tried fixbb followed by flexbb design for the design, and took out a lot of the filters I didn't think were informative.

# Make functions for design and scoring

In [21]:
def load(silent, **kwargs):
    """
    Yield every pose stored in a silent file, one at a time.

    Args:
        silent: Path of the silent file to read. When it is None, the path
            is taken from ``kwargs["-in:file:silent"]`` instead.
        **kwargs: Task options. Only ``"-in:file:silent"`` is read, and only
            when `silent` is None.

    Yields:
        Each item produced by ``pyrosetta.distributed.io.poses_from_silent``.

    Raises:
        KeyError: If `silent` is None and ``"-in:file:silent"`` is absent.
    """
    # Imported in the function body so the function does not depend on
    # names defined in the notebook's global namespace.
    import pyrosetta.distributed.io as io

    # Identity test: `== None` defers to whatever __eq__ the argument defines.
    if silent is None:
        silent = kwargs["-in:file:silent"]
    yield from io.poses_from_silent(silent)

def design(ppose, **kwargs):
    """
    Run the interface-design RosettaScripts protocol on one pose.

    The protocol steps, in order, are: the `score_per_res` filter,
    `gen_profile` (StructProfileMover), `fixbb_with_jump` (FastDesign with
    backbone fixed), `flexbb_with_jump` (FastDesign with backbone free) and
    `clear_constraints`.

    Args:
        ppose: Input packed pose; its ``.pose.clone()`` is what gets designed.
            When None, a pose is read from the file named by ``kwargs["s"]``.
        **kwargs: Task options. ``"sfxn"`` selects the weights used for both
            score functions in the script (default ``"beta_nov16"``); ``"s"``
            is only read when `ppose` is None.

    Yields:
        The single result of applying the RosettaScripts task to the clone.

    Raises:
        KeyError: If `ppose` is None and ``"s"`` is absent from `kwargs`.
    """
    import pyrosetta
    import pyrosetta.distributed.io as io
    from pyrosetta.distributed.tasks.rosetta_scripts import SingleoutputRosettaScriptsTask

    if ppose is None:
        ppose = io.pose_from_file(kwargs["s"])
    sfxn = kwargs.get("sfxn", "beta_nov16")
    # The only substitution made by .format() below is {sfxn}; the script
    # contains no other braces.
    xml = """
    <ROSETTASCRIPTS>
        <SCOREFXNS>
            <ScoreFunction name="sfxn" weights="{sfxn}" /> 
            <ScoreFunction name="sfxn_design" weights="{sfxn}" >
                <Reweight scoretype="res_type_constraint" weight="1.0" />
            </ScoreFunction>
        </SCOREFXNS>
        <RESIDUE_SELECTORS>
            <Chain name="chA" chains="A"/>
            <Chain name="chB" chains="B"/>
            <Neighborhood name="interface_chA" selector="chB" distance="8.0" />
            <Neighborhood name="interface_chB" selector="chA" distance="8.0" />
            <And name="interface_AB" selectors="interface_chA,interface_chB" />
            <ResidueName name="pro_and_gly_positions" residue_name3="PRO,GLY" />
            <Or name="designable" selectors="interface_AB" />
            <Not name="not_designable" selector="designable" />
            <Layer name="surface" select_core="false" select_boundary="false" select_surface="true"
                use_sidechain_neighbors="true"/>
            <Layer name="boundary" select_core="false" select_boundary="true" select_surface="false" 
                use_sidechain_neighbors="true"/>
            <Layer name="core" select_core="true" select_boundary="false" select_surface="false" 
                use_sidechain_neighbors="true"/>
            <SecondaryStructure name="sheet" overlap="0" minH="3" minE="2" include_terminal_loops="false" 
                use_dssp="true" ss="E"/>
            <SecondaryStructure name="entire_loop" overlap="0" minH="3" minE="2" include_terminal_loops="true" 
                use_dssp="true" ss="L"/>
            <SecondaryStructure name="entire_helix" overlap="0" minH="3" minE="2" include_terminal_loops="false"
                use_dssp="true" ss="H"/>
            <And name="helix_cap" selectors="entire_loop">
                <PrimarySequenceNeighborhood lower="1" upper="0" selector="entire_helix"/>
            </And>
            <And name="helix_start" selectors="entire_helix">
                <PrimarySequenceNeighborhood lower="0" upper="1" selector="helix_cap"/>
            </And>
            <And name="helix" selectors="entire_helix">
                <Not selector="helix_start"/>
            </And>
            <And name="loop" selectors="entire_loop">
                <Not selector="helix_cap"/>
            </And>
        </RESIDUE_SELECTORS>
        <TASKOPERATIONS>
            <DesignRestrictions name="layer_design">
                <Action selector_logic="surface AND helix_start"  aas="DEHKPQR"/>
                <Action selector_logic="surface AND helix"        aas="EHKQR"/>
                <Action selector_logic="surface AND sheet"        aas="EHKNQRST"/>
                <Action selector_logic="surface AND loop"         aas="DEGHKNPQRST"/>
                <Action selector_logic="boundary AND helix_start" aas="ADEHIKLNPQRSTVWY"/>
                <Action selector_logic="boundary AND helix"       aas="ADEHIKLNQRSTVWYM"/>
                <Action selector_logic="boundary AND sheet"       aas="DEFHIKLNQRSTVWY"/>
                <Action selector_logic="boundary AND loop"        aas="ADEFGHIKLNPQRSTVWY"/>
                <Action selector_logic="core AND helix_start"     aas="AFILVWYNQSTHP"/>
                <Action selector_logic="core AND helix"           aas="AFILVWM"/>
                <Action selector_logic="core AND sheet"           aas="FILVWY"/>
                <Action selector_logic="core AND loop"            aas="AFGILPVWYSM"/>
                <Action selector_logic="helix_cap"                aas="DNSTP"/>
            </DesignRestrictions>
            <PruneBuriedUnsats name="prune" allow_even_trades="false" 
                atomic_depth_cutoff="3.5" minimum_hbond_energy="-1.0"/>
            <ProteinProteinInterfaceUpweighter name="upweight_int" interface_weight="3" />
            <LimitAromaChi2 name="arochi" chi2max="110" chi2min="70" include_trp="True" />
            <ExtraRotamersGeneric name="ex1_ex2" ex1="1" ex2="1" />
            <OperateOnResidueSubset name="int_only" selector="not_designable">
                <PreventRepackingRLT/>
            </OperateOnResidueSubset>
            <OperateOnResidueSubset name="restrict_PRO_GLY" selector="pro_and_gly_positions">
                <PreventRepackingRLT/>
            </OperateOnResidueSubset>
        </TASKOPERATIONS>
        <FILTERS>
            <ScoreType name="total_score_pose" scorefxn="sfxn" score_type="total_score" threshold="0" confidence="0" />
            <ResidueCount name="count" />
            <CalculatorFilter name="score_per_res" equation="total_score_full / res" threshold="-2.0" confidence="0">
                <Var name="total_score_full" filter="total_score_pose"/>
                <Var name="res" filter="count"/>
            </CalculatorFilter>
        </FILTERS>
        <MOVERS>
            <StructProfileMover name="gen_profile" add_csts_to_pose="1" consider_topN_frags="100" 
                eliminate_background="0" ignore_terminal_residue="1" only_loops="0"
                residue_selector="designable" burialWt="0" RMSthreshold="0.6" />
            <ClearConstraintsMover name="clear_constraints" />
            <FastDesign name="fixbb_with_jump" scorefxn="sfxn_design" repeats="1" 
                task_operations="arochi,ex1_ex2,upweight_int,restrict_PRO_GLY,layer_design,prune,int_only" 
                batch="false" ramp_down_constraints="false" cartesian="false" bondangle="false" 
                bondlength="false" min_type="dfpmin_armijo_nonmonotone" relaxscript="InterfaceDesign2019" >
                <MoveMap name="MM" >
                    <Chain number="1" chi="true" bb="false" />
                    <Chain number="2" chi="true" bb="false" />
                    <Jump number="1" setting="true" />
                </MoveMap>
            </FastDesign>
            <FastDesign name="flexbb_with_jump" scorefxn="sfxn_design" repeats="2"
                task_operations="arochi,ex1_ex2,upweight_int,restrict_PRO_GLY,layer_design,prune,int_only" 
                batch="false" ramp_down_constraints="false" cartesian="false" bondangle="false" 
                bondlength="false" min_type="dfpmin_armijo_nonmonotone" relaxscript="InterfaceDesign2019" >
                <MoveMap name="MM" >
                    <Chain number="1" chi="true" bb="true" />
                    <Chain number="2" chi="true" bb="true" />
                    <Jump number="1" setting="true" />
                </MoveMap>
            </FastDesign>
        </MOVERS>
        <APPLY_TO_POSE>
        </APPLY_TO_POSE>
        <PROTOCOLS>
            <Add filter="score_per_res" /> <!-- TODO -->
            <Add mover="gen_profile" />
            <Add mover="fixbb_with_jump" />
            <Add mover="flexbb_with_jump" />
            <Add mover="clear_constraints" />
        </PROTOCOLS>
        <OUTPUT scorefxn="sfxn" />
    </ROSETTASCRIPTS>
    """.format(sfxn=sfxn)
    fast_design = SingleoutputRosettaScriptsTask(xml)
    # Design a clone so the pose handed in by the caller is left untouched.
    designed_ppose = fast_design(ppose.pose.clone())
    yield designed_ppose

In [17]:
def score(ppose, **kwargs):
    """
    Apply the scoring RosettaScripts protocol to one pose and return the result.

    The protocol consists only of filters, all declared with `beta_nov16`
    weights: ddg, interface_buried_sasa, packstat, psipred_A/B, sc, sc_int,
    score_A/B, score_per_res and wnm_all/A/B. The per-chain variants
    (`*_A`, `*_B`) are MoveBeforeFilter wrappers around the `cutB` / `cutA`
    SwitchChainOrder movers.

    Args:
        ppose: Input packed pose; its ``.pose.clone()`` is what gets scored.
            When None, a pose is read from the file named by ``kwargs["s"]``.
        **kwargs: Task options. Only ``"s"`` is read, and only when `ppose`
            is None.

    Returns:
        The result of applying the RosettaScripts task to the clone.

    Raises:
        KeyError: If `ppose` is None and ``"s"`` is absent from `kwargs`.
    """
    import pyrosetta
    import pyrosetta.distributed.io as io
    from pyrosetta.distributed.tasks.rosetta_scripts import SingleoutputRosettaScriptsTask

    if ppose is None:
        ppose = io.pose_from_file(kwargs["s"])
    xml = """
    <ROSETTASCRIPTS>
        <SCOREFXNS>
            <ScoreFunction name="sfxn" weights="beta_nov16" />
            <ScoreFunction name="sfxn_design" weights="beta_nov16" >
                <Reweight scoretype="res_type_constraint" weight="1.0" />
            </ScoreFunction>
        </SCOREFXNS>
        <RESIDUE_SELECTORS>
            <Chain name="chA" chains="A"/>
            <Chain name="chB" chains="B"/>
            <Neighborhood name="interface_chA" selector="chB" distance="8.0" />
            <Neighborhood name="interface_chB" selector="chA" distance="8.0" />
            <And name="interface_AB" selectors="interface_chA,interface_chB" />            
        </RESIDUE_SELECTORS>
        <TASKOPERATIONS>
            <ProteinInterfaceDesign name="pack_long" design_chain1="0" design_chain2="0" jump="1" interface_distance_cutoff="15"/>
        </TASKOPERATIONS>
        <MOVERS>
            <SwitchChainOrder name="cutB" chain_order="1" />
            <SwitchChainOrder name="cutA" chain_order="2" />
            <ScoreMover name="scorepose" scorefxn="sfxn" verbose="false" />
            <TaskAwareMinMover name="min" scorefxn="sfxn" bb="0" chi="1" task_operations="pack_long" />
        </MOVERS>
        <FILTERS>
            <Ddg name="ddg" threshold="-10" jump="1" repeats="5" repack="1" relax_mover="min" confidence="0" scorefxn="sfxn" />
            <Sasa name="interface_buried_sasa" confidence="0" />
            <PackStat name="packstat" threshold="0" chain="1" repeats="5"/>
            <SSPrediction name="mismatch_probability" confidence="0" cmd="/software/psipred4/runpsipred_single" use_probability="1" mismatch_probability="1" use_svm="0" />
            <MoveBeforeFilter name="psipred_A" mover="cutB" filter="mismatch_probability" confidence="0"/>
            <MoveBeforeFilter name="psipred_B" mover="cutA" filter="mismatch_probability" confidence="0"/>
            <SSShapeComplementarity name="sc" verbose="1" loops="1" helices="1" />
            <ShapeComplementarity name="sc_int" verbose="0" min_sc="0.55" write_int_area="1" write_median_dist="1" jump="1" confidence="0"/>
            <TaskAwareScoreType name="tot_score" scorefxn="sfxn" score_type="total_score" threshold="0" mode="total"  confidence="0" />
            <MoveBeforeFilter name="score_A" mover="cutB" filter="tot_score" confidence="0"/>
            <MoveBeforeFilter name="score_B" mover="cutA" filter="tot_score" confidence="0"/>
            <ScoreType name="total_score_pose" scorefxn="sfxn" score_type="total_score" threshold="0" confidence="0" />
            <ResidueCount name="count" />
            <CalculatorFilter name="score_per_res" equation="total_score_full / res" threshold="-2.0" confidence="0">
                <Var name="total_score_full" filter="total_score_pose"/>
                <Var name="res" filter="count"/>
            </CalculatorFilter>
            <worst9mer name="9mer" rmsd_lookup_threshold="0.4" confidence="0" />
            <worst9mer name="wnm_all" rmsd_lookup_threshold="0.4" confidence="0" />
            <MoveBeforeFilter name="wnm_A" mover="cutB" filter="9mer" confidence="0"/>
            <MoveBeforeFilter name="wnm_B" mover="cutA" filter="9mer" confidence="0"/>
        </FILTERS>
        <APPLY_TO_POSE>
        </APPLY_TO_POSE>
        <PROTOCOLS>
            <Add filter_name="ddg" />
            <Add filter_name="interface_buried_sasa" />
            <Add filter_name="packstat"/>
            <Add filter_name="psipred_A"/>
            <Add filter_name="psipred_B"/>
            <Add filter_name="sc" />
            <Add filter_name="sc_int" />
            <Add filter_name="score_A"/>
            <Add filter_name="score_B"/>
            <Add filter_name="score_per_res" />
            <Add filter_name="wnm_all"/>
            <Add filter_name="wnm_A"/>
            <Add filter_name="wnm_B"/>
        </PROTOCOLS>
        <OUTPUT scorefxn="sfxn" />
    </ROSETTASCRIPTS>
    """
    scored = SingleoutputRosettaScriptsTask(xml)
    # Run the protocol exactly once, on a clone, so the caller's pose is not
    # the object handed to the task and the filters are not evaluated twice.
    scored_ppose = scored(ppose.pose.clone())
    return scored_ppose

In [30]:
import pyrosetta.distributed.io as io

# Sorted so that "the first silent file" is the same file on every run;
# glob() returns its matches in arbitrary order.
silents = sorted(glob("./01_silents/*.silent"))
if not silents:
    # Fail with a message that names the pattern instead of a bare IndexError.
    raise FileNotFoundError("no silent files match ./01_silents/*.silent")
pposes = io.poses_from_silent(silents[0])

# Take a single pose to try the design protocol on interactively.
testp = next(pposes)

def join_flag_string(file:str) -> str:
    """
    Concatenate the contents of a flags file with `#` comments removed.

    Lines without a `#` are kept verbatim (including blank lines and their
    newline). On lines with a `#`, everything from the first `#` onward is
    discarded; if a flag precedes the comment it is kept, otherwise the whole
    line is dropped. Keeping the flag part matters: dropping the entire line
    would silently lose any flag that carries a trailing comment.

    Args:
        file: Path of the flags file to read.

    Returns:
        The retained text joined into one string; "" for an empty file.
    """
    kept = []
    with open(file) as f:
        for line in f:
            if "#" not in line:
                kept.append(line)
                continue
            # Keep whatever precedes the comment marker, minus trailing blanks.
            flag_part = line.split("#", 1)[0].rstrip()
            if flag_part.strip():
                kept.append(flag_part + "\n")
    return "".join(kept)
            
# -corrections::beta_nov16 true 
# Read and filter the extra flags once, then reuse the same string for both
# the init call and the echo below (the file used to be opened twice).
extra_flags = join_flag_string("hh_19A.flags")

# NOTE(review): a pose is loaded from the silent file before this init call;
# confirm that PyRosetta was not already initialised by that step, otherwise
# some of these flags may not take effect.
pyrosetta.init("""
               -holes:dalphaball /home/bcov/ppi/tutorial_build/main/source/external/DAlpahBall/DAlphaBall.gcc 
               -indexed_structure_store:fragment_store /net/databases/VALL_clustered/connect_chains/ss_grouped_vall_all.h5
                {addnl} 
               -dunbrack_prob_buried 0.8 -dunbrack_prob_nonburied 0.8 -dunbrack_prob_buried_semi 0.8 -dunbrack_prob_nonburied_semi 0.8
               """.format(addnl=extra_flags)) 

# Echo the flags that were spliced into the init string.
print(extra_flags)

# design() is a generator; next() pulls its single designed pose.
test_design = next(design(testp, sfxn="hh_19A_torsional.wts"))

print(test_design.pose.scores)

PyRosetta-4 2020 [Rosetta PyRosetta4.conda.linux.cxx11thread.serialization.CentOS.python37.Release 2020.50+release.1295438cd4bd2be39c9dbbfab8db669ab62415ab 2020-12-12T00:30:01] retrieved from: http://www.pyrosetta.org
(C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team.
-renumber_pdb
-preserve_crystinfo

-jd2:delete_old_poses

-mapreso 2
-grid_spacing 0.8
-atom_mask 2
-sliding_window 1
-unmask_bb


-chemical::clone_atom_types fa_standard:CH1:CHR1

-chemical::clone_atom_types fa_standard:CH1:CHR2

-chemical::clone_atom_types fa_standard:Hapo:HapR

-chemical::clone_atom_types fa_standard:Hapo:HAbb

-chemical::reassign_atom_types fa_standard:ARG:CD:CHR1
-chemical::reassign_atom_types fa_standard:ARG:CZ:CHR1
-chemical::reassign_atom_types fa_standard:LYS:CE:CHR1

-chemical::reassign_atom_types fa_standard:ARG:CG:CHR2
-chemical::reassign_atom_types fa_standard:ARG:CB:CHR2
-chemical::reassign_atom_types fa_standard:LYS:CD:CHR2
-chemical::reas

# Setup Dask
Trying an adaptive SLURMCluster. To see the dashboard, forward port `8787` to `8000`:  
`local$ ssh -L 8000:localhost:8787 $USER@$HOSTNAME`  
now, the web UI is visible at `localhost:8000`  
if you're using a local cluster make sure the node this notebook is on has the same 
number of workers as cores

In [4]:
# Echo the host and user names needed for the `ssh -L` port-forwarding command above.
!echo $HOSTNAME
!echo $USER

dig108
pleung


In [5]:
from dask.distributed import Client, LocalCluster
from dask_jobqueue import SLURMCluster

# Alternative kept for reference: a LocalCluster on the notebook's own node.
# cluster = LocalCluster(n_workers=16, threads_per_worker=1)
# client = Client(cluster)
# client

# Each SLURM job hosts one single-core worker process with 4GB of memory,
# submitted to the "medium" queue.
# scratch_dir = os.path.join("/net/scratch", os.environ["USER"])
cluster = SLURMCluster(cores=1,
                       processes=1,
                       job_cpu=1,
                       memory="4GB",
                       queue="medium",
                      )
# Disabled options: per-user scratch directory and SLURM log location.
#                        local_directory=scratch_dir,
#                        job_extra=["-o {}".format(os.path.join(scratch_dir, "slurm-%j.out"))])
# Print the generated sbatch script so the resource request can be checked.
print(cluster.job_script())
# cluster.scale(4)
cluster.adapt(minimum=0, maximum=1000)  # scale between 0 and 1000 workers
client = Client(cluster)

#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -p medium
#SBATCH -n 1
#SBATCH --cpus-per-task=1
#SBATCH --mem=4G
#SBATCH -t 00:30:00

/home/pleung/.conda/envs/cereal/bin/python -m distributed.cli.dask_worker tcp://172.16.131.212:35071 --nthreads 1 --memory-limit 4.00GB --name name --nanny --death-timeout 60



In [6]:
# Display the client summary: scheduler address, dashboard URL and current worker totals.
client

0,1
Client  Scheduler: tcp://172.16.131.212:35071  Dashboard: http://172.16.131.212:8787/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [7]:
# client.close(); cluster.close()

In [8]:
def load(silent, **kwargs):
    """
    Yield every pose stored in a silent file, one at a time.

    NOTE: this re-defines the `load` from the "Make functions for design and
    scoring" section; the two definitions are meant to behave identically.

    Args:
        silent: Path of the silent file to read. When it is None, the path
            is taken from ``kwargs["-in:file:silent"]`` instead.
        **kwargs: Task options. Only ``"-in:file:silent"`` is read, and only
            when `silent` is None.

    Yields:
        Each item produced by ``pyrosetta.distributed.io.poses_from_silent``.

    Raises:
        KeyError: If `silent` is None and ``"-in:file:silent"`` is absent.
    """
    # Imported in the function body so the function does not depend on
    # names defined in the notebook's global namespace.
    import pyrosetta.distributed.io as io

    # Identity test: `== None` defers to whatever __eq__ the argument defines.
    if silent is None:
        silent = kwargs["-in:file:silent"]
    yield from io.poses_from_silent(silent)

In [9]:
import logging

import pyrosetta.distributed.io as io
# NOTE(review): the saved output of this cell ends with
# "ModuleNotFoundError: No module named 'billiard'"; that package has to be
# installed in the kernel environment before this cell can run.
from pyrosetta.distributed.cluster.core import PyRosettaCluster

logging.basicConfig(level=logging.INFO)

# Input silent files for the production run.
# NOTE(review): the interactive test cell globs "./01_silents/*.silent"
# instead; confirm which directory is the intended input.
silents = glob("../01_make_states/*.silent")

# Rosetta command-line flags, handed to every task as its "extra_options".
# NOTE(review): the fragment_store path here differs from the one used in the
# interactive pyrosetta.init call, and beta_nov16 corrections are enabled here
# while the hh_19A flags file is not loaded; confirm this is intended.
options = { 
    "-out:level": "300",
    "-in:file:silent_struct_type": "binary",
    "-holes:dalphaball": "/home/bcov/ppi/tutorial_build/main/source/external/DAlpahBall/DAlphaBall.gcc",
    "-indexed_structure_store:fragment_store": "/home/bcov/sc/scaffold_comparison/data/ss_grouped_vall_all.h5",
    "-corrections::beta_nov16": "true",
    "-dunbrack_prob_buried": "0.8",
    "-dunbrack_prob_nonburied": "0.8", 
    "-dunbrack_prob_buried_semi": "0.8", 
    "-dunbrack_prob_nonburied_semi": "0.8",
}

def create_tasks(silents, options):
    """
    Lazily build one task dictionary per silent file.

    Args:
        silents: Iterable of silent-file paths.
        options: Mapping stored, by reference, under "extra_options" in
            every task.

    Yields:
        A dict with the keys "options", "extra_options",
        "set_logging_handler" and "-in:file:silent"; the last one holds the
        silent path joined onto the current working directory.
    """
    for silent_path in silents:
        task = {
            "options": "",  # TODO
            "extra_options": options,
            "set_logging_handler": "interactive",
        }
        # The working directory is looked up per task, at the moment the
        # task is produced.
        task["-in:file:silent"] = os.path.join(os.getcwd(), silent_path)
        yield task
        
# Launch the distributed run unless the DEBUG environment variable is set to
# a non-empty value.
if not os.getenv("DEBUG"):
    # Results go to ./02_design_and_filter; the same directory is also passed
    # as the scratch directory.
    output_path = os.path.join(os.getcwd(), "02_design_and_filter")
    PyRosettaCluster(
        tasks=create_tasks(silents, options),
        client=client,
        scratch_dir=output_path,
        output_path=output_path,
        nstruct=5,
    ).distribute(protocols=[load, design, score])  # protocols passed in this order

Importing 'pyrosetta.distributed.cluster.exceptions' requires the third-party package 'billiard' as a dependency!
Please install this package into your python environment. For installation instructions, visit:
https://pypi.org/project/billiard/



ModuleNotFoundError: No module named 'billiard'