# Prep filtered scaffold sets for distributed design

### Imports

In [1]:
# %load_ext lab_black
# Python standard library
from glob import glob
import os
import socket
import sys

# 3rd party library imports
import dask
import matplotlib.pyplot as plt
import pandas as pd
import pyrosetta
import numpy as np
import scipy
import seaborn as sns
from tqdm.auto import tqdm  # jupyter compatible progress bar

# Link tqdm to pandas so pandas operations can display progress bars.
tqdm.pandas()

# Notebook magics, currently disabled:
#   %matplotlib inline     (save plots in the notebook)
#   %load_ext autoreload   (reload modules automatically before executing cells)
#   %autoreload 2

# Report the execution context: working directory first, then host name.
working_dir = os.getcwd()
node_name = socket.gethostname()
print(f"running in directory: {working_dir}")
print(f"running on node: {node_name}")

running in directory: /home/broerman/crispy_shifty/projects/OPS
running on node: sofia


### Domesticate the scaffolds by trimming off leading and trailing loops and adding metadata to the output pdb.bz2s. 

In [2]:
# Destination for the list of scaffold PDBs that will be fed to the prep step.
design_list_file = "/home/broerman/crispy_shifty/projects/OPS/00_scaffold_sets/inputs.list"

# Every *.pdb in the inputs directory, in sorted order for a stable listing.
input_pdbs = sorted(glob("/home/broerman/crispy_shifty/projects/OPS/00_scaffold_sets/inputs/*.pdb"))

# One path per line; no newline is written after the last entry.
with open(design_list_file, "w") as list_handle:
    list_handle.write("\n".join(input_pdbs))

In [6]:
# Make the local crispy_shifty checkout importable, then pull in the task generator.
sys.path.insert(0, "/home/broerman/crispy_shifty")
from crispy_shifty.utils.io import gen_array_tasks

simulation_name = "01_prep_inputs"
design_list_file = "/home/broerman/crispy_shifty/projects/OPS/00_scaffold_sets/inputs.list"
# NOTE(review): this resolves to <OPS>/01_prep_inputs, whereas the later collection
# cells read from <cwd>/scaffolds/01_prep_inputs -- confirm which location is intended.
output_path = os.path.join("/home/broerman/crispy_shifty/projects/OPS/", simulation_name)

# Flags forwarded as `options`, joined into one space-separated string.
rosetta_flags = [
    "out:level 200",
    "corrections:beta_nov16 true",
    "holes:dalphaball /software/rosetta/DAlphaBall.gcc",
]
options = " ".join(rosetta_flags)

# Additional settings forwarded through `extra_kwargs`; every value is a string.
extra_kwargs = {
    "metadata_csv": "/home/broerman/crispy_shifty/projects/OPS/00_scaffold_sets/metadata.csv",
    "chains_to_keep": "12",
    "num_ss_per_repeat": "2",
    "fixed_resis": "distribute",
}

# Gather the call arguments in one place so the dispatch call itself stays short.
# NOTE(review): the saved output of this cell is an InvalidGitRepositoryError
# ("working directory is dirty"), so local changes likely need committing first.
task_settings = dict(
    distribute_func="crispy_shifty.protocols.cleaning.add_metadata_to_input",
    design_list_file=design_list_file,
    output_path=output_path,
    queue="short",
    memory="2G",
    nstruct=1,
    nstruct_per_task=1,
    options=options,
    extra_kwargs=extra_kwargs,
    simulation_name=simulation_name,
)
gen_array_tasks(**task_settings)

InvalidGitRepositoryError: The working directory is dirty! Commit local changes to ensure reproducibility.

### Collect scorefiles of the prepped input scaffolds

In [3]:
# Put the shared crispy_shifty checkout on the import path before importing from it.
sys.path.insert(0, "/projects/crispy_shifty")
from crispy_shifty.utils.io import collect_score_file

simulation_name = "01_prep_inputs"
output_path = os.path.join(os.getcwd(), f"scaffolds/{simulation_name}")

# Collection is skipped whenever scores.json is already present in output_path.
# (Presumably collect_score_file is what creates that file -- verify in crispy_shifty.)
scores_json = os.path.join(output_path, "scores.json")
already_collected = os.path.exists(scores_json)
if not already_collected:
    collect_score_file(output_path, "scores")

  from distributed.utils import tmpfile


### Load resulting concatenated scorefile

In [4]:
# Put the shared crispy_shifty checkout on the import path before importing from it.
sys.path.insert(0, "/projects/crispy_shifty")
from crispy_shifty.utils.io import parse_scorefile_linear

# Relies on `simulation_name` having been set by an earlier cell.
output_path = os.path.join(os.getcwd(), f"scaffolds/{simulation_name}")
csv_cache = os.path.join(output_path, "scores.csv")
json_scorefile = os.path.join(output_path, "scores.json")

# Only parse the JSON scorefile when no CSV copy exists yet; otherwise
# `scores_df` is left undefined by this cell.
csv_missing = not os.path.exists(csv_cache)
if csv_missing:
    scores_df = parse_scorefile_linear(json_scorefile)

  0%|          | 0/32266 [00:00<?, ?it/s]

### Dump scores_df as a CSV and then reload, for performance reasons

In [5]:
# Single location for the CSV copy of the scores table.
scores_csv = os.path.join(output_path, "scores.csv")

# Write the CSV only if it is not there yet; an existing file is never overwritten.
if not os.path.exists(scores_csv):
    scores_df.to_csv(scores_csv)

# Always reload from disk so `scores_df` is defined whether or not it was parsed above.
# The index was written without a header, hence the "Unnamed: 0" column name.
scores_df = pd.read_csv(scores_csv, index_col="Unnamed: 0")

### Save a list of outputs

In [6]:
# Write the index of scores_df (one entry per line) to prepped_inputs.list.
# The destination is derived from `output_path` (set when the scorefile was loaded)
# instead of repeating the directory literal, so the list always lands next to the
# scores it was built from.
prepped_list_path = os.path.join(output_path, "prepped_inputs.list")

with open(prepped_list_path, "w") as f:
    # Each index entry is written followed by a newline, including the last one.
    f.writelines(f"{path}\n" for path in scores_df.index)

### Prototyping blocks

In [None]:
%%time 
import pyrosetta

pyrosetta.init(
    "-corrections::beta_nov16 true \
    -detect_disulf false \
    -holes:dalphaball /software/rosetta/DAlphaBall.gcc "  # \
    #     -indexed_structure_store:fragment_store /home/bcov/sc/scaffold_comparison/data/ss_grouped_vall_all.h5"
)


sys.path.insert(0, "/projects/crispy_shifty/")
from crispy_shifty.protocols.cleaning import remove_terminal_loops, redesign_disulfides


t = next(
    remove_terminal_loops(
        None,
        pdb_path="/net/shared/scaffolds/pre_scaffold_DB/tj_junctions/DHR82_DHR79_l3_t1_t2_9_v4c.pdb",
        metadata={
            "pdb": "/net/shared/scaffolds/pre_scaffold_DB/tj_junctions/DHR82_DHR79_l3_t1_t2_9_v4c.pdb",
            "topo": "HHHHHHHH",
            "best_model": 2,
            "best_average_plddts": 96.0650150399,
            "best_ptm": 0.8458813818,
            "best_rmsd_to_input": 1.2088154485,
            "best_average_DAN_plddts": 0.947265625,
            "scaffold_type": "tj_junctions",
        },
    )
)

t2 = next(redesign_disulfides(t))