# Make bound and free states from the prepped inputs

### Imports

In [1]:
# %load_ext lab_black
# Python standard library
from glob import glob
import os
import socket
import sys

# 3rd party library imports
import dask
import matplotlib.pyplot as plt
import pandas as pd
import pyrosetta
import numpy as np
import scipy
import seaborn as sns
from tqdm.auto import tqdm  # jupyter compatible progress bar

tqdm.pandas()  # register tqdm with pandas so the progress_* methods are available
# Notebook magic
# render matplotlib figures inline in the notebook output
%matplotlib inline
# load the autoreload extension; mode 2 reloads all modules before executing each cell
%load_ext autoreload
%autoreload 2
print(f"running in directory: {os.getcwd()}")  # current working directory of the kernel
print(f"running on node: {socket.gethostname()}")  # hostname of the machine running the kernel

running in directory: /home/broerman/crispy_shifty/projects/OPS/round_1/design
running on node: sofia


### Make helix-bound states from the scaffolds

In [6]:
# Put the local crispy_shifty checkout first on the import path.
sys.path.insert(0, "/home/broerman/crispy_shifty")
from crispy_shifty.utils.io import gen_array_tasks

# Inputs come from the 01_prep_inputs step; outputs go to a directory named
# after this simulation.
simulation_name = "02_make_bound_states"
design_list_file = "/home/broerman/crispy_shifty/projects/OPS/round_1/design/01_prep_inputs/prepped_inputs.list"
output_path = os.path.join(
    "/home/broerman/crispy_shifty/projects/OPS/round_1/design",
    simulation_name,
)

# Flags are kept as a list so more can be appended; they are joined into the
# single space-separated string passed as `options`.
option_flags = ["out:level 200"]
options = " ".join(option_flags)

# One structure per input and one structure per array task.
gen_array_tasks(
    simulation_name=simulation_name,
    distribute_func="crispy_shifty.protocols.states.make_bound_states",
    design_list_file=design_list_file,
    output_path=output_path,
    options=options,
    queue="short",
    memory="3G",
    nstruct=1,
    nstruct_per_task=1,
)

Run the following command with your desired environment active:
sbatch -a 1-32266 /mnt/projects/crispy_shifty/scaffolds/02_make_bound_states/run.sh


### Make free states from the scaffolds

In [8]:
# Put the local crispy_shifty checkout first on the import path.
sys.path.insert(0, "/home/broerman/crispy_shifty")
from crispy_shifty.utils.io import gen_array_tasks

# Inputs come from the 01_prep_inputs step; outputs go to a directory named
# after this simulation.
simulation_name = "02_make_free_states"
design_list_file = "/home/broerman/crispy_shifty/projects/OPS/round_1/design/01_prep_inputs/prepped_inputs.list"
output_path = os.path.join("/home/broerman/crispy_shifty/projects/OPS/round_1/design", simulation_name)

# Flags are joined into the single space-separated string passed as `options`.
options = " ".join(
    [
        "out:level 200",
    ]
)
# Extra keyword arguments intended for the distributed function.
# NOTE(review): this dict used to be defined but never handed to
# gen_array_tasks, so the setting had no effect. It is now passed through;
# this assumes gen_array_tasks accepts an `extra_kwargs` keyword and that
# make_free_states reads `include_additional_chains` -- confirm before
# re-running, since it changes the tasks that get generated.
extra_kwargs = {
    "include_additional_chains": "true",
}

# One structure per input and one structure per array task.
gen_array_tasks(
    distribute_func="crispy_shifty.protocols.states.make_free_states",
    design_list_file=design_list_file,
    output_path=output_path,
    queue="short",
    extra_kwargs=extra_kwargs,
    memory="3G",
    nstruct=1,
    nstruct_per_task=1,
    options=options,
    simulation_name=simulation_name,
)

Run the following command with your desired environment active:
sbatch -a 1-32266 /mnt/projects/crispy_shifty/scaffolds/02_make_free_states/run.sh


### Collect scorefiles of the bound and free states

In [3]:
# Put the local crispy_shifty checkout first on the import path.
sys.path.insert(0, "/home/broerman/crispy_shifty")
from crispy_shifty.utils.io import collect_score_file

# Handle the bound and free simulations the same way: only call
# collect_score_file when the directory has no scores.json yet, so re-running
# this cell does not repeat the collection.
for simulation_name in ("02_make_bound_states", "02_make_free_states"):
    output_path = os.path.join(
        "/home/broerman/crispy_shifty/projects/OPS/round_1/design",
        simulation_name,
    )
    if os.path.exists(os.path.join(output_path, "scores.json")):
        continue
    collect_score_file(output_path, "scores")

  from distributed.utils import tmpfile


### Load resulting scorefiles of bound and free states

In [4]:
# Put the local crispy_shifty checkout first on the import path.
sys.path.insert(0, "/home/broerman/crispy_shifty")
from crispy_shifty.utils.io import parse_scorefile_linear

# Locations of the collected score files for each simulation.
bound_scores_json = "/home/broerman/crispy_shifty/projects/OPS/round_1/design/02_make_bound_states/scores.json"
free_scores_json = "/home/broerman/crispy_shifty/projects/OPS/round_1/design/02_make_free_states/scores.json"

# Parse the bound scores first, then the free scores.
bound_scores_df = parse_scorefile_linear(bound_scores_json)
free_scores_df = parse_scorefile_linear(free_scores_json)

  0%|          | 0/550533 [00:00<?, ?it/s]

  0%|          | 0/663475 [00:00<?, ?it/s]

### Dump scorefiles as CSVs and then reload, for performance reasons

In [3]:
# CSV cache locations, one per simulation.
bound_scores_csv = "/home/broerman/crispy_shifty/projects/OPS/round_1/design/02_make_bound_states/scores.csv"
free_scores_csv = "/home/broerman/crispy_shifty/projects/OPS/round_1/design/02_make_free_states/scores.csv"

# Write each CSV only if it is missing, then always reload from the CSV.
# The in-memory DataFrames are referenced only inside the "missing" branch, so
# this cell still runs when the CSVs exist and the JSON-loading cell was skipped.
if not os.path.exists(bound_scores_csv):
    bound_scores_df.to_csv(bound_scores_csv)
bound_scores_df = pd.read_csv(bound_scores_csv, index_col="Unnamed: 0")

if not os.path.exists(free_scores_csv):
    free_scores_df.to_csv(free_scores_csv)
free_scores_df = pd.read_csv(free_scores_csv, index_col="Unnamed: 0")

### Save a list of outputs

In [4]:
# Write one index entry per line for each score table; tqdm wraps the index
# so a progress bar is shown while the lines are written.
bound_list_path = "/home/broerman/crispy_shifty/projects/OPS/round_1/design/02_make_bound_states/bound_states.list"
with open(bound_list_path, "w") as list_file:
    list_file.writelines(f"{entry}\n" for entry in tqdm(bound_scores_df.index))

free_list_path = "/home/broerman/crispy_shifty/projects/OPS/round_1/design/02_make_free_states/free_states.list"
with open(free_list_path, "w") as list_file:
    list_file.writelines(f"{entry}\n" for entry in tqdm(free_scores_df.index))

  0%|          | 0/550533 [00:00<?, ?it/s]

  0%|          | 0/663475 [00:00<?, ?it/s]

### Also save a CSV of just the free states that have 0 shift and whose pivot helix is the pre-break helix
We will need them later.

In [6]:
output_path = "/home/broerman/crispy_shifty/projects/OPS/round_1/design/02_make_free_states/free_state_0s.csv"
# Keep only rows where shift is 0 and the pivot helix equals the pre-break helix.
free_state_0s = free_scores_df.loc[
    (free_scores_df["shift"] == 0)
    & (free_scores_df["pivot_helix"] == free_scores_df["pre_break_helix"])
]
free_state_0s.to_csv(output_path)