In [1]:
import pyrosetta

# Start PyRosetta for this kernel through the distributed helper.
# NOTE(review): maybe_init is expected to skip initialisation when PyRosetta is
# already running, which would make this cell safe to re-run -- confirm against
# the pyrosetta.distributed documentation.
pyrosetta.distributed.maybe_init(
    # Use the beta_nov16 corrections.
    options="-corrections::beta_nov16 true",
    # Restrict Rosetta's tracer output (presumably level 100 = errors only; verify).
    extra_options={"-out:level": "100"},
)

In [2]:
import os
from glob import glob

# Directory holding the round-2 design PDBs, and the sub-directory that receives
# the per-state PDBs written by this cell.
DESIGN_DIR = '/home/broerman/projects/CSD/round_2/designs'
SPLIT_DIR = os.path.join(DESIGN_DIR, 'split')

# Sorted so designs are processed in a reproducible order: glob itself makes no
# ordering promise, and two inputs sharing a name prefix would otherwise
# overwrite each other's output in an arbitrary order.
design_fnames = sorted(glob(os.path.join(DESIGN_DIR, '*.pdb')))

if design_fnames:
    # dump_pdb is not expected to create missing directories, so make sure the
    # output location exists before the first write.
    os.makedirs(SPLIT_DIR, exist_ok=True)

for design_fname in design_fnames:
    # Design name = file name up to (not including) the first underscore.
    design_name = os.path.basename(design_fname).split('_')[0]

    # One pose per chain, in file order.
    chain_poses = list(pyrosetta.pose_from_pdb(design_fname).split_by_chain())
    if len(chain_poses) != 4:
        raise ValueError(
            f'{design_fname}: expected 4 chains, found {len(chain_poses)}'
        )
    pose_AX, pose_BX, pose_Y, pose_BY = chain_poses

    # Merge the fourth chain into the third (in place) so that the last two
    # chains are written together as a single two-chain pose.
    pyrosetta.rosetta.core.pose.append_pose_to_pose(pose_Y, pose_BY)

    # Outputs: <name>A.pdb (chain 1), <name>B.pdb (chain 2), <name>AB.pdb (chains 3+4).
    pose_AX.dump_pdb(os.path.join(SPLIT_DIR, f'{design_name}A.pdb'))
    pose_BX.dump_pdb(os.path.join(SPLIT_DIR, f'{design_name}B.pdb'))
    pose_Y.dump_pdb(os.path.join(SPLIT_DIR, f'{design_name}AB.pdb'))

In [3]:
# alphafold

from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from collections import defaultdict
from IPython.utils.io import capture_output

# Locations and fixed arguments used to build the superfold task list.
SPLIT_PDB_GLOB = '/home/broerman/projects/CSD/round_2/designs/split/*.pdb'
SUPERFOLD_EXE = '/home/rdkibler/software/alphafold/superfold'
SUPERFOLD_FLAGS = '--models all --max_recycles 4 --initial_guess --simple_rmsd'
AF2_OUT_DIR = '/home/broerman/projects/CSD/round_2/af2/af2_models'
TASKS_FNAME = 'af2_CSDs.tasks'

# Total sequence length (summed over all chains) -> PDB files with that length.
files_by_len = defaultdict(list)

# Sorted so the generated task file is identical from run to run; glob alone
# makes no ordering promise.
for design_fname in sorted(glob(SPLIT_PDB_GLOB)):

    # Parsing is wrapped in capture_output so whatever the parser prints per file
    # (presumably warnings -- verify) does not accumulate in the notebook output.
    with capture_output():
        # 'pdb-atom' yields one record per chain; concatenate them all.
        sequence = ''.join(
            str(record.seq) for record in SeqIO.parse(design_fname, 'pdb-atom')
        )

    files_by_len[len(sequence)].append(design_fname)

# One superfold invocation per length group, so every structure in a task has
# the same total length.
# NOTE(review): paths are inserted unquoted; this assumes none of them contain
# whitespace or shell metacharacters.
task_lines = []
for file_list in files_by_len.values():
    file_str = ' '.join(file_list)
    task_lines.append(
        f"{SUPERFOLD_EXE} {file_str} {SUPERFOLD_FLAGS} --out_dir {AF2_OUT_DIR}\n"
    )

# Longest command lines first (i.e. the groups with the most path text); the
# sort is stable, so ties keep their insertion order.
task_lines.sort(key=len, reverse=True)

with open(TASKS_FNAME, 'w') as f:
    f.writelines(task_lines)

In [4]:
# Show the SLURM array submission command to copy into a shell: the array range
# runs from 1 to the number of lines in the task file.
submit_cmd = 'sbatch -a 1-$(cat af2_CSDs.tasks | wc -l) af2_CSDs.sh'
print(submit_cmd)

sbatch -a 1-$(cat af2_CSDs.tasks | wc -l) af2_CSDs.sh
