In [1]:
import argparse
import sys
from pathlib import Path
import zipfile
from functools import wraps
from abc import ABCMeta, abstractclassmethod
import shutil as sh
import inspect
from random import choice, sample
import pickle
from collections import deque
from multiprocessing import Queue, SimpleQueue
from queue import PriorityQueue
from concurrent.futures import ProcessPoolExecutor
import concurrent.futures as cf
import re
import glob
import csv
import pandas as pd
import subprocess as sp
import shutil as sh

from Bio.PDB import PDBParser
from Bio.PDB.PDBIO import PDBIO
from Bio.SeqUtils import seq1, seq3

# import biobb_md
# from biobb_md.gromacs.gmxselect import gmxselect
# from biobb_model.model.mutate import mutate
# from biobb_md.gromacs.grompp import grompp
# from biobb_md.gromacs.solvate import solvate, Solvate
# from biobb_md.gromacs.pdb2gmx import pdb2gmx
# from biobb_analysis.gromacs.gmx_trjconv_str import gmx_trjconv_str
# from biobb_analysis.gromacs.gmx_trjconv_trj import GMXTrjConvTrj
# from biobb_analysis.gromacs.gmx_image import GMXImage
# from biobb_md.gromacs.grompp import grompp
# from biobb_md.gromacs.make_ndx import MakeNdx

# --- Configuration: input/output locations and constants -------------------
# AB-Bind database checkout (holds the experimental data CSV).
data_dir = Path("/home/pbarletta/labo/22/migue/data") / "AB-Bind-Database-master"
# Wild-type PDBs; EvoEF2 is run from this folder.
pdbs_dir = Path("/home/pbarletta/labo/22/migue/run") / "pdbs"
# Destination folder for the mutant PDBs.
mutpdbs_dir = Path("/home/pbarletta/labo/22/migue/run") / "mut_pdbs"
# Folder with external binaries, and the EvoEF2 executable inside it.
bin_dir = Path("/home/pbarletta/labo/22/locuaz") / "bin"
evo_bin = bin_dir / "evoef2" / "EvoEF2"

# One-letter amino-acid codes accepted in a mutation string.
AA_LIST = tuple("DESTRNQHKAGIMLVPFWYC")

In [2]:
# AB-Bind experimental data table (the CSV is latin-1 encoded).
expdata_df = pd.read_csv(
    data_dir / "AB-Bind_experimental_data.csv",
    encoding='latin-1',
)
# Unique PDB identifiers found in the table, alphabetically sorted.
pdb_list = tuple(sorted(set(expdata_df['#PDB'].tolist())))

In [3]:
def has_proper_format(all_mut_str):
    """Check that every mutation in a comma-separated list is well formed.

    Each entry is expected to look like ``C:WposM``: an upper-case chain
    identifier, a colon, the wild-type residue (one-letter code in
    ``AA_LIST``), the residue position and the mutant residue (one-letter
    code in ``AA_LIST``).

    Args:
        all_mut_str: mutation specification, e.g. ``"I:A103L,I:V104M"``.

    Returns:
        ``True`` when every entry matches the layout above; ``False``
        otherwise, including for non-string input (e.g. a NaN cell), empty
        entries and entries too short to carry a residue position.
    """
    if not isinstance(all_mut_str, str):
        return False

    # Shortest valid entry: chain + ':' + wild type + 1 position char + mutant.
    # Checking the length first also keeps the indexing below from raising
    # IndexError on empty/short entries (e.g. after a trailing comma).
    min_len = 5
    return all(
        len(each_mut_str) >= min_len
        and each_mut_str[0].isupper()
        and each_mut_str[1] == ':'
        and each_mut_str[2] in AA_LIST
        and each_mut_str[-1] in AA_LIST
        for each_mut_str in all_mut_str.split(',')
    )

In [22]:
def add_agua(pdb_in: Path, pdb_out: Path):
    """Append the water records of ``pdb_in`` to ``pdb_out`` and close it with ``END``.

    Every line of ``pdb_in`` whose columns 18-20 read ``HOH`` is copied, in
    order, to the end of ``pdb_out``, followed by an ``END`` line. The
    ``END`` line is written even when ``pdb_in`` has no waters.

    Args:
        pdb_in: PDB file the water lines are read from.
        pdb_out: PDB file that is appended to. It is opened in append mode,
            so a missing file is created (and will then hold only the water
            lines and ``END``).
    """
    with open(pdb_in) as f:
        # Characters 17:20 (PDB columns 18-20) hold the residue name.
        aguas = [linea for linea in f if linea[17:20] == "HOH"]

    # If the target stops in the middle of a line, a line break has to be
    # added first; otherwise the first appended record would be glued to it.
    pdb_out = Path(pdb_out)
    needs_newline = False
    if pdb_out.is_file() and pdb_out.stat().st_size > 0:
        with open(pdb_out, "rb") as f:
            f.seek(-1, 2)  # 2 == os.SEEK_END: position on the last byte
            needs_newline = f.read(1) != b"\n"

    with open(pdb_out, 'a') as f:
        if needs_newline:
            f.write("\n")
        for agua in aguas:
            # The last line of pdb_in may lack its line break.
            f.write(agua if agua.endswith("\n") else agua + "\n")
        f.write("END\n")

In [24]:
# Build every mutant listed in the experimental table with EvoEF2's
# BuildMutant command, append the waters of the wild-type PDB to each model
# and store the result in `mutpdbs_dir`.
for pdb_id in pdb_list:
    print(f" -- {pdb_id} -- ")
    if pdb_id == '1JRH':
        # 1JRH is processed separately, in its own cell.
        continue
    # Table rows (index, row) that belong to this PDB, in table order.
    mutants = tuple(expdata_df[expdata_df["#PDB"] == pdb_id].iterrows())

    # Text of the EvoEF2 mutant file: one line per table row, terminated by
    # ';'. Each "C:WposM" entry of the table (chain, wild type, position,
    # mutant) is rewritten as "WCposM".
    mut_string = "".join(
        ",".join(
            f"{muta[2]}{muta[0]}{muta[3:-1]}{muta[-1]}"
            for muta in fila["Mutation"].split(',')
        ) + ";\n"
        for _, fila in mutants
    )

    # Write the mutant file, run EvoEF2 next to the wild-type PDBs and remove
    # the mutant file again, even if launching EvoEF2 fails.
    input_mutlist_fn = pdbs_dir / "mutlist.txt"
    input_mutlist_fn.write_text(mut_string)
    try:
        # An argument list (no shell) keeps table-derived text such as the
        # PDB id from ever being interpreted by a shell.
        salida = sp.run(
            [
                str(evo_bin),
                "--command=BuildMutant",
                f"--pdb={pdb_id}.pdb",
                "--mutant_file=mutlist.txt",
            ],
            stdout=sp.PIPE,
            stderr=sp.PIPE,
            cwd=pdbs_dir,
            text=True,
        )
    finally:
        input_mutlist_fn.unlink()
    # Any errors? A non-zero exit status counts as well as output on stderr.
    if salida.returncode != 0 or salida.stderr:
        print(salida.stdout)
        print(salida.stderr)

    # Add water to the output PDBs, if the original had any,
    # and move them to their proper folder.
    pdb_in_path = Path(pdbs_dir, f"{pdb_id}.pdb")
    for i, (_, fila) in enumerate(mutants, 1):
        # The i-th line of the mutant file is expected as <pdb>_Model_<i>.pdb.
        temp_out_pdb = pdbs_dir / f"{pdb_id}_Model_{i:04d}.pdb"
        if not temp_out_pdb.is_file():
            # add_agua() appends to its target and would create it; without
            # this check a missing model would become a water-only "mutant".
            print(f"WARNING: {temp_out_pdb.name} not found "
                  f"(mutant {fila['Mutation']}); skipping.")
            continue

        # Output name: <pdb>-<mutation>[_<mutation>...].pdb. Repeated
        # mutations in the table map to the same name, so a later model
        # overwrites an earlier one.
        pdb_out = pdb_in_path.name.split('.')[0] + "-" + '_'.join(
            f"{muta[0]}:{muta[2]}{muta[3:-1]}{muta[-1]}"
            for muta in fila["Mutation"].split(',')
        )
        pdb_out_path = Path(mutpdbs_dir, f"{pdb_out}.pdb")

        add_agua(pdb_in_path, temp_out_pdb)
        sh.copy(temp_out_pdb, pdb_out_path)

        # cleanup
        temp_out_pdb.unlink()

 -- 1AK4 -- 
 -- 1BJ1 -- 
 -- 1CZ8 -- 
 -- 1DQJ -- 
 -- 1DVF -- 
 -- 1FFW -- 
 -- 1JRH -- 
 -- 1JTG -- 
 -- 1KTZ -- 
 -- 1MHP -- 
 -- 1MLC -- 
 -- 1N8Z -- 
 -- 1T83 -- 
 -- 1VFB -- 
 -- 1YY9 -- 
 -- 2JEL -- 
 -- 2NY7 -- 
 -- 2NYY -- 
 -- 2NZ9 -- 
 -- 3BDY -- 
 -- 3BE1 -- 
 -- 3BN9 -- 
 -- 3HFM -- 
 -- 3K2M -- 
 -- 3NGB -- 
 -- 3NPS -- 
 -- 3WJJ -- 
 -- HM_1KTZ -- 
 -- HM_1YY9 -- 
 -- HM_2NYY -- 
 -- HM_2NZ9 -- 
 -- HM_3BN9 -- 


#### 1JRH only (skipped in the loop above)

In [25]:
pdb_id = '1JRH'
# Multi-point mutant that is left out by hand.
# NOTE(review): presumably the mutant on the missing residue mentioned in the
# closing notes of this notebook -- confirm.
excluded_mutation = 'I:A103L,I:V104M,I:R106L,I:D107K'

# List of mutants for this PDB
mutants = tuple(expdata_df[expdata_df["#PDB"] == pdb_id].iterrows())

# Keep only the rows EvoEF2 is asked to build. The selection is done ONCE:
# the i-th line of the mutant file yields the i-th model, so the mutant file
# and the loop that collects the models must see exactly the same rows.
mutants_ok = tuple(
    fila for _, fila in mutants
    if has_proper_format(fila["Mutation"])
    and fila["Mutation"] != excluded_mutation
)

# Text of the EvoEF2 mutant file: one line per kept row, terminated by ';'.
# Each "C:WposM" entry of the table (chain, wild type, position, mutant) is
# rewritten as "WCposM".
mut_string = "".join(
    ",".join(
        f"{muta[2]}{muta[0]}{muta[3:-1]}{muta[-1]}"
        for muta in fila["Mutation"].split(',')
    ) + ";\n"
    for fila in mutants_ok
)

# Write the mutant file, run EvoEF2 next to the wild-type PDBs and remove the
# mutant file again, even if launching EvoEF2 fails.
input_mutlist_fn = pdbs_dir / "mutlist.txt"
input_mutlist_fn.write_text(mut_string)
try:
    # An argument list (no shell) keeps table-derived text from ever being
    # interpreted by a shell.
    salida = sp.run(
        [
            str(evo_bin),
            "--command=BuildMutant",
            f"--pdb={pdb_id}.pdb",
            "--mutant_file=mutlist.txt",
        ],
        stdout=sp.PIPE,
        stderr=sp.PIPE,
        cwd=pdbs_dir,
        text=True,
    )
finally:
    input_mutlist_fn.unlink()
# Any errors? A non-zero exit status counts as well as output on stderr.
if salida.returncode != 0 or salida.stderr:
    print(salida.stdout)
    print(salida.stderr)

# Add the waters of the original PDB to each model and move the output PDBs
# to their proper folder.
pdb_in_path = Path(pdbs_dir, f"{pdb_id}.pdb")
for i, fila in enumerate(mutants_ok, 1):
    # The i-th line of the mutant file is expected as <pdb>_Model_<i>.pdb.
    temp_out_pdb = pdbs_dir / f"{pdb_id}_Model_{i:04d}.pdb"
    if not temp_out_pdb.is_file():
        # add_agua() appends to its target and would create it; without this
        # check a missing model would become a water-only "mutant".
        print(f"WARNING: {temp_out_pdb.name} not found "
              f"(mutant {fila['Mutation']}); skipping.")
        continue

    # Output name: <pdb>-<mutation>[_<mutation>...].pdb
    pdb_out = pdb_in_path.name.split('.')[0] + "-" + '_'.join(
        f"{muta[0]}:{muta[2]}{muta[3:-1]}{muta[-1]}"
        for muta in fila["Mutation"].split(',')
    )
    pdb_out_path = Path(mutpdbs_dir, f"{pdb_out}.pdb")

    add_agua(pdb_in_path, temp_out_pdb)
    sh.copy(temp_out_pdb, pdb_out_path)

    # cleanup
    temp_out_pdb.unlink()

### checking that I actually got one PDB per mutant

In [26]:
# Number of table rows (mutants) listed for each PDB, in `pdb_list` order.
nmutants_orig = [
    int((expdata_df["#PDB"] == pdb_id).sum())
    for pdb_id in pdb_list
]

In [27]:
# Hand-entered number of mutant PDBs per complex; it is zipped against
# `pdb_list` below, so the order follows `pdb_list` (ids shown for reference).
nmutants_pdbs = [
    17, 19, 19, 34,    # 1AK4, 1BJ1, 1CZ8, 1DQJ
    38, 9, 9, 31,      # 1DVF, 1FFW, 1JRH, 1JTG
    22, 93, 27, 36,    # 1KTZ, 1MHP, 1MLC, 1N8Z
    244, 55, 8, 43,    # 1T83, 1VFB, 1YY9, 2JEL
    31, 28, 19, 34,    # 2NY7, 2NYY, 2NZ9, 3BDY
    34, 35, 48, 7,     # 3BE1, 3BN9, 3HFM, 3K2M
    15, 27, 25, 22,    # 3NGB, 3NPS, 3WJJ, HM_1KTZ
    16, 25, 16, 8,     # HM_1YY9, HM_2NYY, HM_2NZ9, HM_3BN9
]

In [28]:
# Report the complexes with fewer mutant PDBs (j) than table rows (i).
# The names pdb/i/j are part of the printed text through the `{name=}` syntax.
for pdb, i, j in zip(pdb_list, nmutants_orig, nmutants_pdbs):
    if j >= i:
        continue
    print(f"{pdb=}:  {i=} -- {j=}")

pdb='1DVF':  i=39 -- j=38
pdb='1JRH':  i=11 -- j=9
pdb='1N8Z':  i=38 -- j=36
pdb='1T83':  i=246 -- j=244


In [17]:
# NOTE(review): `cosa` is not used by any other visible cell -- looks like
# leftover scratch; confirm before relying on it.
cosa = Path("/home/pbarletta/labo/22/migue/run/mut_pdbs") / "a.pdb"

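- 1JRH: 2 of its mutants were discarded: the first because it targets a residue that is missing from the structure, and the second because its mutation specification is non-standard (a "delta ..." entry).
- 1DVF, 1N8Z, 1T83: each has repeated mutants. Output files are named after the mutation, so the second copy overwrites the first and only the 2nd one was kept.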

---