In [2]:
import sys
import glob
from pathlib import Path
import itertools
import pandas as pd
import mdtraj as md
import logging
from collections import Counter
import pickle
import subprocess
import importlib
from collections import namedtuple
# Lightweight record types shared by the interaction analyses below.
# Hydrogen-bond atoms and shielding atoms carry exactly the same fields.
_ATOM_RECORD_FIELDS = (
    'chainID', 'chain_type', 'CDR', 'resSeq', 'resname',
    'index', 'serial', 'element', 'is_sidechain')

# Antibody / antigen chain identifiers of one structure.
Chains = namedtuple('Chains', 'antibody antigen')
# One ring-ring pair, one member per molecule.
PiPiPair = namedtuple('PiPiPair', 'antibody antigen')
# One ring-ion pair.
PionPair = namedtuple('PionPair', 'ring ion')
HBondAtom = namedtuple('HBondAtom', _ATOM_RECORD_FIELDS)
HBond = namedtuple('HBond', 'donor acceptor')
ShieldingAtom = namedtuple('ShieldingAtom', _ATOM_RECORD_FIELDS)
# Per-residue secondary-structure record.
res_SSE = namedtuple('res_SSE', 'index resSeq name DSSP')

# Make the current working directory importable so the project-local
# `scripts` package imported below can be resolved.
source_location = Path().resolve()
# sys.path entries must be plain strings: the import system ignores any other
# type, so appending the Path object itself had no effect. The membership
# check keeps the list from growing every time this cell is re-run.
if str(source_location) not in sys.path:
    sys.path.append(str(source_location))
from scripts.utils import get_sabdab_details

from scripts.abag_interactions_rings import *
from scripts.more_utils import *

# Project root and the structure folders derived from it.
# NOTE(review): absolute, user-specific path; the notebook only runs as-is on
# a machine with this layout.
casa_dir = Path("/home/pbarletta/labo/22/AbAgInterface")
str_dir = casa_dir / "structures" / "raw"
exposed_dir = casa_dir / "structures" / "exposed"

In [59]:
# PDB id codes, one per line of full_pdb.list (surrounding whitespace removed).
pdb_list = []
with open(casa_dir / "full_pdb.list", 'r') as file:
    pdb_list.extend(linea.strip() for linea in file)

df_dataset = get_df_dataset(casa_dir)

# Pre-computed interface table.
# NOTE: pickle.load executes arbitrary code; only open files you trust.
with open(casa_dir / "data" / 'buried_ab_ag_interface_res.pickle', 'rb') as file:
    df_interface = pickle.load(file)

SabDab protein antigen:
1154 proteins out of 2017, 57.2%
All: 1154
No Hchain: 0
No Lchain: 0
Both chains: 1154
Buried surfaces of 2492 proteins
with both chains: 867


----

## Hydrophobic

In [168]:
import scripts.abag_interactions_hydrophobic
importlib.reload(scripts.abag_interactions_hydrophobic)
from scripts.abag_interactions_hydrophobic import *

hbond_dir = casa_dir / "hbonds"

# Per-entry lookups, keyed by PDB id code.
# NOTE: pickle.load executes arbitrary code; only open files you trust.
data_dir = casa_dir / 'data'
with open(data_dir / 'filenames.pkl', 'rb') as file:
    filenames = pickle.load(file)
with open(data_dir / 'chains.pkl', 'rb') as file:
    chains = pickle.load(file)

# The id codes to process are the keys of `filenames`.
pdb_list = list(filenames)
df_dataset = get_df_dataset(casa_dir)

SabDab protein antigen:
1154 proteins out of 2017, 57.2%
All: 1154
No Hchain: 0
No Lchain: 0
Both chains: 1154
Buried surfaces of 2492 proteins
with both chains: 867


In [198]:
check_pdb = '7k8w'
idx = pdb_list.index(check_pdb)
# Debug run on the single entry `check_pdb`; loop over `pdb_list` itself to
# process every entry.
for pdb_idcode in pdb_list[idx:idx + 1]:
    print(f"{pdb_idcode}", flush=True)

    pdb_filename = Path(filenames[pdb_idcode])
    trj_in = md.load(exposed_dir / pdb_idcode / pdb_filename)
    ab_chains = chains[pdb_idcode].antibody
    ag_chain = chains[pdb_idcode].antigen

    # Hydrophobic clusters: carbon graph -> putative clusters -> merged
    # clusters -> per-cluster data.
    try:
        G = get_carbons_graph(
            trj_in, df_dataset, pdb_idcode, ab_chains, ag_chain, cutoff_carbons)
        pre_clusteres = get_putative_clusters(G)
        clusters = merge_clusters(trj_in, pre_clusteres, cutoff_clusters)
        (all_atom_indices, all_atom_serials, all_resSeq, all_resname,
         all_chainIDs, all_chain_type, all_cdr) = get_data_from_clusteres(
            pdb_idcode, trj_in, clusters, ab_chains, df_dataset)
    except Exception as e:
        # Log which entry failed, then let the error propagate unchanged.
        logging.warning(
            f"- {pdb_idcode} raised: {e.__class__}, saying: {e}, during hydrophobic "
            f"interactions calculation. Probably has no hydrophobic interactions.")
        raise e

7k8w


In [199]:
# Display the current antigen and antibody chain identifiers
# (both are assigned inside the hydrophobic-cluster loop cell).
ag_chain, ab_chains

(('G', '.'), ('N', 'S'))

In [200]:
# Unique epitope atoms over every df_dataset row of the current entry whose
# chainID is one of its antibody chains.
all_epitope_atoms = list(set(itertools.chain.from_iterable(
    df_dataset.query(
        f"idcode == '{pdb_idcode}' and chainID in {ab_chains}")["epitope_atoms"])))

In [197]:
# Show the df_dataset rows of the current entry restricted to its antibody chains.
# NOTE(review): the saved output lists idcode 7k8v, and this cell's execution
# count (197) precedes the cell that sets pdb_idcode to '7k8w' (198), so it
# was presumably run with an earlier pdb_idcode. Re-run to refresh.
df_dataset.query(
                     f"idcode == '{pdb_idcode}' and chainID in {ab_chains}")

Unnamed: 0,idcode,chainID,chain_type,cdr,cdr_seq,cdr_begin,cdr_end,cdr_atoms,epitope_atoms,epitope_residues,ag_ab_interface,ag_cdrchain_interface,ag_cdr_interface,ab_ag_interface,ag_ab_interface_res,ag_cdrchain_interface_res,ag_cdr_interface_res,ab_ag_interface_res
23259,7k8v,H,H,1,GYSFTSY,26,32,"[22466, 22467, 22468, 22469, 22470, 22471, 224...",[],[],"{5505, 5507, 5510, 5511, 5512, 5516, 5520, 552...","{5520, 5524, 5525, 5526, 6167, 6171, 5534, 553...",{},"{30721, 31873, 31876, 31883, 32910, 33555, 329...","[(A, 442, ASP, HB3, 1), (A, 448, ASN, CA, 1), ...","[(A, 449, TYR, N, 1), (A, 449, TYR, CB, 1), (A...",[],"[(H, 33, TRP, CZ3, 1), (H, 100, ALA, HA, 1), (..."
23260,7k8v,H,H,2,YPGDSD,52,56,"[22679, 22680, 22681, 22682, 22683, 22684, 226...",[],[],"{5505, 5507, 5510, 5511, 5512, 5516, 5520, 552...","{5520, 5524, 5525, 5526, 6167, 6171, 5534, 553...",{},"{30721, 31873, 31876, 31883, 32910, 33555, 329...","[(A, 442, ASP, HB3, 2), (A, 448, ASN, CA, 2), ...","[(A, 449, TYR, N, 2), (A, 449, TYR, CB, 2), (A...",[],"[(H, 33, TRP, CZ3, 2), (H, 100, ALA, HA, 2), (..."
23261,7k8v,H,H,3,SFRDDPRIAVAGPADAFDI,95,102,"[23028, 23029, 23030, 23031, 23032, 23033, 230...",[],[],"{5505, 5507, 5510, 5511, 5512, 5516, 5520, 552...","{5520, 5524, 5525, 5526, 6167, 6171, 5534, 553...","{5520, 5524, 5525, 5526, 6167, 6171, 5534, 553...","{30721, 31873, 31876, 31883, 32910, 33555, 329...","[(A, 442, ASP, HB3, 3), (A, 448, ASN, CA, 3), ...","[(A, 449, TYR, N, 3), (A, 449, TYR, CB, 3), (A...","[(A, 449, TYR, N, 3), (A, 449, TYR, CB, 3), (A...","[(H, 33, TRP, CZ3, 3), (H, 100, ALA, HA, 3), (..."
23265,7k8v,L,K,1,RASQSISYWLA,24,34,"[23349, 23350, 23351, 23352, 23353, 23354, 233...",[],[],"{5505, 5507, 5510, 5511, 5512, 5516, 5520, 552...","{5505, 5507, 5510, 5511, 5512, 5516, 5500, 399...","{5505, 5507, 5510, 5511, 5512, 5516, 6254, 549...","{30721, 31873, 31876, 31883, 32910, 33555, 329...","[(A, 442, ASP, HB3, 1), (A, 448, ASN, CA, 1), ...","[(A, 442, ASP, HB3, 1), (A, 448, ASN, CA, 1), ...","[(A, 442, ASP, HB3, 1), (A, 448, ASN, CA, 1), ...","[(H, 33, TRP, CZ3, 1), (H, 100, ALA, HA, 1), (..."
23266,7k8v,L,K,2,QASSLES,50,56,"[23565, 23566, 23567, 23568, 23569, 23570, 235...",[],[],"{5505, 5507, 5510, 5511, 5512, 5516, 5520, 552...","{5505, 5507, 5510, 5511, 5512, 5516, 5500, 399...",{4014},"{30721, 31873, 31876, 31883, 32910, 33555, 329...","[(A, 442, ASP, HB3, 2), (A, 448, ASN, CA, 2), ...","[(A, 442, ASP, HB3, 2), (A, 448, ASN, CA, 2), ...","[(A, 346, ARG, CZ, 2)]","[(H, 33, TRP, CZ3, 2), (H, 100, ALA, HA, 2), (..."
23267,7k8v,L,K,3,QQYNSYPYT,89,97,"[23845, 23846, 23847, 23848, 23849, 23850, 238...",[],[],"{5505, 5507, 5510, 5511, 5512, 5516, 5520, 552...","{5505, 5507, 5510, 5511, 5512, 5516, 5500, 399...","{6251, 6268, 6245, 6247}","{30721, 31873, 31876, 31883, 32910, 33555, 329...","[(A, 442, ASP, HB3, 3), (A, 448, ASN, CA, 3), ...","[(A, 442, ASP, HB3, 3), (A, 448, ASN, CA, 3), ...","[(A, 499, PRO, HB2, 3), (A, 500, THR, HG21, 3)...","[(H, 33, TRP, CZ3, 3), (H, 100, ALA, HA, 3), (..."


In [None]:
# Draw the hydrophobic clusters of the current entry.
# NOTE(review): "clusters.py" is presumably the output script name; confirm
# against draw_clusters.
draw_clusters(
    trj_in, df_dataset, pdb_idcode,
    exposed_dir / pdb_idcode / pdb_filename,
    ab_chains, ag_chain, clusters, "clusters.py")

In [28]:
# Record with one slot each for the heavy chain, light chain and antigen.
TYRs = namedtuple('TYRs', 'heavy light antigen')

'A'

## Pi-Pi

In [None]:
bad_pdbs = []
# One accumulator frame per collected attribute: ['idcode', <attribute>].
(df_PiPi_atom_indices, df_PiPi_atom_serials, df_PiPi_resSeq,
 df_PiPi_resname, df_PiPi_chain_ID, df_PiPi_chain_type, df_PiPi_cdr) = (
    pd.DataFrame(columns=['idcode', column])
    for column in ('atom_indices', 'atom_serials', 'resSeq', 'resname',
                   'chain_ID', 'chain_type', 'CDR'))

check_pdb = '2zuq'
idx = pdb_list.index(check_pdb)
# NOTE(review): file_pdb_list and is_cpx_pdb are not defined in any visible
# cell of this notebook; they must come from elsewhere (hidden state).
for pdb_idcode, pdb_file, is_cpx in zip([pdb_list[idx]], [file_pdb_list[idx]], [is_cpx_pdb[idx]]):
# Full run: for pdb_idcode, pdb_file, is_cpx in zip(pdb_list, file_pdb_list, is_cpx_pdb):
    print(f"{pdb_idcode}", flush=True)

    # BAD PDBs were discarded already. This should never evaluate to True.
    if lacks_epitope_atoms(df_dataset, pdb_idcode):
        print(f" ----- BAD: {pdb_idcode} ----- ")
        bad_pdbs.append(pdb_idcode)

        # Register the entry with an idcode-only row in every frame.
        (df_PiPi_atom_indices, df_PiPi_atom_serials, df_PiPi_resSeq,
         df_PiPi_resname, df_PiPi_chain_ID, df_PiPi_chain_type,
         df_PiPi_cdr) = (
            pd.concat([frame, pd.DataFrame({'idcode': [pdb_idcode]})])
            for frame in (
                df_PiPi_atom_indices, df_PiPi_atom_serials, df_PiPi_resSeq,
                df_PiPi_resname, df_PiPi_chain_ID, df_PiPi_chain_type,
                df_PiPi_cdr))
        continue

    pdb_filename = (
        str_dir / (pdb_idcode + '.pdb') if is_cpx else casa_dir / pdb_file)
    trj_in = md.load(pdb_filename)

    # Antibody chains are the chainIDs df_dataset lists for this entry;
    # every other chain of the topology counts as antigen.
    all_ab_chains = df_dataset.loc[
        df_dataset.idcode == pdb_idcode, 'chainID'].tolist()
    topology_chain_ids = [chain.chain_id for chain in trj_in.topology.chains]
    ag_chains = [cid for cid in topology_chain_ids if cid not in all_ab_chains]
    ab_chains = [cid for cid in topology_chain_ids if cid not in ag_chains]

    #
    # Pi-Pi interactions
    #
    try:
        CG_rings, CoM_rings_xyz, normal_vectors = get_ring_data(
            trj_in, ab_chains, ring_atoms)
        pipi_ring_pairs = get_pipi_interactions(
            trj_in, CG_rings, CoM_rings_xyz, normal_vectors, cutoff_ring,
            cutoff_angle_pipi)

        (all_atom_indices, all_atom_serials, all_resSeq, all_resname,
         all_chainIDs, all_chain_type, all_cdr) = get_data_from_ring_ring(
            pdb_idcode, trj_in, pipi_ring_pairs, ab_chains, df_dataset)
    except Exception as e:
        # Fall back to empty results so the `finally` block can still record
        # the entry, then re-raise.
        (all_atom_indices, all_atom_serials, all_resSeq, all_resname,
         all_chainIDs, all_chain_type, all_cdr) = ([] for _ in range(7))
        logging.error(
            f"- {pdb_idcode} raised: {e.__class__}, saying: {e}, during PiPi "
            f"interactions calculation. Aborting.")
        raise e
    finally:
        # Collect: one row per frame; the result list sits in a single cell.
        (df_PiPi_atom_indices, df_PiPi_atom_serials, df_PiPi_resSeq,
         df_PiPi_resname, df_PiPi_chain_ID, df_PiPi_chain_type,
         df_PiPi_cdr) = (
            pd.concat([frame, pd.DataFrame(
                {'idcode': pdb_idcode, column: [values]})])
            for frame, column, values in (
                (df_PiPi_atom_indices, 'atom_indices', all_atom_indices),
                (df_PiPi_atom_serials, 'atom_serials', all_atom_serials),
                (df_PiPi_resSeq, 'resSeq', all_resSeq),
                (df_PiPi_resname, 'resname', all_resname),
                (df_PiPi_chain_ID, 'chain_ID', all_chainIDs),
                (df_PiPi_chain_type, 'chain_type', all_chain_type),
                (df_PiPi_cdr, 'CDR', all_cdr)))


### Pi-Ion

In [None]:
bad_pdbs = []
# Empty ['idcode', <attribute>] accumulators for Pi-anion and Pi-cation
# results (this variant has no `resname` frames).
(df_PiAnion_atom_indices, df_PiAnion_atom_serials, df_PiAnion_resSeq,
 df_PiAnion_chain_ID, df_PiAnion_chain_type, df_PiAnion_cdr) = (
    pd.DataFrame(columns=['idcode', column])
    for column in ('atom_indices', 'atom_serials', 'resSeq',
                   'chain_ID', 'chain_type', 'CDR'))
(df_PiCation_atom_indices, df_PiCation_atom_serials, df_PiCation_resSeq,
 df_PiCation_chain_ID, df_PiCation_chain_type, df_PiCation_cdr) = (
    pd.DataFrame(columns=['idcode', column])
    for column in ('atom_indices', 'atom_serials', 'resSeq',
                   'chain_ID', 'chain_type', 'CDR'))

In [None]:
bad_pdbs = []
# One ['idcode', <attribute>] accumulator per attribute, for Pi-anion and
# Pi-cation results separately.
pi_ion_columns = ('atom_indices', 'atom_serials', 'resSeq', 'resname',
                  'chain_ID', 'chain_type', 'CDR')
(df_PiAnion_atom_indices, df_PiAnion_atom_serials, df_PiAnion_resSeq,
 df_PiAnion_resname, df_PiAnion_chain_ID, df_PiAnion_chain_type,
 df_PiAnion_cdr) = (
    pd.DataFrame(columns=['idcode', column]) for column in pi_ion_columns)
(df_PiCation_atom_indices, df_PiCation_atom_serials, df_PiCation_resSeq,
 df_PiCation_resname, df_PiCation_chain_ID, df_PiCation_chain_type,
 df_PiCation_cdr) = (
    pd.DataFrame(columns=['idcode', column]) for column in pi_ion_columns)

check_pdb = '7cn2'
idx = pdb_list.index(check_pdb)
# NOTE(review): file_pdb_list and is_cpx_pdb are not defined in any visible
# cell of this notebook; they must come from elsewhere (hidden state).
for pdb_idcode, pdb_file, is_cpx in zip([pdb_list[idx]], [file_pdb_list[idx]], [is_cpx_pdb[idx]]):
# Full run: for pdb_idcode, pdb_file, is_cpx in zip(pdb_list, file_pdb_list, is_cpx_pdb):
    print(f"{pdb_idcode}", flush=True)

    if lacks_epitope_atoms(df_dataset, pdb_idcode):
        print(f" ----- BAD: {pdb_idcode} ----- ")
        bad_pdbs.append(pdb_idcode)

        # Register the entry with an idcode-only row in all fourteen frames.
        (df_PiAnion_atom_indices, df_PiAnion_atom_serials, df_PiAnion_resSeq,
         df_PiAnion_resname, df_PiAnion_chain_ID, df_PiAnion_chain_type,
         df_PiAnion_cdr,
         df_PiCation_atom_indices, df_PiCation_atom_serials,
         df_PiCation_resSeq, df_PiCation_resname, df_PiCation_chain_ID,
         df_PiCation_chain_type, df_PiCation_cdr) = (
            pd.concat([frame, pd.DataFrame({'idcode': [pdb_idcode]})])
            for frame in (
                df_PiAnion_atom_indices, df_PiAnion_atom_serials,
                df_PiAnion_resSeq, df_PiAnion_resname, df_PiAnion_chain_ID,
                df_PiAnion_chain_type, df_PiAnion_cdr,
                df_PiCation_atom_indices, df_PiCation_atom_serials,
                df_PiCation_resSeq, df_PiCation_resname,
                df_PiCation_chain_ID, df_PiCation_chain_type,
                df_PiCation_cdr))
        continue

    pdb_filename = (
        str_dir / (pdb_idcode + '.pdb') if is_cpx else casa_dir / pdb_file)
    trj_in = md.load(pdb_filename)

    # Antibody chains are the chainIDs df_dataset lists for this entry;
    # every other chain of the topology counts as antigen.
    all_ab_chains = df_dataset.loc[
        df_dataset.idcode == pdb_idcode, 'chainID'].tolist()
    topology_chain_ids = [chain.chain_id for chain in trj_in.topology.chains]
    ag_chains = [cid for cid in topology_chain_ids if cid not in all_ab_chains]
    ab_chains = [cid for cid in topology_chain_ids if cid not in ag_chains]

    #
    # Pi-ion interactions
    #
    try:
        CG_rings, CoM_rings_xyz, normal_vectors = get_ring_data(
            trj_in, ab_chains, ring_atoms_pi_ion)

        _, _, ids_ON_epitope_atoms, ids_ON_cdr_atoms, _, _ = get_ids_CON(
            trj_in.topology, df_dataset, pdb_idcode, ab_chains)

        # Antibody rings against epitope O/N atoms ...
        ring_ab_anion_ag, ring_ab_cation_ag = get_ion_ring_interactions(
            trj_in, CG_rings["antibody"], ids_ON_epitope_atoms,
            CoM_rings_xyz["antibody"], normal_vectors["antibody"],
            trj_in.xyz[0], cutoff_ring, cutoff_angle_pion)

        # ... and antigen rings against CDR O/N atoms.
        ring_ag_anion_ab, ring_ag_cation_ab = get_ion_ring_interactions(
            trj_in, CG_rings["antigen"], ids_ON_cdr_atoms,
            CoM_rings_xyz["antigen"], normal_vectors["antigen"],
            trj_in.xyz[0], cutoff_ring, cutoff_angle_pion)

        (all_atom_indices_ring_anion, all_atom_serials_ring_anion,
         all_resSeq_ring_anion, all_resname_ring_anion,
         all_chainIDs_ring_anion, all_chain_type_ring_anion,
         all_cdr_ring_anion) = get_data_from_ring_ion(
            pdb_idcode, trj_in, ring_ab_anion_ag + ring_ag_anion_ab,
            ab_chains, df_dataset)

        (all_atom_indices_ring_cation, all_atom_serials_ring_cation,
         all_resSeq_ring_cation, all_resname_ring_cation,
         all_chainIDs_ring_cation, all_chain_type_ring_cation,
         all_cdr_ring_cation) = get_data_from_ring_ion(
            pdb_idcode, trj_in, ring_ab_cation_ag + ring_ag_cation_ab,
            ab_chains, df_dataset)

    except Exception as e:
        # Fall back to empty results so the `finally` block can still record
        # the entry, then re-raise.
        (all_atom_indices_ring_anion, all_atom_serials_ring_anion,
         all_resSeq_ring_anion, all_resname_ring_anion,
         all_chainIDs_ring_anion, all_chain_type_ring_anion,
         all_cdr_ring_anion) = ([] for _ in range(7))

        (all_atom_indices_ring_cation, all_atom_serials_ring_cation,
         all_resSeq_ring_cation, all_resname_ring_cation,
         all_chainIDs_ring_cation, all_chain_type_ring_cation,
         all_cdr_ring_cation) = ([] for _ in range(7))

        logging.warning(
            f"- {pdb_idcode} raised: {e.__class__}, saying: {e}, during Pi-ion "
            f"interactions calculation. Aborting.")
        raise e
    finally:
        # Collect: one row per frame; the result list sits in a single cell.
        (df_PiAnion_atom_indices, df_PiAnion_atom_serials, df_PiAnion_resSeq,
         df_PiAnion_resname, df_PiAnion_chain_ID, df_PiAnion_chain_type,
         df_PiAnion_cdr,
         df_PiCation_atom_indices, df_PiCation_atom_serials,
         df_PiCation_resSeq, df_PiCation_resname, df_PiCation_chain_ID,
         df_PiCation_chain_type, df_PiCation_cdr) = (
            pd.concat([frame, pd.DataFrame(
                {'idcode': pdb_idcode, column: [values]})])
            for frame, column, values in (
                (df_PiAnion_atom_indices, 'atom_indices', all_atom_indices_ring_anion),
                (df_PiAnion_atom_serials, 'atom_serials', all_atom_serials_ring_anion),
                (df_PiAnion_resSeq, 'resSeq', all_resSeq_ring_anion),
                (df_PiAnion_resname, 'resname', all_resname_ring_anion),
                (df_PiAnion_chain_ID, 'chain_ID', all_chainIDs_ring_anion),
                (df_PiAnion_chain_type, 'chain_type', all_chain_type_ring_anion),
                (df_PiAnion_cdr, 'CDR', all_cdr_ring_anion),
                (df_PiCation_atom_indices, 'atom_indices', all_atom_indices_ring_cation),
                (df_PiCation_atom_serials, 'atom_serials', all_atom_serials_ring_cation),
                (df_PiCation_resSeq, 'resSeq', all_resSeq_ring_cation),
                (df_PiCation_resname, 'resname', all_resname_ring_cation),
                (df_PiCation_chain_ID, 'chain_ID', all_chainIDs_ring_cation),
                (df_PiCation_chain_type, 'chain_type', all_chain_type_ring_cation),
                (df_PiCation_cdr, 'CDR', all_cdr_ring_cation)))


In [None]:
# Debugging aid: show the type of the `ring` attribute of the first element
# stored in the `resname` cell at index label 0 of the Pi-cation frame.
type(df_PiCation_resname.resname[0][0].ring)

------

### hbond

In [494]:
import get_hbonds_90_39
importlib.reload(get_hbonds_90_39)
from get_hbonds_90_39 import *

hbond_dir = casa_dir / "hbonds"

# Per-entry lookups, keyed by PDB id code.
# NOTE: pickle.load executes arbitrary code; only open files you trust.
data_dir = casa_dir / 'data'
with open(data_dir / 'filenames.pkl', 'rb') as file:
    filenames = pickle.load(file)
with open(data_dir / 'chains.pkl', 'rb') as file:
    chains = pickle.load(file)

# The id codes to process are the keys of `filenames`.
pdb_list = list(filenames)
df_dataset = get_df_dataset(casa_dir)

SabDab protein antigen:
1154 proteins out of 2017, 57.2%
All: 1154
No Hchain: 0
No Lchain: 0
Both chains: 1154
Buried surfaces of 2492 proteins
with both chains: 867


In [9]:
# Display the antibody chain IDs stored in `chains` for the current entry.
# NOTE(review): the result depends on whichever cell last assigned `pdb_idcode`.
chains[pdb_idcode].antibody

('G', 'F')

In [495]:
# Hydrogen bonds per entry: run the external `hbplus` program on the raw PDB
# file and parse the `.hb2` file it is expected to leave in `hbond_dir`.
hbonds_dict = {}

# Loop-invariant locations, set once instead of on every iteration.
hbond_dir = Path.joinpath(casa_dir, "hbonds")
hbplus = Path.joinpath(casa_dir, 'hbonds', 'hbplus')

check_pdb = '4wfe'
idx = pdb_list.index(check_pdb)
for pdb_idcode in [pdb_list[idx]]:
    print(f"{pdb_idcode}", flush=True)
    pdb_filename = Path.joinpath(str_dir, pdb_idcode + '.pdb')
    trj_in = md.load(pdb_filename)
    ab_chains = chains[pdb_idcode].antibody
    ag_chains = chains[pdb_idcode].antigen

    # hbplus runs inside hbond_dir; its stdout/stderr are captured rather
    # than echoed into the notebook.
    process = subprocess.run(
        [hbplus, pdb_filename, "-A", "0", "0", "0", "-d", "3.9"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=hbond_dir)
    # The exit status used to be ignored, so a failed run could go unnoticed
    # and an .hb2 file left by an earlier run could be parsed instead.
    # NOTE(review): assumes hbplus exits with 0 on success. Only a warning is
    # emitted, so the control flow is unchanged if that does not hold.
    if process.returncode != 0:
        logging.warning(
            f"- {pdb_idcode}: hbplus exited with status {process.returncode}: "
            f"{process.stderr.decode(errors='replace').strip()}")

    # Same base name as the PDB file, with the `hb2` extension.
    hb2_file = Path.joinpath(hbond_dir, pdb_filename.stem + ".hb2")

    hbonds_dict[pdb_idcode] = parse_hb2(
        pdb_idcode, hb2_file, df_dataset, trj_in.topology, ab_chains, ag_chains)

4wfe




In [499]:
# Absolute path of the current working directory.
source_location = Path().resolve()

In [500]:
# Display the resolved working directory.
source_location

PosixPath('/home/pbarletta/labo/22/AbAgInterface')

---

### shielding

In [502]:
import shielding
importlib.reload(shielding)
from shielding import *

# Per-entry lookups, keyed by PDB id code.
# NOTE: pickle.load executes arbitrary code; only open files you trust.
data_dir = casa_dir / 'data'
with open(data_dir / 'filenames.pkl', 'rb') as file:
    filenames = pickle.load(file)
with open(data_dir / 'chains.pkl', 'rb') as file:
    chains = pickle.load(file)

# The id codes to process are the keys of `filenames`.
pdb_list = list(filenames)
df_dataset = get_df_dataset(casa_dir)

SabDab protein antigen:
1154 proteins out of 2017, 57.2%
All: 1154
No Hchain: 0
No Lchain: 0
Both chains: 1154
Buried surfaces of 2492 proteins
with both chains: 867


In [None]:
check_pdb = '5f96'
idx = pdb_list.index(check_pdb)
# NOTE(review): file_pdb_list and is_cpx_pdb are not defined in any visible
# cell of this notebook, and `is_cpx` is never read inside this loop.
for pdb_idcode, pdb_file, is_cpx in zip([pdb_list[idx]], [file_pdb_list[idx]], [is_cpx_pdb[idx]]):
    print(f"{pdb_idcode}", flush=True)

    pdb_filename = casa_dir / pdb_file
    trj_in = md.load(pdb_filename)

    # Antibody chains are the chainIDs df_dataset lists for this entry;
    # every other chain of the topology counts as antigen.
    all_ab_chains = df_dataset.loc[
        df_dataset.idcode == pdb_idcode, 'chainID'].tolist()
    topology_chain_ids = [chain.chain_id for chain in trj_in.topology.chains]
    ag_chains = [cid for cid in topology_chain_ids if cid not in all_ab_chains]
    ab_chains = [cid for cid in topology_chain_ids if cid not in ag_chains]

    # NOTE: pickle.load executes arbitrary code; only open files you trust.
    with open(casa_dir / 'data' / 'hbonds_1_32.pkl', 'rb') as file:
        hbonds_dict = pickle.load(file)

In [504]:
shielding_dict = {}
check_pdb = '5f96'
idx = pdb_list.index(check_pdb)
# Debug run on the single entry `check_pdb`; loop over `pdb_list` itself to
# process every entry.
for pdb_idcode in pdb_list[idx:idx + 1]:
    print(f"{pdb_idcode}", flush=True)

    pdb_filename = str_dir / (pdb_idcode + '.pdb')
    trj_in = md.load(pdb_filename)
    ab_chains = chains[pdb_idcode].antibody
    ag_chains = chains[pdb_idcode].antigen

    (ids_C_epitope_atoms, ids_C_cdr_atoms, ids_ON_epitope_atoms,
     ids_ON_cdr_atoms, is_complex_epitope, is_complex_cdr) = get_ids_CON(
        trj_in.topology, df_dataset, pdb_idcode, ab_chains)

    if is_complex_epitope or is_complex_cdr:
        print(f"{pdb_idcode} has epitope atoms not found on the PDB.", flush=True)

    # Polar (O/N) atoms from both sides are candidate shielding atoms.
    ids_ON_atoms = ids_ON_cdr_atoms + ids_ON_epitope_atoms

    # Every CDR carbon paired with every polar atom / every epitope carbon.
    C_ON_pairs = np.array(list(itertools.product(ids_C_cdr_atoms, ids_ON_atoms)))
    C_C_pairs = np.array(
        list(itertools.product(ids_C_cdr_atoms, ids_C_epitope_atoms)))

    # Distance matrices with one row per CDR carbon.
    C_ON_distancias = md.compute_distances(trj_in, C_ON_pairs).reshape(
        (len(ids_C_cdr_atoms), len(ids_ON_atoms)))
    C_C_distancias = md.compute_distances(trj_in, C_C_pairs).reshape(
        (len(ids_C_cdr_atoms), len(ids_C_epitope_atoms)))

    indices_close_C_C_distancias = np.where(C_C_distancias < cutoff)
    mask_close_C_ON_distancias = C_ON_distancias < cutoff
    shielding_pdb = {}

    # Visit every CDR-carbon / epitope-carbon pair closer than `cutoff`.
    for i, j in zip(*indices_close_C_C_distancias):
        C_cdr_id = ids_C_cdr_atoms[i]
        C_epi_id = ids_C_epitope_atoms[j]
        # Polar atoms within `cutoff` of this CDR carbon (row i of the mask).
        surrounding_ON_ids = [
            ids_ON_atoms[k]
            for k in np.where(mask_close_C_ON_distancias[i, :])[0]]

        shielded, ON_id = is_shielded(
            trj_in.xyz[0], C_cdr_id, C_epi_id, surrounding_ON_ids)
        if not shielded:
            continue

        polar_atom = trj_in.topology.atom(ON_id)
        chainID = polar_atom.residue.chain.chain_id
        resSeq = polar_atom.residue.resSeq
        resname = polar_atom.residue.name
        chain_type, cdr = get_chain_info(
            df_dataset, pdb_idcode, ab_chains, chainID, resSeq)
        serial = polar_atom.serial
        element = polar_atom.element.symbol
        is_sidechain = polar_atom.is_sidechain

        # Compile all the info on this shielding polar atom.
        shielding_atom = ShieldingAtom(
            chainID=chainID, chain_type=chain_type, CDR=cdr,
            resSeq=resSeq, resname=resname, index=ON_id,
            serial=serial, element=element, is_sidechain=is_sidechain)

        # Keyed by atom index, so each polar atom is stored once.
        shielding_pdb[ON_id] = shielding_atom

    shielding_dict[pdb_idcode] = shielding_pdb
    # Also add the total number of polar atoms on each molecule
    cnt_ON_cdr_SC, cnt_ON_cdr_BB, cnt_ON_epi_SC, cnt_ON_epi_BB = count_ONs(
        trj_in.topology, ids_ON_epitope_atoms, ids_ON_cdr_atoms)
    shielding_dict.update({
        pdb_idcode + "_cnt_ON_cdr_SC": cnt_ON_cdr_SC,
        pdb_idcode + "_cnt_ON_cdr_BB": cnt_ON_cdr_BB,
        pdb_idcode + "_cnt_ON_epi_SC": cnt_ON_epi_SC,
        pdb_idcode + "_cnt_ON_epi_BB": cnt_ON_epi_BB,
    })

5f96
5f96 has epitope atoms not found on the PDB.


-----

### Count_ONs

In [87]:
# NOTE: pickle.load executes arbitrary code; only open files you trust.
with open(casa_dir / 'data' / 'hbonds_0_39.pkl', 'rb') as file:
    hbonds = pickle.load(file)
interacting_ab_chains = dict()

In [96]:
# NOTE: pickle.load executes arbitrary code; only open files you trust.
data_dir = casa_dir / 'data'
with open(data_dir / 'filenames.pkl', 'rb') as file:
    filenames = pickle.load(file)
with open(data_dir / 'chains.pkl', 'rb') as file:
    chains = pickle.load(file)
with open(data_dir / 'hbonds_0_39.pkl', 'rb') as file:
    hbonds = pickle.load(file)

pdb_list = list(filenames)
df_dataset = get_df_dataset(casa_dir)

# For every entry: the antibody chains that take part in at least one of its
# stored hydrogen bonds.
interacting_ab_chains = {}
for pdb_idcode in pdb_list:
    pdb_filename = Path(filenames[pdb_idcode])
    trj_in = md.load(exposed_dir / pdb_idcode / pdb_filename)
    ab_chains = chains[pdb_idcode].antibody
    ag_chains = chains[pdb_idcode].antigen

    # NOTE(review): `serial_to_id` and `ag_chains` are not read anywhere in
    # this cell; the structure is loaded only to build this mapping.
    serial_to_id = {
        atomo.serial: atomo.index for atomo in trj_in.topology.atoms}

    # Despite its name, this list holds the chain IDs of every donor and
    # acceptor atom (donor first, then acceptor, for each bond).
    ids_oxy_nitro = [
        chain_id
        for lista_hbond in hbonds[pdb_idcode].values()
        for hb in lista_hbond
        for chain_id in (hb.donor.chainID, hb.acceptor.chainID)]

    interacting_ab_chains_pdb = set(ids_oxy_nitro) & set(ab_chains)

    interacting_ab_chains[pdb_idcode] = interacting_ab_chains_pdb

SabDab protein antigen:
1154 proteins out of 2017, 57.2%
All: 1154
No Hchain: 0
No Lchain: 0
Both chains: 1154
Buried surfaces of 2492 proteins
with both chains: 867


### DSSP

In [6]:
# Per-entry lookups, keyed by PDB id code.
# NOTE: pickle.load executes arbitrary code; only open files you trust.
data_dir = casa_dir / 'data'
with open(data_dir / 'filenames.pkl', 'rb') as file:
    filenames = pickle.load(file)
with open(data_dir / 'chains.pkl', 'rb') as file:
    chains = pickle.load(file)
with open(data_dir / 'interacting_chains.pkl', 'rb') as file:
    interacting_chains = pickle.load(file)
with open(data_dir / 'hbonds_0_39.pkl', 'rb') as file:
    hbonds = pickle.load(file)

# The id codes to process are the keys of `filenames`.
pdb_list = list(filenames)
df_dataset = get_df_dataset(casa_dir)

SabDab protein antigen:
1154 proteins out of 2017, 57.2%
All: 1154
No Hchain: 0
No Lchain: 0
Both chains: 1154
Buried surfaces of 2492 proteins
with both chains: 867


In [11]:
SSE = {}
SSE_cnt = {}
check_pdb = '1afv'
idx = pdb_list.index(check_pdb)
# Debug run on the single entry `check_pdb`; loop over `pdb_list` itself to
# process every entry.
for pdb_idcode in pdb_list[idx:idx + 1]:
    print(f"{pdb_idcode}", flush=True)
    pdb_filename = Path(filenames[pdb_idcode])
    trj_in = md.load(exposed_dir / pdb_idcode / pdb_filename)
    ag_chain = chains[pdb_idcode].antigen
    # Get SSE of each residue (first frame only).
    dssp = md.compute_dssp(trj_in)[0]
    SSE_pdb = {}
    SSE_cnt_pdb = dict.fromkeys(('H', 'E', 'C', 'NA'), 0)
    # DSSP codes are walked in step with the topology residues; only
    # antigen residues are recorded.
    # NOTE(review): SSE_pdb is keyed by resSeq alone, so antigen residues
    # that share a resSeq (e.g. on different chains) overwrite each other,
    # while SSE_cnt_pdb still counts each of them.
    for res, sse_code in zip(trj_in.topology.residues, dssp):
        if res.chain.chain_id not in ag_chain:
            continue
        SSE_pdb[res.resSeq] = sse_code
        SSE_cnt_pdb[sse_code] += 1
    SSE[pdb_idcode] = SSE_pdb
    SSE_cnt[pdb_idcode] = SSE_cnt_pdb

1afv


Counter({'ALA': 74,
         'TYR': 63,
         'THR': 107,
         'ASN': 80,
         'SER': 111,
         'PHE': 69,
         'ARG': 40,
         'GLY': 87,
         'VAL': 99,
         'PRO': 56,
         'ASP': 53,
         'LYS': 53,
         'LEU': 92,
         'HIS': 14,
         'GLN': 63,
         'TRP': 10,
         'GLU': 44,
         'ILE': 66,
         'CYS': 31,
         'MET': 15})

In [21]:
# Show the interface-table row(s) for entry 7mhy.
df_interface.query("idcode == '7mhy'")

Unnamed: 0,idcode,chainID,chain_type,ab_ag_interface_res,ag_ab_interface_res,chains,hchains,lchains,hcdr1,hcdr2,hcdr3,lcdr1,lcdr2,lcdr3,ab_res,ag_res,incomplete
0,7mhy,M,H,"[(M, 49, THR, CB, 1), (N, 91, SER, CB, 1), (N,...","[(A, 38, GLN, HG3, 1), (A, 278, GLU, CB, 1), (...","(9, 3, 3, 0, 5, 0, 2, 0, [THR, SER, TRP, ASN, ...",9.0,3.0,3.0,0.0,5.0,0.0,2.0,0.0,"[THR, SER, TRP, ASN, ASN, ASN, TRP, TRP, TRP, ...","[GLU, GLU, GLU, GLU, GLU, GLU, THR, THR, PHE, ...",False
