# Find Epitope Residues on Nipah Virus G Glycoprotein

In [4]:
from pandaprot import PandaProt
from biopandas.pdb import PandasPdb

In [42]:
## Structure file to analyse
pdb_path = "../data/pdbs/2vsm_clean.pdb"

## Chain roles within the complex
antigen_chain = 'A'  # Nipah Virus G Glycoprotein
binder_chains = ['B']  # Ephrin-B2 Receptor

## Three-letter -> one-letter codes for the 20 standard amino acids
aa_keys = dict(
    ALA='A', CYS='C', ASP='D', GLU='E',
    PHE='F', GLY='G', HIS='H', ILE='I',
    LYS='K', LEU='L', MET='M', ASN='N',
    PRO='P', GLN='Q', ARG='R', SER='S',
    THR='T', VAL='V', TRP='W', TYR='Y',
)

In [43]:
## Initialize PandaProt with PDB file.
## The chain list is derived from antigen_chain / binder_chains rather than
## hard-coded, so editing the chain configuration cannot leave the analyzer
## loading a different set of chains than the later cells filter on.
analyzer = PandaProt(pdb_path, chains=[antigen_chain, *binder_chains])

## Map all interactions.
## The result is consumed downstream as a mapping of interaction type ->
## list of per-interaction records.
interactions = analyzer.map_interactions()

Successfully loaded structure from ../data/pdbs/2vsm_clean.pdb
Found 1268 interactions:
  - hydrogen_bonds: 58
  - ionic_interactions: 10
  - hydrophobic_interactions: 26
  - pi_stacking: 0
  - pi_cation: 2
  - salt_bridges: 10
  - cation_pi: 2
  - ch_pi: 24
  - disulfide_bridges: 0
  - sulfur_aromatic: 0
  - water_mediated: 0
  - metal_coordination: 0
  - halogen_bonds: 0
  - amide_aromatic: 0
  - van_der_waals: 1116
  - amide_amide: 20


In [44]:
## Find epitope residues: antigen residues that take part in any
## interaction with one of the binder chains, whatever the interaction type
epitope_numbers = set()

for contacts in interactions.values():
    for contact in contacts:
        ## Chain / residue labels of both partners; the donor_* / acceptor_*
        ## keys are the fallback when chain1 / chain2 (residue1 / residue2)
        ## are absent from the record
        first_chain = contact.get('chain1', contact.get('donor_chain', ''))
        second_chain = contact.get('chain2', contact.get('acceptor_chain', ''))
        first_res = contact.get('residue1', contact.get('donor_residue', ''))
        second_res = contact.get('residue2', contact.get('acceptor_residue', ''))

        ## Keep only antigen <-> binder contacts and pick the partner that
        ## sits on the antigen chain
        if first_chain == antigen_chain and second_chain in binder_chains:
            antigen_res = first_res
        elif second_chain == antigen_chain and first_chain in binder_chains:
            antigen_res = second_res
        else:
            continue

        ## The residue label is split on a single space and its second
        ## token is taken as the residue number
        epitope_numbers.add(int(antigen_res.split(' ')[1]))

## Unique residue numbers in ascending order
epitope_residues = sorted(epitope_numbers)

print(f"Epitope Residues on Chain {antigen_chain} interacting with Chain(s) {binder_chains}:")
print(epitope_residues)

Epitope Residues on Chain A interacting with Chain(s) ['B']:
[239, 240, 241, 242, 305, 388, 389, 401, 402, 458, 488, 489, 490, 491, 492, 501, 504, 505, 506, 507, 530, 531, 532, 533, 555, 557, 558, 559, 579, 580, 581, 583, 588]


In [45]:
## Parse the structure file and take its ATOM records
pdb_df = PandasPdb().read_pdb(pdb_path)
atom_df = pdb_df.df['ATOM']

## Restrict the atom table to the antigen chain
on_antigen = atom_df['chain_id'] == antigen_chain
chain_df = atom_df.loc[on_antigen]

## One row per distinct (residue_number, residue_name) pair, kept in
## order of first appearance
residues = chain_df.loc[:, ['residue_number', 'residue_name']].drop_duplicates()

residues.head()

Unnamed: 0,residue_number,residue_name
0,188,ILE
8,189,CYS
14,190,LEU
22,191,GLN
31,192,LYS


In [46]:
## Build the one-letter sequence of the antigen chain, wrapping every
## epitope residue in square brackets, then print it as a single string
epitope_lookup = set(epitope_residues)  # set lookup instead of scanning the list once per residue

annotated_sequence = []
for resnum, resname in residues.values:
    ## 'X' stands in for any residue name missing from aa_keys, so a single
    ## non-standard residue does not abort the whole cell with a KeyError
    letter = aa_keys.get(resname, 'X')
    annotated_sequence.append(f"[{letter}]" if int(resnum) in epitope_lookup else letter)

print("".join(annotated_sequence))

ICLQKTSNQILKPKLISYTLGQSGTCITDPLLAMDEGYFAYSHLERIG[S][C][S][R]GVSKQRIIGVGEVLDRGDEVPSLFMTNVWTPPNPNTVYHCSAVYNNEFYYVLCAVSTVGDPI[L]NSTYWSGSLMMTRLAVKPKSNGGGYNQHQLALRSIEKGRYDKVMPYGPSGIKQGDTLYFPAVGFLVRTEFKYNDSNCPITKC[Q][Y]SKPENCRLSMG[I][R]PNSHYILRSGLLKYNLSDGENPKVVFIEISDQRLSIGSPSKIYDSLGQPVFYQAS[F]SWDTMIKFGDVLTVNPLVVNWRNNTVISR[P][G][Q][S][Q]CPRFNTCP[E]IC[W][E][G][V]YNDAFLIDRINWISAGVFLDSN[Q][T][A][E]NPVFTVFKDNEILYRAQLASE[D]T[N][A][Q]KTITNCFLLKNKIWCISLV[E][I][Y]D[T]GDNV[I]RPKLFAVKIPEQCTH
