In [1]:
import os
import pandas
from biopandas.pdb import PandasPdb

In [2]:
# The project root is the parent of the current working directory;
# input files are read from its "data" sub-directory.
project_root = os.path.split(os.getcwd())[0]
data_root = os.path.join(project_root, "data")
data_root

'/home/khushnandan/WorkSpace/distance-between-aminoacids/data'

In [3]:
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas warnings such as
# SettingWithCopyWarning; consider narrowing it to specific categories.
import warnings
warnings.filterwarnings('ignore')

In [4]:
!ls {data_root}

3na0.pdb  3NA0_single.pdb


### Function that takes the atom records of two residues (or molecules) and computes the minimum inter-atomic distance between them

In [5]:
import math
def minimum_distance(A,B):
    """Return the smallest Euclidean distance between any atom of A and any atom of B.

    Parameters
    ----------
    A, B : pandas.DataFrame
        Atom tables containing 'x_coord', 'y_coord' and 'z_coord' columns.
        Values may be numbers or numeric strings; they are converted with
        ``pandas.to_numeric`` on temporary copies, so neither input frame
        is modified.

    Returns
    -------
    float
        The minimum pairwise distance, in the same unit as the coordinates.
        ``math.inf`` is returned when no distance can be computed (either
        frame has no rows, or every candidate pair involves a missing
        coordinate).

    Raises
    ------
    ValueError
        Propagated from ``pandas.to_numeric`` when a coordinate value
        cannot be parsed as a number.
    """
    squared = 0.0
    for col in ['x_coord', 'y_coord', 'z_coord']:
        a_axis = pandas.to_numeric(A[col]).to_numpy(dtype=float)
        b_axis = pandas.to_numeric(B[col]).to_numpy(dtype=float)
        # Broadcasting (len(A), 1) against (1, len(B)) yields every pairwise
        # difference along this axis without a Python-level double loop.
        squared = squared + (a_axis[:, None] - b_axis[None, :]) ** 2
    # Drop pairs with a missing coordinate so that NaN cannot win the minimum.
    squared = squared[~pandas.isna(squared)]
    if squared.size == 0:
        return math.inf
    # sqrt is monotonic, so it only needs to be applied to the smallest value.
    return math.sqrt(squared.min())

### read the pdb data file using biopandas.pdb

In [6]:
# Parse the PDB file with biopandas; the parsed records are accessed below as
# DataFrames via ppdb.df['ATOM'] and ppdb.df['HETATM'].
ppdb = PandasPdb().read_pdb(os.path.join(data_root,'3NA0_single.pdb'))

In [7]:
ppdb.df['ATOM']

Unnamed: 0,record_name,atom_number,blank_1,atom_name,alt_loc,residue_name,blank_2,chain_id,residue_number,insertion,...,x_coord,y_coord,z_coord,occupancy,b_factor,blank_4,segment_id,element_symbol,charge,line_idx
0,ATOM,1,,N,,PRO,,A,45,,...,-68.771,-15.338,58.432,1.0,40.52,,,N,,703
1,ATOM,2,,CA,,PRO,,A,45,,...,-67.356,-15.057,58.788,1.0,40.21,,,C,,704
2,ATOM,3,,C,,PRO,,A,45,,...,-66.534,-16.349,58.969,1.0,39.93,,,C,,705
3,ATOM,4,,O,,PRO,,A,45,,...,-66.625,-16.981,60.022,1.0,39.92,,,O,,706
4,ATOM,5,,CB,,PRO,,A,45,,...,-67.461,-14.279,60.118,1.0,40.36,,,C,,707
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4064,ATOM,4066,,CB,,GLN,,C,93,,...,-48.582,20.181,60.000,1.0,86.71,,,C,,4768
4065,ATOM,4067,,CG,,GLN,,C,93,,...,-47.688,19.028,60.488,1.0,86.32,,,C,,4769
4066,ATOM,4068,,CD,,GLN,,C,93,,...,-46.348,19.483,61.052,1.0,85.81,,,C,,4770
4067,ATOM,4069,,OE1,,GLN,,C,93,,...,-45.728,20.419,60.546,1.0,85.81,,,O,,4771


In [8]:
ppdb.df['ATOM'].columns

Index(['record_name', 'atom_number', 'blank_1', 'atom_name', 'alt_loc',
       'residue_name', 'blank_2', 'chain_id', 'residue_number', 'insertion',
       'blank_3', 'x_coord', 'y_coord', 'z_coord', 'occupancy', 'b_factor',
       'blank_4', 'segment_id', 'element_symbol', 'charge', 'line_idx'],
      dtype='object')

In [9]:
# Lookup tables used by the distance cells:
#   df_groups_chain  - ATOM records grouped by chain id
#   df_groups_ligand - HETATM records grouped by residue number
#   df_groups        - chain 'A' ATOM records grouped by residue number
df_groups_chain = ppdb.df['ATOM'].groupby('chain_id')
df_groups_ligand = ppdb.df['HETATM'].groupby('residue_number')
df_groups = (
    ppdb.df['ATOM']
    .loc[lambda atoms: atoms['chain_id'] == 'A']
    .groupby('residue_number')
)

In [10]:
ppdb.df['HETATM']

Unnamed: 0,record_name,atom_number,blank_1,atom_name,alt_loc,residue_name,blank_2,chain_id,residue_number,insertion,...,x_coord,y_coord,z_coord,occupancy,b_factor,blank_4,segment_id,element_symbol,charge,line_idx
0,HETATM,4072,,CHA,,HEM,,A,601,,...,-37.806,0.593,60.036,1.0,17.79,,,C,,4774
1,HETATM,4073,,CHB,,HEM,,A,601,,...,-37.487,0.851,55.153,1.0,18.00,,,C,,4775
2,HETATM,4074,,CHC,,HEM,,A,601,,...,-33.664,3.716,55.656,1.0,18.56,,,C,,4776
3,HETATM,4075,,CHD,,HEM,,A,601,,...,-33.505,2.880,60.443,1.0,18.92,,,C,,4777
4,HETATM,4076,,C1A,,HEM,,A,601,,...,-38.042,0.450,58.676,1.0,18.42,,,C,,4778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,HETATM,4144,,C27,,2DC,,A,602,,...,-29.407,-2.931,60.893,1.0,20.04,,,C,,4846
73,HETATM,4145,,FE1,,FES,,C,150,,...,-47.998,12.420,64.599,1.0,82.99,,,Fe,,4847
74,HETATM,4146,,FE2,,FES,,C,150,,...,-49.334,15.109,63.850,1.0,84.27,,,Fe,,4848
75,HETATM,4147,,S1,,FES,,C,150,,...,-48.569,13.390,62.707,1.0,83.72,,,S,,4849


In [11]:
df_groups.get_group(104) # access the residue number 104

Unnamed: 0,record_name,atom_number,blank_1,atom_name,alt_loc,residue_name,blank_2,chain_id,residue_number,insertion,...,x_coord,y_coord,z_coord,occupancy,b_factor,blank_4,segment_id,element_symbol,charge,line_idx
501,ATOM,502,,N,,PRO,,A,104,,...,-54.375,-3.394,48.829,1.0,26.17,,,N,,1204
502,ATOM,503,,CA,,PRO,,A,104,,...,-54.285,-2.18,49.645,1.0,26.76,,,C,,1205
503,ATOM,504,,C,,PRO,,A,104,,...,-55.512,-1.949,50.53,1.0,27.86,,,C,,1206
504,ATOM,505,,O,,PRO,,A,104,,...,-55.41,-1.255,51.549,1.0,28.36,,,O,,1207
505,ATOM,506,,CB,,PRO,,A,104,,...,-54.139,-1.067,48.606,1.0,26.37,,,C,,1208
506,ATOM,507,,CG,,PRO,,A,104,,...,-53.598,-1.739,47.403,1.0,25.76,,,C,,1209
507,ATOM,508,,CD,,PRO,,A,104,,...,-54.225,-3.094,47.393,1.0,25.56,,,C,,1210


In [12]:
# defining the list of amino acids by their positions, from which we want to compute the minimum distance 

# Residue numbers of the amino acids of interest, in ascending order.
muts_pos = [
    79, 104, 120, 138, 141, 151, 189, 215,
    222, 269, 314, 330, 335, 353, 359, 360,
    391, 395, 415, 423, 451, 460, 465,
]

### Distance from the heme (HEM) molecule

In [13]:
# B: HETATM atoms with residue number 601 (listed as HEM in the HETATM table).
# NOTE(review): df_groups_ligand is grouped by residue_number only, so HETATM
# records from any chain sharing this number would be included; confirm.
B = df_groups_ligand.get_group(601)
print('AAcid', '\t', 'Distance')
for mut_pos in muts_pos:
    # A: chain-A ATOM records of the residue at this position.
    A = df_groups.get_group(mut_pos)
    print(mut_pos, '\t', '%.3f' % minimum_distance(A,B))


AAcid 	 Distance
79 	 17.143
104 	 14.165
120 	 2.666
138 	 7.001
141 	 5.995
151 	 2.667
189 	 29.796
215 	 11.353
222 	 15.797
269 	 16.333
314 	 13.287
330 	 3.292
335 	 6.742
353 	 20.185
359 	 21.665
360 	 20.775
391 	 3.535
395 	 4.947
415 	 9.280
423 	 13.298
451 	 11.074
460 	 2.729
465 	 5.257


### Distance from the 2DC molecule

In [14]:
# B: HETATM atoms with residue number 602 (listed as 2DC in the HETATM table).
# NOTE(review): df_groups_ligand is grouped by residue_number only, so HETATM
# records from any chain sharing this number would be included; confirm.
B = df_groups_ligand.get_group(602)
print('AAcid', '\t', 'Distance')
for mut_pos in muts_pos:
    # A: chain-A ATOM records of the residue at this position.
    A = df_groups.get_group(mut_pos)
    print(mut_pos, '\t', '%.3f' % minimum_distance(A,B))

AAcid 	 Distance
79 	 12.994
104 	 17.526
120 	 3.919
138 	 7.964
141 	 7.250
151 	 9.564
189 	 34.139
215 	 14.039
222 	 20.002
269 	 9.945
314 	 13.150
330 	 3.969
335 	 13.315
353 	 28.023
359 	 28.967
360 	 27.085
391 	 3.275
395 	 3.261
415 	 9.010
423 	 13.886
451 	 14.828
460 	 7.894
465 	 8.733


### Distance from the adrenodoxin molecule (chain C)

In [15]:
# B: every ATOM record of chain 'C' (df_groups_chain groups ATOM rows by chain id).
B = df_groups_chain.get_group('C')
print('AAcid', '\t', 'Distance')
for mut_pos in muts_pos:
    # A: chain-A ATOM records of the residue at this position.
    A = df_groups.get_group(mut_pos)
    print(mut_pos, '\t', f"{minimum_distance(A,B):.3f}")

AAcid 	 Distance
79 	 20.476
104 	 7.746
120 	 10.484
138 	 15.897
141 	 16.506
151 	 4.841
189 	 36.183
215 	 18.844
222 	 22.948
269 	 28.571
314 	 19.431
330 	 16.494
335 	 14.877
353 	 15.906
359 	 10.418
360 	 13.213
391 	 13.967
395 	 13.246
415 	 15.248
423 	 13.362
451 	 4.519
460 	 5.255
465 	 3.396
