# PDB File Parser for Vina

In [17]:
import pandas as pd
import numpy as np
import re

In [10]:
# Parse the ATOM records of a PDB file into `pdb_df`, a DataFrame indexed by
# atom serial number.
#
# PDB coordinate records are fixed-width, so each field is cut out by column
# position rather than by splitting on whitespace.

# Field name -> (start, end) slice bounds, 0-based and end-exclusive.
# The record name occupies columns 1-6 and the atom serial number columns 7-11
# of the record, i.e. slices [0:6] and [6:11].
ATOM_FIELDS = {
    'Record name': (0, 6),
    'Atom number': (6, 11),
    'Atom name': (12, 16),
    'AltLoc': (16, 17),
    'ResName': (17, 20),
    'ChainID': (21, 22),
    'ResSeq': (22, 26),
    'AChar': (26, 27),
    'X': (30, 38),
    'Y': (38, 46),
    'Z': (46, 54),
    'Occupancy': (54, 60),
    'Temperature factor': (60, 66),
    'Segment identifier': (72, 76),
    'Element symbol': (76, 78),
    'Charge': (78, 80),
}

# Read through a context manager so the file handle is closed right away.
with open('4ey4.pdb') as pdb_handle:
    pdb_file = pdb_handle.readlines()

pdb = {field: [] for field in ATOM_FIELDS}

for line in pdb_file:
    if line.startswith('ATOM'):
        for field, (start, end) in ATOM_FIELDS.items():
            # Slicing never raises on a short line, and strip() drops the
            # column padding and any trailing newline so text fields compare
            # cleanly (e.g. 'CA' rather than ' CA ').
            pdb[field].append(line[start:end].strip())

# 'AChar' is collected in `pdb` but is not one of the DataFrame columns.
pdb_df = pd.DataFrame(
    pdb,
    columns=['Record name', 'Atom number', 'Atom name', 'AltLoc', 'ResName',
             'ChainID', 'ResSeq', 'X', 'Y', 'Z', 'Occupancy',
             'Temperature factor', 'Segment identifier', 'Element symbol',
             'Charge'],
).set_index('Atom number')

coordinate_columns = ['X', 'Y', 'Z']
pdb_df[coordinate_columns] = pdb_df[coordinate_columns].apply(pd.to_numeric)
pdb_df['ResSeq'] = pd.to_numeric(pdb_df['ResSeq'])

# A blank coordinate field would otherwise become NaN and be silently skipped
# by the min/max/median calculations that use these columns.
if pdb_df[coordinate_columns].isna().any().any():
    raise ValueError('ATOM record with missing X/Y/Z coordinates')

# Occupancy and temperature factor are numeric too; blank or malformed values
# become NaN instead of aborting the parse.
quality_columns = ['Occupancy', 'Temperature factor']
pdb_df[quality_columns] = pdb_df[quality_columns].apply(pd.to_numeric, errors='coerce')

pdb_df.head(5)

Unnamed: 0_level_0,Record name,Atom name,AltLoc,ResName,ChainID,ResSeq,X,Y,Z,Occupancy,Temperature factor,Segment identifier,Element symbol,Charge
Atom number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,ATOM,N,,GLU,A,4,2.162,-35.797,69.748,1.0,77.71,,N,
2,ATOM,CA,,GLU,A,4,2.144,-35.298,68.374,1.0,88.24,,C,
3,ATOM,C,,GLU,A,4,3.381,-35.792,67.611,1.0,87.84,,C,
4,ATOM,O,,GLU,A,4,4.51,-35.681,68.106,1.0,83.94,,O,
5,ATOM,CB,,GLU,A,4,2.072,-33.765,68.363,1.0,90.35,,C,


In [16]:
# Axis-aligned bounding box of every parsed atom: per-axis extent, box centre
# and box volume, printed with the --size_* / --center_* labels.
coords_max = pdb_df[['X', 'Y', 'Z']].max()
coords_min = pdb_df[['X', 'Y', 'Z']].min()

x_size = coords_max['X'] - coords_min['X']
y_size = coords_max['Y'] - coords_min['Y']
z_size = coords_max['Z'] - coords_min['Z']

print('Globular Size\n',
      '--size_x', x_size, '--size_y', y_size, '--size_z', z_size)

# Centre on each axis is the upper bound minus half of the extent.
print('Molecular Center\n',
      '--center_x', coords_max['X'] - x_size / 2,
      '--center_y', coords_max['Y'] - y_size / 2,
      '--center_z', coords_max['Z'] - z_size / 2)

box_volume = x_size * y_size * z_size
print('Volume\n', np.around(box_volume, 1))

Globular Size
 --size_x 58.227 --size_y 59.776 --size_z 71.957
Molecular Center
 --center_x -0.9965 --center_y -38.789 --center_z 33.7695
Volume
 250451.9


In [12]:
residue = 234

# Coordinates of all atoms whose residue number equals `residue`.
# NOTE(review): the filter uses ResSeq only, so the same residue number in
# several chains would be pooled together — confirm this is intended.
site_atoms = pdb_df.loc[pdb_df['ResSeq'] == residue, ['X', 'Y', 'Z']]

# Centre = per-axis median of those atoms, rounded to 3 decimals.
x_c, y_c, z_c = (
    np.around(site_atoms[axis].median(), 3) for axis in ('X', 'Y', 'Z')
)

print('Active Site Center')
print('--center_x', x_c, '--center_y', y_c, '--center_z', z_c)

Active Site Center
--center_x -3.085 --center_y -37.897 --center_z 19.16


In [13]:
# Show the atom rows that feed the active-site centre. Reuses `residue`
# instead of repeating the literal 234 so the two cannot drift apart.
pdb_df[pdb_df['ResSeq'] == residue]

Unnamed: 0_level_0,Record name,Atom name,AltLoc,ResName,ChainID,ResSeq,X,Y,Z,Occupancy,Temperature factor,Segment identifier,Element symbol,Charge
Atom number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1783,ATOM,N,,GLY,A,234,-1.341,-37.87,19.5,1.0,42.69,,N,
1784,ATOM,CA,,GLY,A,234,-2.506,-38.72,19.258,1.0,50.34,,C,
1785,ATOM,C,,GLY,A,234,-3.663,-37.925,18.648,1.0,52.87,,C,
1786,ATOM,O,,GLY,A,234,-3.903,-36.788,19.061,1.0,57.25,,O,
