In [1]:
from scipy import sparse
import numpy as np

In [2]:
atom_data = {'idx': [], 'atom_number': [], 'chain': [], 
             'position': [], 'type': [], 'bonds': [], 
             'lone_pairs': [], 'charge': [], 'connections': [],
             'mol_id': []}

In [3]:
#Assign all BGF atom data to arrays

with open('pnipaam.16x8.solv.spc.dreiding.qeq.bgf') as bgf:
    count = 0
    for line in bgf:
        dat = line.split()
        if dat[0] in ["HETATM", "ATOM"]:
            atom_data['idx'].append(int(dat[1]))
            atom_data['atom_number'].append(dat[2])
            atom_data['chain'].append(dat[3] + dat[4])
            atom_data['position'].append((float(dat[6]), float(dat[7]), float(dat[8])))
            atom_data['type'].append(dat[9])
            atom_data['bonds'].append(int(dat[10]))
            atom_data['lone_pairs'].append(int(dat[11]))
            atom_data['charge'].append(float(dat[12]))
        
        if dat[0] == "CONECT":
            count += 1
            connects = []
            for i in dat[2:]:
                connects.append(int(i))   
            atom_data['connections'].append(connects)
    print(count)

5787


In [4]:
len(atom_data['idx'])

5787

In [5]:
atom_data['connections']

[[2],
 [1, 3, 4, 5],
 [2, 6, 7, 21],
 [2],
 [2],
 [3, 8, 9],
 [3],
 [6],
 [6, 10, 11],
 [9, 12, 16, 17],
 [9],
 [10, 13, 14, 15],
 [12],
 [12],
 [12],
 [10, 18, 19, 20],
 [10],
 [16],
 [16],
 [16],
 [3, 22, 23, 24],
 [21, 25, 26, 40],
 [21],
 [21],
 [22, 27, 28],
 [22],
 [25],
 [25, 29, 30],
 [28, 31, 35, 36],
 [28],
 [29, 32, 33, 34],
 [31],
 [31],
 [31],
 [29, 37, 38, 39],
 [29],
 [35],
 [35],
 [35],
 [22, 41, 42, 43],
 [40, 44, 45, 59],
 [40],
 [40],
 [41, 46, 47],
 [41],
 [44],
 [44, 48, 49],
 [47, 50, 54, 55],
 [47],
 [48, 51, 52, 53],
 [50],
 [50],
 [50],
 [48, 56, 57, 58],
 [48],
 [54],
 [54],
 [54],
 [41, 60, 61, 62],
 [59, 63, 64, 78],
 [59],
 [59],
 [60, 65, 66],
 [60],
 [63],
 [63, 67, 68],
 [66, 69, 73, 74],
 [66],
 [67, 70, 71, 72],
 [69],
 [69],
 [69],
 [67, 75, 76, 77],
 [67],
 [73],
 [73],
 [73],
 [60, 79, 80, 81],
 [78, 82, 83, 97],
 [78],
 [78],
 [79, 84, 85],
 [79],
 [82],
 [82, 86, 87],
 [85, 88, 92, 93],
 [85],
 [86, 89, 90, 91],
 [88],
 [88],
 [88],
 [86, 94, 95, 

In [6]:
n_atoms = len(atom_data['connections'])
connect_matrix = sparse.dok_matrix((n_atoms, n_atoms), dtype=np.int8)

In [7]:
for i in range(n_atoms):
#    print(atom_data['connections'][i])
    for j in atom_data['connections'][i]:
        connect_matrix[i, (j-1)] = 1

In [8]:
connect_matrix

<5787x5787 sparse matrix of type '<class 'numpy.int8'>'
	with 9332 stored elements in Dictionary Of Keys format>

In [9]:
n_components, component_list = sparse.csgraph.connected_components(connect_matrix)
atom_data['mol_id'] = component_list

In [11]:
for i, atom in enumerate(atom_data['atom_number']):
    if atom == 'C1':
        bind_site = (atom_data['mol_id'][i], atom, atom_data['position'][i], atom_data['idx'][i])
        print(atom, atom_data['mol_id'][i], atom_data['position'][i], atom_data['idx'][i])

C1 0 (34.99729, 16.73984, 16.78345) 2
C1 0 (34.22638, 19.13486, 17.02033) 21
C1 0 (35.93328, 20.7456, 18.1785) 40
C1 0 (38.27748, 21.74991, 18.29863) 59
C1 0 (39.23532, 20.92723, 15.93632) 78
C1 0 (41.14398, 19.27312, 15.4467) 97
C1 0 (40.09438, 17.77174, 13.66105) 116
C1 0 (39.17114, 15.21615, 13.76115) 135
C1 0 (38.72937, 12.9166, 12.29791) 154
C1 0 (40.89241, 12.0573, 13.56639) 173
C1 0 (41.44014, 9.57449, 14.03465) 192
C1 0 (43.22573, 8.35611, 15.46525) 211
C1 0 (44.80526, 9.47752, 17.21635) 230
C1 0 (43.90877, 7.39283, 18.65881) 249
C1 0 (45.40546, 6.5197, 20.58908) 268
C1 0 (46.02427, 4.06289, 20.98952) 287
C1 1 (11.77111, 30.82927, 1.47131) 308
C1 1 (11.76488, 31.4618, -0.97541) 327
C1 1 (9.45821, 32.26906, -1.91187) 346
C1 1 (7.44743, 33.83478, -1.75809) 365
C1 1 (8.38872, 35.59316, 0.02989) 384
C1 1 (7.57212, 36.06849, 2.4228) 403
C1 1 (9.76847, 36.08882, 3.73429) 422
C1 1 (10.85526, 34.30023, 5.47014) 441
C1 1 (12.53481, 33.9953, 7.64048) 460
C1 1 (10.26144, 33.72909, 8.97731

In [19]:
for i in atom_data['connections'][1]:
    print(i, atom_data['atom_number'][i])

1 C1
3 H3
4 H5
5 C6


In [21]:
atom_data['idx'][0]

1