In [6]:
from scipy import sparse
import numpy as np
import random

In [7]:
# Column store for the parsed structure: one independent, initially empty
# list per atom field (a comprehension gives each key its own list object).
atom_data = {
    field: []
    for field in (
        'idx', 'atom_number', 'chain', 'position', 'type',
        'bonds', 'lone_pairs', 'charge', 'connections', 'mol_id',
    )
}

In [8]:
# Parse the BGF structure file into the per-atom lists of `atom_data`.
#
# ATOM/HETATM records fill the per-atom fields; CONECT records fill
# 'connections'. The number of CONECT records is printed so it can be compared
# with the number of atoms.

# Reset every field this cell fills, so that re-running the cell does not
# append a second copy of each record to the existing lists.
for field in ('idx', 'atom_number', 'chain', 'position', 'type',
              'bonds', 'lone_pairs', 'charge', 'connections'):
    atom_data[field] = []

with open('pnipaam.16x8.solv.spc.dreiding.qeq.bgf') as bgf:
    count = 0
    for line in bgf:
        # NOTE(review): whitespace splitting assumes adjacent columns never
        # run together in this file - confirm for wide coordinate values.
        dat = line.split()
        if not dat:
            # Blank line: there is no record keyword to inspect.
            continue

        record = dat[0]
        if record in ("HETATM", "ATOM"):
            atom_data['idx'].append(int(dat[1]))
            atom_data['atom_number'].append(dat[2])
            atom_data['chain'].append(dat[3] + dat[4])
            atom_data['position'].append(
                np.array([float(dat[6]), float(dat[7]), float(dat[8])]))
            atom_data['type'].append(dat[9])
            atom_data['bonds'].append(int(dat[10]))
            atom_data['lone_pairs'].append(int(dat[11]))
            atom_data['charge'].append(float(dat[12]))
        elif record == "CONECT":
            count += 1
            # dat[1] is skipped and only the remaining fields are stored as
            # neighbours. Entries are stored by position, so this presumably
            # relies on one CONECT record per atom, in atom order - TODO confirm.
            atom_data['connections'].append([int(i) for i in dat[2:]])
    print(count)

5787


In [9]:
# Number of parsed ATOM/HETATM records; compare with the CONECT count printed
# by the parsing cell.
len(atom_data['idx'])

5787

In [10]:
# Inspect the parsed neighbour lists (one list of 1-based BGF atom indices per
# CONECT record).
atom_data['connections']

[[2],
 [1, 3, 4, 5],
 [2, 6, 7, 21],
 [2],
 [2],
 [3, 8, 9],
 [3],
 [6],
 [6, 10, 11],
 [9, 12, 16, 17],
 [9],
 [10, 13, 14, 15],
 [12],
 [12],
 [12],
 [10, 18, 19, 20],
 [10],
 [16],
 [16],
 [16],
 [3, 22, 23, 24],
 [21, 25, 26, 40],
 [21],
 [21],
 [22, 27, 28],
 [22],
 [25],
 [25, 29, 30],
 [28, 31, 35, 36],
 [28],
 [29, 32, 33, 34],
 [31],
 [31],
 [31],
 [29, 37, 38, 39],
 [29],
 [35],
 [35],
 [35],
 [22, 41, 42, 43],
 [40, 44, 45, 59],
 [40],
 [40],
 [41, 46, 47],
 [41],
 [44],
 [44, 48, 49],
 [47, 50, 54, 55],
 [47],
 [48, 51, 52, 53],
 [50],
 [50],
 [50],
 [48, 56, 57, 58],
 [48],
 [54],
 [54],
 [54],
 [41, 60, 61, 62],
 [59, 63, 64, 78],
 [59],
 [59],
 [60, 65, 66],
 [60],
 [63],
 [63, 67, 68],
 [66, 69, 73, 74],
 [66],
 [67, 70, 71, 72],
 [69],
 [69],
 [69],
 [67, 75, 76, 77],
 [67],
 [73],
 [73],
 [73],
 [60, 79, 80, 81],
 [78, 82, 83, 97],
 [78],
 [78],
 [79, 84, 85],
 [79],
 [82],
 [82, 86, 87],
 [85, 88, 92, 93],
 [85],
 [86, 89, 90, 91],
 [88],
 [88],
 [88],
 [86, 94, 95, 

In [11]:
# Allocate an empty square adjacency matrix with one row and one column per
# parsed connection record. DOK format allows cheap element-wise assignment.
n_atoms = len(atom_data['connections'])
connect_matrix = sparse.dok_matrix((n_atoms,) * 2, dtype=np.int8)

In [12]:
# Mark every bonded pair in the adjacency matrix.
for row, neighbours in enumerate(atom_data['connections']):
    for bgf_index in neighbours:
        # neighbour indices are 1-based BGF indices; matrix columns are 0-based
        connect_matrix[row, bgf_index - 1] = 1

In [13]:
# Display the sparse matrix summary (shape, dtype, number of stored entries).
connect_matrix

<5787x5787 sparse matrix of type '<class 'numpy.int8'>'
	with 9332 stored elements in Dictionary Of Keys format>

In [14]:
# Label each atom with the connected component of the bond graph it belongs
# to; each component is treated as one molecule. The keyword values below are
# the library defaults, spelled out for the reader.
n_components, component_list = sparse.csgraph.connected_components(
    connect_matrix,
    directed=True,
    connection='weak',
    return_labels=True,
)
# The per-atom label array replaces the empty 'mol_id' placeholder list.
atom_data['mol_id'] = component_list

In [15]:
# Check the first parsed atom index (BGF atom indices are 1-based).
atom_data['idx'][0]

1

In [16]:
# List the neighbours of the second atom record together with their names.
for bgf_index in atom_data['connections'][1]:
    # BGF indices are 1-based, so shift by one for the 0-based list lookup
    neighbour_name = atom_data['atom_number'][bgf_index - 1]
    print(bgf_index, neighbour_name)

1 H4
3 C2
4 H3
5 H5


The connections above indicate that C1 is the cross-linking site.

The bis monomer is roughly 10 angstroms long; use this as the initial search radius for available neighbors.
___________________________________________

#### process for finding crosslink site

~~binding sites for each molecule (list of indices)~~

random number between 0 and N (where N is the index of the last molecule)

random number between 0 and M (where M is the index of the last atom in molecule I)

search for available binding sites within distance on all other molecules

select one closest to length of crosslinker or randomly choose

In [29]:
# Map every molecule id to the list of its candidate binding sites (atoms
# named 'C1'). Molecules without such an atom keep an empty list here.
bindsites = {mol: [] for mol in set(atom_data['mol_id'])}

for atom_pos, atom_name in enumerate(atom_data['atom_number']):
    if atom_name != 'C1':
        continue
    # Sites are stored as 0-based list positions rather than the 1-based
    # BGF atom indices, to match how the other atom_data lists are indexed.
    bindsites[atom_data['mol_id'][atom_pos]].append(atom_pos)

In [30]:
# Keep only the molecules that actually have at least one binding site.
temp = {mol: sites for mol, sites in bindsites.items() if len(sites) > 0}
bindsites = temp

In [51]:
# Does an arbitrary atom have any valid sites within 10 angstroms?
# Measures from the second atom record and reports only atoms whose list
# position is above 306.
reference_position = atom_data['position'][1]
for atom_pos, coords in enumerate(atom_data['position']):
    if atom_pos <= 306:
        continue
    separation = np.sqrt(sum((coords - reference_position)**2))
    if separation <= 10:
        print(atom_pos, separation)

1083 9.4395162844078
1168 9.932435531902536
1170 8.681049287897173
1172 7.8366768357129555
1173 8.423631652393164
1174 7.191915583556302
1175 9.180617116082121
1176 7.039421186070627
1177 7.337102638269416
1178 6.090124056092782
1179 7.875205132725115
1180 7.402257436039358
1181 6.339136270628672
1182 8.336552338850874
1183 7.678293120179771
1184 6.572243678820803
1190 9.174687756588778
1194 9.499076308373358
2174 9.675971235498789
2176 9.245391179982596
2178 8.478320513792813
2179 9.605113783303143
2180 8.979412710110832
3022 9.427450407872744
3891 9.512486517435908
3893 8.68256166262008
4272 9.747808358995366
4273 9.75989486691839
4290 9.207752109798566
4291 9.960979758467534
4292 9.093130164354845
4314 7.549527346344274
4315 7.568280336166201
4316 8.04612197493923
4344 9.255284632041308
4345 8.971559598927044
4346 8.82616761167609
4386 8.450134762682781
4387 7.531090539795681
4388 8.860694956864274
4491 9.441414626108735
4492 8.883014015102079
4653 6.578811817912111
4654 5.650955730

In [64]:
# Randomly select the first bind point (molecule, then atom).
# The keys of `bindsites` are molecule ids. After molecules without sites are
# dropped they need not be the contiguous range 0..N-1, so draw from the
# actual keys instead of from an integer range.
mol1 = random.choice(list(bindsites))
site1_idx = random.randrange(len(bindsites[mol1]))
site1 = bindsites[mol1][site1_idx]
print(mol1, site1)

# Search cutoff, in the same units as the parsed coordinates.
crosslink_radius = 12

# Find all available sites on the other molecules within the crosslink radius.
# Each hit is stored as a (site, distance, molecule id) tuple.
# NOTE(review): plain Euclidean distance, with no periodic-image handling -
# confirm whether the simulation box requires it.
available_sites = []
site1_position = atom_data['position'][site1]
for mol, sites in bindsites.items():
    if mol == mol1:
        # never crosslink a molecule to itself
        continue
    for site in sites:
        distance = np.linalg.norm(atom_data['position'][site] - site1_position)
        if distance <= crosslink_radius:
            available_sites.append((site, distance, mol))
            print(site, distance, mol)

7 2390
459 11.161872164283192 1


In [48]:
# Number of molecule entries currently in `bindsites`.
len(bindsites)

8