-
Notifications
You must be signed in to change notification settings - Fork 504
/
modeller.py
1591 lines (1378 loc) · 80.8 KB
/
modeller.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
modeller.py: Provides tools for editing molecular models
This is part of the OpenMM molecular simulation toolkit originating from
Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright (c) 2012-2021 Stanford University and the Authors.
Authors: Peter Eastman
Contributors:
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
from __future__ import division
from __future__ import absolute_import
__author__ = "Peter Eastman"
__version__ = "1.0"
from openmm.app import Topology, PDBFile, ForceField
from openmm.app.forcefield import AllBonds, CutoffNonPeriodic, CutoffPeriodic, DrudeGenerator, _getDataDirectories
from openmm.app.internal import compiled
from openmm.vec3 import Vec3
from openmm import System, Context, NonbondedForce, CustomNonbondedForce, HarmonicBondForce, HarmonicAngleForce, VerletIntegrator, LangevinIntegrator, LocalEnergyMinimizer
from openmm.unit import nanometer, molar, elementary_charge, degree, acos, is_quantity, dot, norm, kilojoules_per_mole
import openmm.unit as unit
from . import element as elem
import gc
import os
import random
import sys
import xml.etree.ElementTree as etree
from copy import deepcopy
from math import ceil, floor
from collections import defaultdict
class Modeller(object):
"""Modeller provides tools for editing molecular models, such as adding water or missing hydrogens.
To use it, create a Modeller object, specifying the initial Topology and atom positions. You can
then call various methods to change the model in different ways. Each time you do, a new Topology
and list of coordinates is created to represent the changed model. Finally, call getTopology()
and getPositions() to get the results.
"""
_residueHydrogens = {}
_hasLoadedStandardHydrogens = False
def __init__(self, topology, positions):
"""Create a new Modeller object
Parameters
----------
topology : Topology
the initial Topology of the model
positions : list
the initial atomic positions
"""
## The Topology describing the structure of the system
self.topology = topology
if not is_quantity(positions):
positions = positions*nanometer
## The list of atom positions
self.positions = positions
def getTopology(self):
"""Get the Topology of the model."""
return self.topology
def getPositions(self):
"""Get the atomic positions."""
return self.positions
def add(self, addTopology, addPositions):
"""Add chains, residues, atoms, and bonds to the model.
Specify what to add by providing a new Topology object and the
corresponding atomic positions. All chains, residues, atoms, and bonds
contained in the Topology are added to the model.
Parameters
----------
addTopology : Topology
a Topology whose contents should be added to the model
addPositions : list
the positions of the atoms to add
"""
# Copy over the existing model.
newTopology = Topology()
newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
newAtoms = {}
newPositions = []*nanometer
for chain in self.topology.chains():
newChain = newTopology.addChain(chain.id)
for residue in chain.residues():
newResidue = newTopology.addResidue(residue.name, newChain, residue.id, residue.insertionCode)
for atom in residue.atoms():
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
newAtoms[atom] = newAtom
newPositions.append(deepcopy(self.positions[atom.index]))
for bond in self.topology.bonds():
newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
# Add the new model
newAtoms = {}
for chain in addTopology.chains():
newChain = newTopology.addChain(chain.id)
for residue in chain.residues():
newResidue = newTopology.addResidue(residue.name, newChain, residue.id, residue.insertionCode)
for atom in residue.atoms():
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
newAtoms[atom] = newAtom
newPositions.append(deepcopy(addPositions[atom.index]))
for bond in addTopology.bonds():
newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
self.topology = newTopology
self.positions = newPositions
def delete(self, toDelete):
"""Delete chains, residues, atoms, and bonds from the model.
You can specify objects to delete at any granularity: atoms, residues, or chains. Passing
in an Atom object causes that Atom to be deleted. Passing in a Residue object causes that
Residue and all Atoms it contains to be deleted. Passing in a Chain object causes that
Chain and all Residues and Atoms it contains to be deleted.
In all cases, when an Atom is deleted, any bonds it participates in are also deleted.
You also can specify a bond (as a tuple of Atom objects) to delete just that bond without
deleting the Atoms it connects.
Parameters
----------
toDelete : list
a list of Atoms, Residues, Chains, and bonds (specified as tuples of
Atoms) to delete
"""
newTopology = Topology()
newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
newAtoms = {}
newPositions = []*nanometer
deleteSet = set(toDelete)
for chain in self.topology.chains():
if chain not in deleteSet:
needNewChain = True;
for residue in chain.residues():
if residue not in deleteSet:
needNewResidue = True
for atom in residue.atoms():
if atom not in deleteSet:
if needNewChain:
newChain = newTopology.addChain(chain.id)
needNewChain = False;
if needNewResidue:
newResidue = newTopology.addResidue(residue.name, newChain, residue.id, residue.insertionCode)
needNewResidue = False;
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
newAtoms[atom] = newAtom
newPositions.append(deepcopy(self.positions[atom.index]))
for bond in self.topology.bonds():
if bond[0] in newAtoms and bond[1] in newAtoms:
if bond not in deleteSet and (bond[1], bond[0]) not in deleteSet:
newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
self.topology = newTopology
self.positions = newPositions
def deleteWater(self):
"""Delete all water molecules from the model."""
self.delete(res for res in self.topology.residues() if res.name == "HOH")
def convertWater(self, model='tip3p'):
"""Convert all water molecules to a different water model.
@deprecated Use addExtraParticles() instead. It performs the same function but in a more general way.
Parameters
----------
model : string='tip3p'
the water model to convert to. Supported values are 'tip3p',
'spce', 'tip4pew', and 'tip5p'.
"""
if model in ('tip3p', 'spce'):
sites = 3
elif model == 'tip4pew':
sites = 4
elif model == 'tip5p':
sites = 5
else:
raise ValueError('Unknown water model: %s' % model)
newTopology = Topology()
newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
newAtoms = {}
newPositions = []*nanometer
for chain in self.topology.chains():
newChain = newTopology.addChain(chain.id)
for residue in chain.residues():
newResidue = newTopology.addResidue(residue.name, newChain, residue.id, residue.insertionCode)
if residue.name == "HOH":
# Copy the oxygen and hydrogens
oatom = [atom for atom in residue.atoms() if atom.element == elem.oxygen]
hatoms = [atom for atom in residue.atoms() if atom.element == elem.hydrogen]
if len(oatom) != 1 or len(hatoms) != 2:
raise ValueError('Illegal water molecule (residue %d): contains %d oxygen(s) and %d hydrogen(s)' % (residue.index, len(oatom), len(hatoms)))
o = newTopology.addAtom(oatom[0].name, oatom[0].element, newResidue)
h1 = newTopology.addAtom(hatoms[0].name, hatoms[0].element, newResidue)
h2 = newTopology.addAtom(hatoms[1].name, hatoms[1].element, newResidue)
newAtoms[oatom[0]] = o
newAtoms[hatoms[0]] = h1
newAtoms[hatoms[1]] = h2
po = deepcopy(self.positions[oatom[0].index])
ph1 = deepcopy(self.positions[hatoms[0].index])
ph2 = deepcopy(self.positions[hatoms[1].index])
newPositions.append(po)
newPositions.append(ph1)
newPositions.append(ph2)
# Add virtual sites.
if sites == 4:
newTopology.addAtom('M', None, newResidue)
newPositions.append(0.786646558*po + 0.106676721*ph1 + 0.106676721*ph2)
elif sites == 5:
newTopology.addAtom('M1', None, newResidue)
newTopology.addAtom('M2', None, newResidue)
v1 = (ph1-po).value_in_unit(nanometer)
v2 = (ph2-po).value_in_unit(nanometer)
cross = Vec3(v1[1]*v2[2]-v1[2]*v2[1], v1[2]*v2[0]-v1[0]*v2[2], v1[0]*v2[1]-v1[1]*v2[0])
newPositions.append(po - (0.34490826*v1 - 0.34490826*v2 - 6.4437903*cross)*nanometer)
newPositions.append(po - (0.34490826*v1 - 0.34490826*v2 + 6.4437903*cross)*nanometer)
else:
# Just copy the residue over.
for atom in residue.atoms():
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
newAtoms[atom] = newAtom
newPositions.append(deepcopy(self.positions[atom.index]))
for bond in self.topology.bonds():
if bond[0] in newAtoms and bond[1] in newAtoms:
newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
self.topology = newTopology
self.positions = newPositions
def _addIons(self, forcefield, numWaters, replaceableMols, ionCutoff=0.05*nanometer, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*molar, neutralize=True):
"""Adds ions to the system by replacing certain molecules.
Parameters
----------
forcefield : ForceField
the ForceField to use to determine the total charge of the system.
numWaters : int
the total number of water molecules in the simulation box, used to
calculate the number of ions / concentration to add.
replaceableMols : dict
the molecules to replace by ions, as a dictionary of residue:positions
ionCutoff: distance=0.5*nanometer
positiveIon : string='Na+'
the type of positive ion to add. Allowed values are 'Cs+', 'K+', 'Li+', 'Na+', and 'Rb+'
negativeIon : string='Cl-'
the type of negative ion to add. Allowed values are 'Cl-', 'Br-', 'F-', and 'I-'. Be aware
that not all force fields support all ion types.
ionicStrength : concentration=0*molar
the total concentration of ions (both positive and negative) to add. This
does not include ions that are added to neutralize the system.
Note that only monovalent ions are currently supported.
neutralize : bool=True
whether to add ions to neutralize the system
"""
posIonElements = {'Cs+': elem.cesium, 'K+': elem.potassium,
'Li+': elem.lithium, 'Na+': elem.sodium,
'Rb+': elem.rubidium}
negIonElements = {'Cl-': elem.chlorine, 'Br-': elem.bromine,
'F-': elem.fluorine, 'I-': elem.iodine}
ionPositions = []
numReplaceableMols = len(replaceableMols)
# Fetch ion elements from user input
if positiveIon not in posIonElements:
raise ValueError('Illegal value for positive ion: {}'.format(positiveIon))
if negativeIon not in negIonElements:
raise ValueError('Illegal value for negative ion: {}'.format(negativeIon))
positiveElement = posIonElements[positiveIon]
negativeElement = negIonElements[negativeIon]
# Determine the total charge of the system
system = forcefield.createSystem(self.topology)
for i in range(system.getNumForces()):
if isinstance(system.getForce(i), NonbondedForce):
nonbonded = system.getForce(i)
break
else:
raise ValueError('The ForceField does not specify a NonbondedForce')
totalCharge = 0.0
for i in range(nonbonded.getNumParticles()):
nb_i = nonbonded.getParticleParameters(i)
totalCharge += nb_i[0].value_in_unit(elementary_charge)
# Round to nearest integer
totalCharge = int(floor(0.5 + totalCharge))
# Figure out how many ions to add based on requested params/concentration
numPositive, numNegative = 0, 0
if neutralize:
if abs(totalCharge) > numReplaceableMols:
raise Exception('Cannot neutralize the system because the charge is greater than the number of available positions for ions')
if totalCharge > 0:
numNegative += totalCharge
else:
numPositive -= totalCharge
if ionicStrength > 0 * molar:
numIons = (numWaters - numPositive - numNegative) * ionicStrength / (55.4 * molar) # Pure water is about 55.4 molar (depending on temperature)
numPairs = int(floor(numIons + 0.5))
numPositive += numPairs
numNegative += numPairs
totalIons = numPositive + numNegative
if totalIons > 0:
# Randomly select a set of waters
# while ensuring ions are not placed too close to each other.
modeller = Modeller(self.topology, self.positions)
replaceableList = list(replaceableMols.keys())
numAddedIons = 0
numTrials = 10 # Attempts to add ions N times before quitting
toReplace = [] # list of molecules to be replaced
while numAddedIons < totalIons:
pickedMol = random.choice(replaceableList)
replaceableList.remove(pickedMol)
# Check distance to other ions
for pos in ionPositions:
distance = norm(pos - replaceableMols[pickedMol])
if distance <= ionCutoff:
numTrials -= 1
break
else:
toReplace.append(pickedMol)
ionPositions.append(replaceableMols[pickedMol])
numAddedIons += 1
n_trials = 10
if n_trials == 0:
raise ValueError('Could not add more than {} ions to the system'.format(numAddedIons))
# Replace waters/ions in the topology
modeller.delete(toReplace)
ionChain = modeller.topology.addChain()
for i, water in enumerate(toReplace):
element = (positiveElement if i < numPositive else negativeElement)
newResidue = modeller.topology.addResidue(element.symbol.upper(), ionChain)
modeller.topology.addAtom(element.symbol, element, newResidue)
modeller.positions.append(replaceableMols[water])
# Update topology/positions
self.topology = modeller.topology
self.positions = modeller.positions
def addSolvent(self, forcefield, model='tip3p', boxSize=None, boxVectors=None, padding=None, numAdded=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*molar, neutralize=True):
"""Add solvent (both water and ions) to the model to fill a rectangular box.
The algorithm works as follows:
1. Water molecules are added to fill the box.
2. Water molecules are removed if their distance to any solute atom is less than the sum of their van der Waals radii.
3. If the solute is charged and neutralize=True, enough positive or negative ions are added to neutralize it. Each ion is added by
randomly selecting a water molecule and replacing it with the ion.
4. Ion pairs are added to give the requested total ionic strength. Note that only monovalent ions are currently supported.
The box size can be specified in any of several ways:
1. You can explicitly give the vectors defining the periodic box to use.
2. Alternatively, for a rectangular box you can simply give the dimensions of the unit cell.
3. You can give a padding distance. The largest dimension of the solute (along the x, y, or z axis) is determined, and a cubic
box of size (largest dimension)+2*padding is used.
4. You can specify the total number of molecules (both waters and ions) to add. A cubic box is then created whose size is
just large enough hold the specified amount of solvent.
5. Finally, if none of the above options is specified, the existing Topology's box vectors are used.
Parameters
----------
forcefield : ForceField
the ForceField to use for determining van der Waals radii and atomic charges
model : str='tip3p'
the water model to use. Supported values are 'tip3p', 'spce', 'tip4pew', and 'tip5p'.
boxSize : Vec3=None
the size of the box to fill with water
boxVectors : tuple of Vec3=None
the vectors defining the periodic box to fill with water
padding : distance=None
the padding distance to use
numAdded : int=None
the total number of molecules (waters and ions) to add
positiveIon : string='Na+'
the type of positive ion to add. Allowed values are 'Cs+', 'K+', 'Li+', 'Na+', and 'Rb+'
negativeIon : string='Cl-'
the type of negative ion to add. Allowed values are 'Cl-', 'Br-', 'F-', and 'I-'. Be aware
that not all force fields support all ion types.
ionicStrength : concentration=0*molar
the total concentration of ions (both positive and negative) to add. This
does not include ions that are added to neutralize the system.
Note that only monovalent ions are currently supported.
neutralize : bool=True
whether to add ions to neutralize the system
"""
if len([x for x in (boxSize, boxVectors, padding, numAdded) if x is not None]) > 1:
raise ValueError('At most one of the following arguments may be specified: boxSize, boxVectors, padding, numAdded')
# Load the pre-equilibrated water box.
vdwRadiusPerSigma = 0.5612310241546864907
if model == 'tip3p':
waterRadius = 0.31507524065751241*vdwRadiusPerSigma
elif model == 'spce':
waterRadius = 0.31657195050398818*vdwRadiusPerSigma
elif model == 'tip4pew':
waterRadius = 0.315365*vdwRadiusPerSigma
elif model == 'tip5p':
waterRadius = 0.312*vdwRadiusPerSigma
else:
raise ValueError('Unknown water model: %s' % model)
pdb = PDBFile(os.path.join(os.path.dirname(__file__), 'data', model+'.pdb'))
pdbTopology = pdb.getTopology()
pdbPositions = pdb.getPositions().value_in_unit(nanometer)
pdbResidues = list(pdbTopology.residues())
pdbBoxSize = pdbTopology.getUnitCellDimensions().value_in_unit(nanometer)
# Pick a unit cell size.
if numAdded is not None:
# Select a padding distance which is guaranteed to give more than the specified number of molecules.
padding = 1.1*(numAdded/((len(pdbResidues)/pdbBoxSize[0]**3)*8))**(1.0/3.0)
if padding < 0.5:
padding = 0.5 # Ensure we have enough when adding very small numbers of molecules
if boxVectors is not None:
if is_quantity(boxVectors[0]):
boxVectors = (boxVectors[0].value_in_unit(nanometer), boxVectors[1].value_in_unit(nanometer), boxVectors[2].value_in_unit(nanometer))
box = Vec3(boxVectors[0][0], boxVectors[1][1], boxVectors[2][2])
vectors = boxVectors
elif boxSize is not None:
if is_quantity(boxSize):
boxSize = boxSize.value_in_unit(nanometer)
box = Vec3(boxSize[0], boxSize[1], boxSize[2])
vectors = (Vec3(boxSize[0], 0, 0), Vec3(0, boxSize[1], 0), Vec3(0, 0, boxSize[2]))
elif padding is not None:
if is_quantity(padding):
padding = padding.value_in_unit(nanometer)
if len(self.positions) == 0:
maxSize = 0
else:
maxSize = max(max((pos[i] for pos in self.positions))-min((pos[i] for pos in self.positions)) for i in range(3))
maxSize = maxSize.value_in_unit(nanometer)
box = (maxSize+2*padding)*Vec3(1, 1, 1)
vectors = (Vec3(maxSize+2*padding, 0, 0), Vec3(0, maxSize+2*padding, 0), Vec3(0, 0, maxSize+2*padding))
else:
box = self.topology.getUnitCellDimensions().value_in_unit(nanometer)
vectors = self.topology.getPeriodicBoxVectors().value_in_unit(nanometer)
if box is None:
raise ValueError('Neither the box size, box vectors, nor padding was specified, and the Topology does not define unit cell dimensions')
# Have the ForceField build a System for the solute from which we can determine van der Waals radii.
system = forcefield.createSystem(self.topology)
nonbonded = None
for i in range(system.getNumForces()):
if isinstance(system.getForce(i), NonbondedForce):
nonbonded = system.getForce(i)
if nonbonded is None:
raise ValueError('The ForceField does not specify a NonbondedForce')
cutoff = [waterRadius]*system.getNumParticles()
for i in range(system.getNumParticles()):
params = nonbonded.getParticleParameters(i)
if params[2] != 0*kilojoules_per_mole:
cutoff[i] += params[1].value_in_unit(nanometer)*vdwRadiusPerSigma
waterCutoff = waterRadius
if len(cutoff) == 0:
maxCutoff = waterCutoff
else:
maxCutoff = max(waterCutoff, max(cutoff))
# Copy the solute over.
newTopology = Topology()
newTopology.setPeriodicBoxVectors(vectors*nanometer)
newAtoms = {}
newPositions = []*nanometer
for chain in self.topology.chains():
newChain = newTopology.addChain(chain.id)
for residue in chain.residues():
newResidue = newTopology.addResidue(residue.name, newChain, residue.id, residue.insertionCode)
for atom in residue.atoms():
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue, atom.id)
newAtoms[atom] = newAtom
newPositions.append(deepcopy(self.positions[atom.index]))
for bond in self.topology.bonds():
newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
# Sort the solute atoms into cells for fast lookup.
if len(self.positions) == 0:
positions = []
else:
positions = deepcopy(self.positions.value_in_unit(nanometer))
cells = _CellList(positions, maxCutoff, vectors, True)
# Create a function to compute the distance between two points, taking periodic boundary conditions into account.
periodicDistance = compiled.periodicDistance(vectors)
# Find the list of water molecules to add.
newChain = newTopology.addChain()
if len(positions) == 0:
center = Vec3(0, 0, 0)
else:
center = [(max((pos[i] for pos in positions))+min((pos[i] for pos in positions)))/2 for i in range(3)]
center = Vec3(center[0], center[1], center[2])
numBoxes = [int(ceil(box[i]/pdbBoxSize[i])) for i in range(3)]
addedWaters = []
for boxx in range(numBoxes[0]):
for boxy in range(numBoxes[1]):
for boxz in range(numBoxes[2]):
offset = Vec3(boxx*pdbBoxSize[0], boxy*pdbBoxSize[1], boxz*pdbBoxSize[2])
for residue in pdbResidues:
oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0]
atomPos = pdbPositions[oxygen.index]+offset
if not any((atomPos[i] > box[i] for i in range(3))):
# This molecule is inside the box, so see how close to it is to the solute.
atomPos += center-box/2
for i in cells.neighbors(atomPos):
if periodicDistance(atomPos, positions[i]) < cutoff[i]:
break
else:
# Record this water molecule as one to add.
addedWaters.append((residue.index, atomPos))
if numAdded is not None:
# We added many more waters than we actually want. Sort them based on distance to the nearest box edge and
# only keep the ones in the middle.
lowerBound = center-box/2
upperBound = center+box/2
distToEdge = (min(min(pos-lowerBound), min(upperBound-pos)) for index, pos in addedWaters)
sortedIndex = [i[0] for i in sorted(enumerate(distToEdge), key=lambda x: -x[1])]
addedWaters = [addedWaters[i] for i in sortedIndex[:numAdded]]
# Compute a new periodic box size.
maxSize = max(max((pos[i] for index, pos in addedWaters))-min((pos[i] for index, pos in addedWaters)) for i in range(3))
maxSize += 0.1 # Add padding to reduce clashes at the edge.
newTopology.setUnitCellDimensions(Vec3(maxSize, maxSize, maxSize))
else:
# There could be clashes between water molecules at the box edges. Find ones to remove.
upperCutoff = center+box/2-Vec3(waterCutoff, waterCutoff, waterCutoff)
lowerCutoff = center-box/2+Vec3(waterCutoff, waterCutoff, waterCutoff)
lowerSkinPositions = [pos for index, pos in addedWaters if pos[0] < lowerCutoff[0] or pos[1] < lowerCutoff[1] or pos[2] < lowerCutoff[2]]
filteredWaters = []
cells.cells = {}
for i in range(len(lowerSkinPositions)):
cell = tuple((int(floor(lowerSkinPositions[i][j]/cells.cellSize[j]))%cells.numCells[j] for j in range(3)))
if cell in cells.cells:
cells.cells[cell].append(i)
else:
cells.cells[cell] = [i]
for entry in addedWaters:
pos = entry[1]
if pos[0] < upperCutoff[0] and pos[1] < upperCutoff[1] and pos[2] < upperCutoff[2]:
filteredWaters.append(entry)
else:
if not any((periodicDistance(lowerSkinPositions[i], pos) < waterCutoff and norm(lowerSkinPositions[i]-pos) > waterCutoff for i in cells.neighbors(pos))):
filteredWaters.append(entry)
addedWaters = filteredWaters
# Add the water molecules.
for index, pos in addedWaters:
newResidue = newTopology.addResidue(residue.name, newChain)
residue = pdbResidues[index]
oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0]
oPos = pdbPositions[oxygen.index]
molAtoms = []
for atom in residue.atoms():
molAtoms.append(newTopology.addAtom(atom.name, atom.element, newResidue))
newPositions.append((pos+pdbPositions[atom.index]-oPos)*nanometer)
for atom1 in molAtoms:
if atom1.element == elem.oxygen:
for atom2 in molAtoms:
if atom2.element == elem.hydrogen:
newTopology.addBond(atom1, atom2)
self.topology = newTopology
self.positions = newPositions
# Convert water list to dictionary (residue:position)
waterPos = {}
_oxygen = elem.oxygen
for chain in newTopology.chains():
for residue in chain.residues():
if residue.name == 'HOH':
for atom in residue.atoms():
if atom.element == _oxygen:
waterPos[residue] = newPositions[atom.index]
# Total number of waters in the box
numTotalWaters = len(waterPos)
# Add ions to neutralize the system.
self._addIons(forcefield, numTotalWaters, waterPos, positiveIon=positiveIon, negativeIon=negativeIon, ionicStrength=ionicStrength, neutralize=neutralize)
class _ResidueData:
"""Inner class used to encapsulate data about the hydrogens for a residue."""
def __init__(self, name):
self.name = name
self.variants = []
self.hydrogens = []
class _Hydrogen:
"""Inner class used to encapsulate data about a hydrogen atom."""
def __init__(self, name, parent, maxph, variants, terminal):
self.name = name
self.parent = parent
self.maxph = maxph
self.variants = variants
self.terminal = terminal
@staticmethod
def loadHydrogenDefinitions(file):
"""Load an XML file containing definitions of hydrogens that should be added by addHydrogens().
The built in hydrogens.xml file containing definitions for standard amino acids and nucleotides is loaded automatically.
This method can be used to load additional definitions for other residue types. They will then be used in subsequent
calls to addHydrogens().
Parameters
----------
file : string or file
An XML file containing hydrogen definitions. It may be either an
absolute file path, a path relative to the current working
directory, a path relative to this module's data subdirectory (for
built in sets of definitions), or an open file-like object with a read()
method from which the data can be loaded.
"""
tree = None
try:
# this handles either filenames or open file-like objects
tree = etree.parse(file)
except IOError:
for dataDir in _getDataDirectories():
f = os.path.join(dataDir, file)
if os.path.isfile(f):
tree = etree.parse(f)
break
if tree is None:
raise ValueError('Could not locate file')
infinity = float('Inf')
for residue in tree.getroot().findall('Residue'):
resName = residue.attrib['name']
data = Modeller._ResidueData(resName)
Modeller._residueHydrogens[resName] = data
for variant in residue.findall('Variant'):
data.variants.append(variant.attrib['name'])
for hydrogen in residue.findall('H'):
maxph = infinity
if 'maxph' in hydrogen.attrib:
maxph = float(hydrogen.attrib['maxph'])
atomVariants = None
if 'variant' in hydrogen.attrib:
atomVariants = hydrogen.attrib['variant'].split(',')
terminal = None
if 'terminal' in hydrogen.attrib:
terminal = hydrogen.attrib['terminal']
data.hydrogens.append(Modeller._Hydrogen(hydrogen.attrib['name'], hydrogen.attrib['parent'], maxph, atomVariants, terminal))
def addHydrogens(self, forcefield=None, pH=7.0, variants=None, platform=None):
"""Add missing hydrogens to the model.
Some residues can exist in multiple forms depending on the pH and properties of the local environment. These
variants differ in the presence or absence of particular hydrogens. In particular, the following variants
are supported:
Aspartic acid:
ASH: Neutral form with a hydrogen on one of the delta oxygens
ASP: Negatively charged form without a hydrogen on either delta oxygen
Cysteine:
CYS: Neutral form with a hydrogen on the sulfur
CYX: No hydrogen on the sulfur (either negatively charged, or part of a disulfide bond)
Glutamic acid:
GLH: Neutral form with a hydrogen on one of the epsilon oxygens
GLU: Negatively charged form without a hydrogen on either epsilon oxygen
Histidine:
HID: Neutral form with a hydrogen on the ND1 atom
HIE: Neutral form with a hydrogen on the NE2 atom
HIP: Positively charged form with hydrogens on both ND1 and NE2
HIN: Negatively charged form without a hydrogen on either ND1 or NE2
Lysine:
LYN: Neutral form with two hydrogens on the zeta nitrogen
LYS: Positively charged form with three hydrogens on the zeta nitrogen
The variant to use for each residue is determined by the following rules:
1. The most common variant at the specified pH is selected.
2. Any Cysteine that participates in a disulfide bond uses the CYX variant regardless of pH.
3. For a neutral Histidine residue, the HID or HIE variant is selected based on which one forms a better hydrogen bond.
You can override these rules by explicitly specifying a variant for any residue. To do that, provide a list for the
'variants' parameter, and set the corresponding element to the name of the variant to use.
A special case is when the model already contains a hydrogen that should not be present in the desired variant.
If you explicitly specify a variant using the 'variants' parameter, the residue will be modified to match the
desired variant, removing hydrogens if necessary. On the other hand, for residues whose variant is selected
automatically, this function will only add hydrogens. It will never remove ones that are already present in the
model, regardless of the specified pH.
In all cases, the positions of existing atoms (including existing hydrogens) are not modified.
Definitions for standard amino acids and nucleotides are built in. You can call loadHydrogenDefinitions() to load
additional definitions for other residue types.
Parameters
----------
forcefield : ForceField=None
the ForceField to use for determining the positions of hydrogens.
If this is None, positions will be picked which are generally
reasonable but not optimized for any particular ForceField.
pH : float=7.0
the pH based on which to select variants
variants : list=None
an optional list of variants to use. If this is specified, its
length must equal the number of residues in the model. variants[i]
is the name of the variant to use for residue i (indexed starting at
0). If an element is None, the standard rules will be followed to
select a variant for that residue.
platform : Platform=None
the Platform to use when computing the hydrogen atom positions. If
this is None, the default Platform will be used.
Returns
-------
list
a list of what variant was actually selected for each residue,
in the same format as the variants parameter
"""
# Check the list of variants.
residues = list(self.topology.residues())
if variants is not None:
if len(variants) != len(residues):
raise ValueError("The length of the variants list must equal the number of residues")
else:
variants = [None]*len(residues)
actualVariants = [None]*len(residues)
# Load the residue specifications.
if not Modeller._hasLoadedStandardHydrogens:
Modeller.loadHydrogenDefinitions(os.path.join(os.path.dirname(__file__), 'data', 'hydrogens.xml'))
# Make a list of atoms bonded to each atom.
bonded = {}
for atom in self.topology.atoms():
bonded[atom] = []
for atom1, atom2 in self.topology.bonds():
bonded[atom1].append(atom2)
bonded[atom2].append(atom1)
# Define a function that decides whether a set of atoms form a hydrogen bond, using fairly tolerant criteria.
def isHbond(d, h, a):
if norm(d-a) > 0.35*nanometer:
return False
deltaDH = h-d
deltaHA = a-h
deltaDH /= norm(deltaDH)
deltaHA /= norm(deltaHA)
return acos(dot(deltaDH, deltaHA)) < 50*degree
# Loop over residues.
newTopology = Topology()
newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
newAtoms = {}
newPositions = []*nanometer
newIndices = []
acceptors = [atom for atom in self.topology.atoms() if atom.element in (elem.oxygen, elem.nitrogen)]
for chain in self.topology.chains():
newChain = newTopology.addChain(chain.id)
for residue in chain.residues():
newResidue = newTopology.addResidue(residue.name, newChain, residue.id, residue.insertionCode)
isNTerminal = (residue == chain._residues[0])
isCTerminal = (residue == chain._residues[-1])
if residue.name in Modeller._residueHydrogens:
# Add hydrogens. First select which variant to use.
spec = Modeller._residueHydrogens[residue.name]
variant = variants[residue.index]
if variant is None:
if residue.name == 'CYS':
# If this is part of a disulfide, use CYX.
sulfur = [atom for atom in residue.atoms() if atom.element == elem.sulfur]
if len(sulfur) == 1 and any((atom.residue != residue for atom in bonded[sulfur[0]])):
variant = 'CYX'
if residue.name == 'HIS' and pH > 6.5:
# See if either nitrogen already has a hydrogen attached.
nd1 = [atom for atom in residue.atoms() if atom.name == 'ND1']
ne2 = [atom for atom in residue.atoms() if atom.name == 'NE2']
if len(nd1) != 1 or len(ne2) != 1:
raise ValueError('HIS residue (%d) has the wrong set of atoms' % residue.index)
nd1 = nd1[0]
ne2 = ne2[0]
nd1HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[nd1]))
ne2HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[ne2]))
if nd1HasHydrogen and ne2HasHydrogen:
variant = 'HIP'
elif nd1HasHydrogen:
variant = 'HID'
elif ne2HasHydrogen:
variant = 'HIE'
else:
# Estimate the hydrogen positions.
nd1Pos = self.positions[nd1.index]
ne2Pos = self.positions[ne2.index]
hd1Delta = Vec3(0, 0, 0)*nanometer
for other in bonded[nd1]:
hd1Delta += nd1Pos-self.positions[other.index]
hd1Delta *= 0.1*nanometer/norm(hd1Delta)
hd1Pos = nd1Pos+hd1Delta
he2Delta = Vec3(0, 0, 0)*nanometer
for other in bonded[ne2]:
he2Delta += ne2Pos-self.positions[other.index]
he2Delta *= 0.1*nanometer/norm(he2Delta)
he2Pos = ne2Pos+he2Delta
# See whether either hydrogen would form a hydrogen bond.
nd1IsBonded = False
ne2IsBonded = False
for acceptor in acceptors:
if acceptor.residue != residue:
acceptorPos = self.positions[acceptor.index]
if isHbond(nd1Pos, hd1Pos, acceptorPos):
nd1IsBonded = True
break
if isHbond(ne2Pos, he2Pos, acceptorPos):
ne2IsBonded = True
if ne2IsBonded and not nd1IsBonded:
variant = 'HIE'
else:
variant = 'HID'
elif residue.name == 'HIS':
variant = 'HIP'
if variant is not None and variant not in spec.variants:
raise ValueError('Illegal variant for %s residue: %s' % (residue.name, variant))
actualVariants[residue.index] = variant
removeExtraHydrogens = (variants[residue.index] is not None)
# Make a list of hydrogens that should be present in the residue.
parents = [atom for atom in residue.atoms() if atom.element != elem.hydrogen]
parentNames = [atom.name for atom in parents]
hydrogens = [h for h in spec.hydrogens if (variant is None and pH <= h.maxph) or (h.variants is None and pH <= h.maxph) or (h.variants is not None and variant in h.variants)]
hydrogens = [h for h in hydrogens if h.terminal is None or (isNTerminal and 'N' in h.terminal) or (isCTerminal and 'C' in h.terminal) or
((not isNTerminal) and (not isCTerminal) and '-' in h.terminal)]
hydrogens = [h for h in hydrogens if h.parent in parentNames]
# Loop over atoms in the residue, adding them to the new topology along with required hydrogens.
for parent in residue.atoms():
# Check whether this is a hydrogen that should be removed.
if removeExtraHydrogens and parent.element == elem.hydrogen and not any(parent.name == h.name for h in hydrogens):
continue
# Add the atom.
newAtom = newTopology.addAtom(parent.name, parent.element, newResidue)
newAtoms[parent] = newAtom
newPositions.append(deepcopy(self.positions[parent.index]))
if parent in parents:
# Match expected hydrogens with existing ones and find which ones need to be added.
existing = [atom for atom in bonded[parent] if atom.element == elem.hydrogen]
expected = [h for h in hydrogens if h.parent == parent.name]
if len(existing) < len(expected):
# Try to match up existing hydrogens to expected ones.
matches = []
for e in existing:
match = [h for h in expected if h.name == e.name]
if len(match) > 0:
matches.append(match[0])
expected.remove(match[0])
else:
matches.append(None)
# If any hydrogens couldn't be matched by name, just match them arbitrarily.
for i in range(len(matches)):
if matches[i] is None:
matches[i] = expected[-1]
expected.remove(expected[-1])
# Add the missing hydrogens.
for h in expected:
newH = newTopology.addAtom(h.name, elem.hydrogen, newResidue)
newIndices.append(newH.index)
delta = Vec3(0, 0, 0)*nanometer
if len(bonded[parent]) > 0:
for other in bonded[parent]:
delta += self.positions[parent.index]-self.positions[other.index]
else:
delta = Vec3(random.random(), random.random(), random.random())*nanometer
delta *= 0.1*nanometer/norm(delta)
delta += 0.05*Vec3(random.random(), random.random(), random.random())*nanometer
delta *= 0.1*nanometer/norm(delta)
newPositions.append(self.positions[parent.index]+delta)
newTopology.addBond(newAtom, newH)
else:
# Just copy over the residue.
for atom in residue.atoms():
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
newAtoms[atom] = newAtom
newPositions.append(deepcopy(self.positions[atom.index]))
for bond in self.topology.bonds():
if bond[0] in newAtoms and bond[1] in newAtoms:
newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
# The hydrogens were added at random positions. Now perform an energy minimization to fix them up.
addedH = set(newIndices) # keep track of Hs added
if forcefield is not None:
# Use the ForceField the user specified.
system = forcefield.createSystem(newTopology, rigidWater=False, nonbondedMethod=CutoffNonPeriodic)
atoms = list(newTopology.atoms())
for i in range(system.getNumParticles()):
if i not in addedH:
# Existing atom, make it immobile.
system.setParticleMass(i, 0)
else:
# Create a System that restrains the distance of each hydrogen from its parent atom
# and causes hydrogens to spread out evenly.
system = System()
nonbonded = CustomNonbondedForce('100/(r/0.1)^4')
nonbonded.setNonbondedMethod(CustomNonbondedForce.CutoffNonPeriodic);
nonbonded.setCutoffDistance(1*nanometer)
bonds = HarmonicBondForce()
angles = HarmonicAngleForce()
system.addForce(nonbonded)
system.addForce(bonds)
system.addForce(angles)
bondedTo = []
for atom in newTopology.atoms():
nonbonded.addParticle([])
if atom.index not in addedH: # make immobile
system.addParticle(0.0)
else:
system.addParticle(1.0)
bondedTo.append([])
for atom1, atom2 in newTopology.bonds():
if atom1.element == elem.hydrogen or atom2.element == elem.hydrogen:
bonds.addBond(atom1.index, atom2.index, 0.1, 100000.0)
bondedTo[atom1.index].append(atom2)
bondedTo[atom2.index].append(atom1)
for residue in newTopology.residues():
if residue.name == 'HOH':