/
trajectory.py
1314 lines (1095 loc) · 49.4 KB
/
trajectory.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
##############################################################################
# MDTraj: A Python Library for Loading, Saving, and Manipulating
# Molecular Dynamics Trajectories.
# Copyright 2012-2014 Stanford University and the Authors
#
# Authors: Robert McGibbon
# Contributors: Kyle A. Beauchamp, TJ Lane, Joshua Adelman, Lee-Ping Wang
#
# MDTraj is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 2.1
# of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with MDTraj. If not, see <http://www.gnu.org/licenses/>.
##############################################################################
##############################################################################
# Imports
##############################################################################
from __future__ import print_function, division
import os
import warnings
import functools
from copy import deepcopy
import numpy as np
from mdtraj.formats import DCDTrajectoryFile
from mdtraj.formats import BINPOSTrajectoryFile
from mdtraj.formats import XTCTrajectoryFile
from mdtraj.formats import TRRTrajectoryFile
from mdtraj.formats import HDF5TrajectoryFile
from mdtraj.formats import NetCDFTrajectoryFile
from mdtraj.formats import LH5TrajectoryFile
from mdtraj.formats import PDBTrajectoryFile
from mdtraj.formats import MDCRDTrajectoryFile
from mdtraj.formats import ArcTrajectoryFile
from mdtraj.core.topology import Topology
from mdtraj.utils import unitcell, ensure_type, convert, cast_indices
from mdtraj.utils.six.moves import xrange
from mdtraj.utils.six import PY3
from mdtraj import _rmsd
from mdtraj import _FormatRegistry
##############################################################################
# Globals
##############################################################################
__all__ = ['open', 'load', 'iterload', 'load_frame', 'Trajectory']
##############################################################################
# Utilities
##############################################################################
def _assert_files_exist(filenames):
"""Throw an IO error if files don't exist
Parameters
----------
filenames : {str, [str]}
String or list of strings to check
"""
if isinstance(filenames, str):
filenames = [filenames]
for fn in filenames:
if not (os.path.exists(fn) and os.path.isfile(fn)):
raise IOError('No such file: %s' % fn)
def _parse_topology(top):
"""Get the topology from a argument of indeterminate type
If top is a string, we try loading a pdb, if its a trajectory
we extract its topology.
"""
if isinstance(top, str) and (os.path.splitext(top)[1] in ['.pdb', '.h5','.lh5']):
topology = load_frame(top, 0).topology
elif isinstance(top, Trajectory):
topology = top.topology
elif isinstance(top, Topology):
topology = top
else:
raise TypeError('A topology is required. You supplied top=%s' % top)
return topology
##############################################################################
# Utilities
##############################################################################
def open(filename, mode='r', force_overwrite=True, **kwargs):
"""Open a trajectory file-like object
This factor function returns an instance of an open file-like
object capable of reading/writing the trajectory (depending on
'mode'). It does not actually load the trajectory from disk or
write anything.
Parameters
----------
filename : str
Path to the trajectory file on disk
mode : {'r', 'w'}
The mode in which to open the file, either 'r' for read or 'w' for
write.
force_overwrite : bool
If opened in write mode, and a file by the name of `filename` already
exists on disk, should we overwrite it?
Other Parameters
----------------
kwargs : dict
Other keyword parameters are passed directly to the file object
Returns
-------
fileobject : object
Open trajectory file, whose type is determined by the filename
extension
See Also
--------
load, ArcTrajectoryFile, BINPOSTrajectoryFile, DCDTrajectoryFile,
HDF5TrajectoryFile, LH5TrajectoryFile, MDCRDTrajectoryFile,
NetCDFTrajectoryFile, PDBTrajectoryFile, TRRTrajectoryFile,
XTCTrajectoryFile
"""
extension = os.path.splitext(filename)[1]
try:
loader = _FormatRegistry.fileobjects[extension]
except KeyError:
raise IOError('Sorry, no loader for filename=%s (extension=%s) '
'was found. I can only load files with extensions in %s'
% (filename, extension, _FormatRegistry.fileobjects.keys()))
return loader(filename, mode=mode, force_overwrite=force_overwrite, **kwargs)
def load_frame(filename, index, top=None, atom_indices=None):
"""Load a single frame from a trajectory file
Parameters
----------
filename : str
Path to the trajectory file on disk
index : int
Load the `index`-th frame from the specified file
top : {str, Trajectory, Topology}
Most trajectory formats do not contain topology information. Pass in
either the path to a RCSB PDB file, a trajectory, or a topology to
supply this information.
atom_indices : array_like, optional
If not none, then read only a subset of the atoms coordinates from the
file. These indices are zero-based (not 1 based, as used by the PDB
format).
Examples
--------
>>> import mdtraj as md
>>> first_frame = md.load_frame('traj.h5', 0)
>>> print first_frame
<mdtraj.Trajectory with 1 frames, 22 atoms>
See Also
--------
load, load_frame
Returns
-------
trajectory : md.Trajectory
The resulting conformation, as an md.Trajectory object containing
a single frame.
"""
_assert_files_exist(filename)
extension = os.path.splitext(filename)[1]
try:
loader = _FormatRegistry.loaders[extension]
except KeyError:
raise IOError('Sorry, no loader for filename=%s (extension=%s) '
'was found. I can only load files with extensions in %s'
% (filename, extension, _FormatRegistry.loaders.keys()))
kwargs = {'top': top, 'atom_indices': atom_indices}
if loader.__name__ in ['load_hdf5', 'load_pdb']:
kwargs.pop('top', None)
return loader(filename, frame=index, **kwargs)
def load(filename_or_filenames, discard_overlapping_frames=False, **kwargs):
"""Load a trajectory from one or more files on disk.
This function dispatches to one of the specialized trajectory loaders based
on the extension on the filename. Because different trajectory formats save
different information on disk, the specific keyword argument options supported
depend on the specific loaded.
Parameters
----------
filename_or_filenames : {str, list of strings}
Filename or list of filenames containing trajectory files of a single format.
discard_overlapping_frames : bool, default=False
Look for overlapping frames between the last frame of one filename and
the first frame of a subsequent filename and discard them
Other Parameters
----------------
top : {str, Trajectory, Topology}
Most trajectory formats do not contain topology information. Pass in
either the path to a RCSB PDB file, a trajectory, or a topology to
supply this information. This option is not required for the .h5, .lh5,
and .pdb formats, which already contain topology information.
stride : int, default=None
Only read every stride-th frame
atom_indices : array_like, optional
If not none, then read only a subset of the atoms coordinates from the
file. This may be slightly slower than the standard read because it
requires an extra copy, but will save memory.
See Also
--------
load_frame, iterload
Examples
--------
>>> import mdtraj as md
>>> traj = md.load('output.xtc', top='topology.pdb')
>>> print traj
<mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>
>>> traj2 = md.load('output.xtc', stride=2, top='topology.pdb')
>>> print traj2
<mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>
>>> traj3 = md.load_hdf5('output.xtc', atom_indices=[0,1] top='topology.pdb')
>>> print traj3
<mdtraj.Trajectory with 500 frames, 2 atoms at 0x18236e4a0>
Returns
-------
trajectory : md.Trajectory
The resulting trajectory, as an md.Trajectory object.
"""
_assert_files_exist(filename_or_filenames)
if "top" in kwargs: # If applicable, pre-loads the topology from PDB for major performance boost.
kwargs["top"] = _parse_topology(kwargs["top"])
# grab the extension of the filename
if isinstance(filename_or_filenames, str): # If a single filename
extension = os.path.splitext(filename_or_filenames)[1]
filename = filename_or_filenames
else: # If multiple filenames, take the first one.
extensions = [os.path.splitext(filename_i)[1] for filename_i in filename_or_filenames]
if len(set(extensions)) != 1:
raise(TypeError("All filenames must have same extension!"))
else:
t = [load(f, **kwargs) for f in filename_or_filenames]
# we know the topology is equal because we sent the same topology kwarg
# in, so there's no reason to spend extra time checking
return t[0].join(t[1:], discard_overlapping_frames=discard_overlapping_frames,
check_topology=False)
try:
#loader = _LoaderRegistry[extension][0]
loader = _FormatRegistry.loaders[extension]
except KeyError:
raise IOError('Sorry, no loader for filename=%s (extension=%s) '
'was found. I can only load files '
'with extensions in %s' % (filename, extension, _FormatRegistry.loaders.keys()))
if loader.__name__ in ['load_hdf5', 'load_pdb', 'load_lh5']:
if 'top' in kwargs:
warnings.warn('top= kwarg ignored since file contains topology information')
# this is a little hack that makes calling load() more predicable. since
# most of the loaders take a kwargs "top" except for load_hdf5, (since
# it saves the topology inside the file), we often end up calling
# load_hdf5 via this function with the top kwarg specified. but then
# there would be a signature binding error. it's easier just to ignore
# it.
kwargs.pop('top', None)
return loader(filename, **kwargs)
def iterload(filename, chunk=100, **kwargs):
"""An iterator over a trajectory from one or more files on disk, in fragments
This may be more memory efficient than loading an entire trajectory at
once
Parameters
----------
filename : str
Path to the trajectory file on disk
chunk : int
Number of frames to load at once from disk per iteration.
Other Parameters
----------------
top : {str, Trajectory, Topology}
Most trajectory formats do not contain topology information. Pass in
either the path to a RCSB PDB file, a trajectory, or a topology to
supply this information. This option is not required for the .h5, .lh5,
and .pdb formats, which already contain topology information.
stride : int, default=None
Only read every stride-th frame.
atom_indices : array_like, optional
If not none, then read only a subset of the atoms coordinates from the
file. This may be slightly slower than the standard read because it
requires an extra copy, but will save memory.
See Also
--------
load, load_frame
Examples
--------
>>> import mdtraj as md
>>> for chunk in md.iterload('output.xtc', top='topology.pdb')
... print chunk
<mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
<mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
<mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
<mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
<mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
"""
stride = kwargs.get('stride', 1)
atom_indices = cast_indices(kwargs.get('atom_indices', None))
if chunk % stride != 0:
raise ValueError('Stride must be a divisor of chunk. stride=%d does not go '
'evenly into chunk=%d' % (stride, chunk))
if filename.endswith('.h5'):
if 'top' in kwargs:
warnings.warn('top= kwarg ignored since file contains topology information')
with HDF5TrajectoryFile(filename) as f:
if atom_indices is None:
topology = f.topology
else:
topology = f.topology.subset(atom_indices)
while True:
data = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
if data == []:
raise StopIteration()
convert(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
convert(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)
yield Trajectory(xyz=data.coordinates, topology=topology,
time=data.time, unitcell_lengths=data.cell_lengths,
unitcell_angles=data.cell_angles)
if filename.endswith('.lh5'):
if 'top' in kwargs:
warnings.warn('top= kwarg ignored since file contains topology information')
with LH5TrajectoryFile(filename) as f:
if atom_indices is None:
topology = f.topology
else:
topology = f.topology.subset(atom_indices)
ptr = 0
while True:
xyz = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
if len(xyz) == 0:
raise StopIteration()
convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
time = np.arange(ptr, ptr+len(xyz)*stride, stride)
ptr += len(xyz)*stride
yield Trajectory(xyz=xyz, topology=topology, time=time)
elif filename.endswith('.xtc'):
topology = _parse_topology(kwargs.get('top', None))
with XTCTrajectoryFile(filename) as f:
while True:
xyz, time, step, box = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
if len(xyz) == 0:
raise StopIteration()
convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
convert(box, f.distance_unit, Trajectory._distance_unit, inplace=True)
trajectory = Trajectory(xyz=xyz, topology=topology, time=time)
trajectory.unitcell_vectors = box
yield trajectory
elif filename.endswith('.dcd'):
topology = _parse_topology(kwargs.get('top', None))
with DCDTrajectoryFile(filename) as f:
ptr = 0
while True:
# for reasons that I have not investigated, dcdtrajectory file chunk and stride
# together work like this method, but HDF5/XTC do not.
xyz, box_length, box_angle = f.read(chunk, stride=stride, atom_indices=atom_indices)
if len(xyz) == 0:
raise StopIteration()
convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
convert(box_length, f.distance_unit, Trajectory._distance_unit, inplace=True)
time = np.arange(ptr, ptr+len(xyz)*stride, stride)
ptr += len(xyz)*stride
yield Trajectory(xyz=xyz, topology=topology, time=time, unitcell_lengths=box_length,
unitcell_angles=box_angle)
else:
t = load(filename, **kwargs)
for i in range(0, len(t), chunk):
yield t[i:i+chunk]
class Trajectory(object):
"""Container object for a molecular dynamics trajectory
A Trajectory represents a collection of one or more molecular structures,
generally (but not necessarily) from a molecular dynamics trajectory. The
Trajectory stores a number of fields describing the system through time,
including the cartesian coordinates of each atoms (``xyz``), the topology
of the molecular system (``topology``), and information about the
unitcell if appropriate (``unitcell_vectors``, ``unitcell_length``,
``unitcell_angles``).
A Trajectory should generally be constructed by loading a file from disk.
Trajectories can be loaded from (and saved to) the PDB, XTC, TRR, DCD,
binpos, NetCDF or MDTraj HDF5 formats.
Trajectory supports fancy indexing, so you can extract one or more frames
from a Trajectory as a separate trajectory. For example, to form a
trajectory with every other frame, you can slice with ``traj[::2]``.
Trajectory uses the nanometer, degree & picosecond unit system.
Examples
--------
>>> # loading a trajectory
>>> import mdtraj as md
>>> md.load('trajectory.xtc', top='native.pdb')
<mdtraj.Trajectory with 1000 frames, 22 atoms at 0x1058a73d0>
>>> # slicing a trajectory
>>> t = md.load('trajectory.h5')
>>> print(t)
<mdtraj.Trajectory with 100 frames, 22 atoms>
>>> print(t[::2])
<mdtraj.Trajectory with 50 frames, 22 atoms>
>>> # calculating the average distance between two atoms
>>> import mdtraj as md
>>> import numpy as np
>>> t = md.load('trajectory.h5')
>>> np.mean(np.sqrt(np.sum((t.xyz[:, 0, :] - t.xyz[:, 21, :])**2, axis=1)))
See Also
--------
mdtraj.load : High-level function that loads files and returns an ``md.Trajectory``
Attributes
----------
n_frames : int
n_atoms : int
n_residues : int
time : np.ndarray, shape=(n_frames,)
timestep : float
topology : md.Topology
top : md.Topology
xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
unitcell_vectors : {np.ndarray, shape=(n_frames, 3, 3), None}
unitcell_lengths : {np.ndarray, shape=(n_frames, 3), None}
unitcell_angles : {np.ndarray, shape=(n_frames, 3), None}
"""
# this is NOT configurable. if it's set to something else, things will break
# (thus why I make it private)
_distance_unit = 'nanometers'
@property
def topology(self):
"""Topology of the system, describing the organization of atoms into residues, bonds, etc
Returns
-------
topology : md.Topology
The topology object, describing the organization of atoms into
residues, bonds, etc
"""
return self._topology
@topology.setter
def topology(self, value):
"Set the topology of the system, describing the organization of atoms into residues, bonds, etc"
# todo: more typechecking
self._topology = value
@property
def n_frames(self):
"""Number of frames in the trajectory
Returns
-------
n_frames : int
The number of frames in the trajectory
"""
return self._xyz.shape[0]
@property
def n_atoms(self):
"""Number of atoms in the trajectory
Returns
-------
n_atoms : int
The number of atoms in the trajectory
"""
return self._xyz.shape[1]
@property
def n_residues(self):
"""Number of residues (amino acids) in the trajectory
Returns
-------
n_residues : int
The number of residues in the trajectory's topology
"""
if self.top is None:
return 0
return sum([1 for r in self.top.residues])
@property
def top(self):
"""Alias for self.topology, describing the organization of atoms into residues, bonds, etc
Returns
-------
topology : md.Topology
The topology object, describing the organization of atoms into
residues, bonds, etc
"""
return self._topology
@property
def timestep(self):
"""Timestep between frames, in picoseconds
Returns
-------
timestep : float
The timestep between frames, in picoseconds.
"""
if self.n_frames <= 1:
raise(ValueError("Cannot calculate timestep if trajectory has one frame."))
return self._time[1] - self._time[0]
@property
def time(self):
"""The simulation time corresponding to each frame, in picoseconds
Returns
-------
time : np.ndarray, shape=(n_frames,)
The simulation time corresponding to each frame, in picoseconds
"""
return self._time
@time.setter
def time(self, value):
"Set the simulation time corresponding to each frame, in picoseconds"
if isinstance(value, list):
value = np.array(value)
if np.isscalar(value) and self.n_frames == 1:
value = np.array([value])
elif not value.shape == (self.n_frames,):
raise ValueError('Wrong shape. Got %s, should be %s' % (value.shape,
(self.n_frames)))
self._time = value
@property
def unitcell_vectors(self):
"""The vectors that define the shape of the unit cell in each frame
Returns
-------
vectors : np.ndarray, shape(n_frames, 3, 3)
Vectors definiing the shape of the unit cell in each frame.
The semantics of this array are that the shape of the unit cell
in frame ``i`` are given by the three vectors, ``value[i, 0, :]``,
``value[i, 1, :]``, and ``value[i, 2, :]``.
"""
if self._unitcell_lengths is None or self._unitcell_angles is None:
return None
v1, v2, v3 = unitcell.lengths_and_angles_to_box_vectors(
self._unitcell_lengths[:, 0], # a
self._unitcell_lengths[:, 1], # b
self._unitcell_lengths[:, 2], # c
self._unitcell_angles[:, 0], # alpha
self._unitcell_angles[:, 1], # beta
self._unitcell_angles[:, 2], # gamma
)
return np.swapaxes(np.dstack((v1, v2, v3)), 1, 2)
@unitcell_vectors.setter
def unitcell_vectors(self, vectors):
"""Set the three vectors that define the shape of the unit cell
Parameters
----------
vectors : tuple of three arrays, each of shape=(n_frames, 3)
The semantics of this array are that the shape of the unit cell
in frame ``i`` are given by the three vectors, ``value[i, 0, :]``,
``value[i, 1, :]``, and ``value[i, 2, :]``.
"""
if vectors is None:
self._unitcell_lengths = None
self._unitcell_angles = None
return
if not len(vectors) == len(self):
raise TypeError('unitcell_vectors must be the same length as '
'the trajectory. you provided %s' % vectors)
v1 = vectors[:, 0, :]
v2 = vectors[:, 1, :]
v3 = vectors[:, 2, :]
a, b, c, alpha, beta, gamma = unitcell.box_vectors_to_lengths_and_angles(v1, v2, v3)
self._unitcell_lengths = np.vstack((a, b, c)).T
self._unitcell_angles = np.vstack((alpha, beta, gamma)).T
@property
def unitcell_lengths(self):
"""Lengths that define the shape of the unit cell in each frame.
Returns
-------
lengths : {np.ndarray, shape=(n_frames, 3), None}
Lengths of the unit cell in each frame, in nanometers, or None
if the Trajectory contains no unitcell information.
"""
return self._unitcell_lengths
@property
def unitcell_angles(self):
"""Angles that define the shape of the unit cell in each frame.
Returns
-------
lengths : np.ndarray, shape=(n_frames, 3)
The angles between the three unitcell vectors in each frame,
``alpha``, ``beta``, and ``gamma``. ``alpha' gives the angle
between vectors ``b`` and ``c``, ``beta`` gives the angle between
vectors ``c`` and ``a``, and ``gamma`` gives the angle between
vectors ``a`` and ``b``. The angles are in degrees.
"""
return self._unitcell_angles
@unitcell_lengths.setter
def unitcell_lengths(self, value):
"""Set the lengths that define the shape of the unit cell in each frame
Parameters
----------
value : np.ndarray, shape=(n_frames, 3)
The distances ``a``, ``b``, and ``c`` that define the shape of the
unit cell in each frame, or None
"""
self._unitcell_lengths = ensure_type(value, np.float32, 2,
'unitcell_lengths', can_be_none=True, shape=(len(self), 3),
warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
@unitcell_angles.setter
def unitcell_angles(self, value):
"""Set the lengths that define the shape of the unit cell in each frame
Parameters
----------
value : np.ndarray, shape=(n_frames, 3)
The angles ``alpha``, ``beta`` and ``gamma`` that define the
shape of the unit cell in each frame. The angles should be in
degrees.
"""
self._unitcell_angles = ensure_type(value, np.float32, 2,
'unitcell_angles', can_be_none=True, shape=(len(self), 3),
warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
@property
def xyz(self):
"""Cartesian coordinates of each atom in each simulation frame
Returns
-------
xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
A three dimensional numpy array, with the cartesian coordinates
of each atoms in each frame.
"""
return self._xyz
@xyz.setter
def xyz(self, value):
"Set the cartesian coordinates of each atom in each simulation frame"
if self.top is not None:
# if we have a topology and its not None
shape = (None, self.topology._numAtoms, 3)
else:
shape = (None, None, 3)
value = ensure_type(value, np.float32, 3, 'xyz', shape=shape,
warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
self._xyz = value
self._rmsd_traces = None
def _string_summary_basic(self):
"""Basic summary of traj in string form."""
return "mdtraj.Trajectory with %d frames, %d atoms, %d residues" % (self.n_frames, self.n_atoms, self.n_residues)
def _string_summary_unitcell(self):
"""unitcell summary of traj in string form."""
if self.unitcell_vectors is None:
return "%s\nContains no unitcell information.\n\n" % ("*" * 60)
else:
return "%s\nContains box vectors:\n%s" % ("*" * 60, str(self.unitcell_vectors))
def __len__(self):
return self.n_frames
def __add__(self, other):
"Concatenate two trajectories"
return self.join(other)
def __str__(self):
return "<%s>\n%s" % (self._string_summary_basic(), self._string_summary_unitcell())
def __repr__(self):
return "<%s at 0x%02x>\n%s" % (self._string_summary_basic(), id(self), self._string_summary_unitcell())
def superpose(self, reference, frame=0, atom_indices=None, parallel=True):
"""Superpose each conformation in this trajectory upon a reference
Parameters
----------
reference : md.Trajectory
For each conformation in this trajectory, aligned to a particular
reference conformation in another trajectory object.
frame : int
The index of the conformation in `reference` to align to.
atom_indices : array_like, or None
The indices of the atoms to superpose. If not
supplied, all atoms will be used.
parallel : bool
Use OpenMP to run the superposition in parallel over multiple cores
Returns
-------
self
"""
if atom_indices is None:
atom_indices = slice(None)
n_frames = self.xyz.shape[0]
self_align_xyz = np.asarray(self.xyz[:, atom_indices, :], order='c')
self_displace_xyz = np.asarray(self.xyz, order='c')
ref_align_xyz = np.array(reference.xyz[frame, atom_indices, :], copy=True, order='c').reshape(1, -1, 3)
offset = np.mean(self_align_xyz, axis=1, dtype=np.float64).reshape(n_frames, 1, 3)
self_align_xyz -= offset
if self_align_xyz.ctypes.data != self_displace_xyz.ctypes.data:
# when atom_indices is None, these two arrays alias the same memory
# so we only need to do the centering once
self_displace_xyz -= offset
ref_offset = ref_align_xyz[0].astype('float64').mean(0)
ref_align_xyz[0] -= ref_offset
self_g = np.einsum('ijk,ijk->i', self_align_xyz, self_align_xyz)
ref_g = np.einsum('ijk,ijk->i', ref_align_xyz , ref_align_xyz)
_rmsd.superpose_atom_major(
ref_align_xyz, self_align_xyz, ref_g, self_g, self_displace_xyz,
0, parallel=parallel)
self.xyz = self_displace_xyz + ref_offset
return self
def join(self, other, check_topology=True, discard_overlapping_frames=False):
"""Join two trajectories together along the time/frame axis.
This method joins trajectories along the time axis, giving a new trajectory
of length equal to the sum of the lengths of `self` and `other`.
It can also be called by using `self + other`
Parameters
----------
other : Trajectory or list of Trajectory
One or more trajectories to join with this one. These trajectories
are *appended* to the end of this trajectory.
check_topology : bool
Ensure that the topology of `self` and `other` are identical before
joining them. If false, the resulting trajectory will have the
topology of `self`.
discard_overlapping_frames : bool, optional
If True, compare coordinates at trajectory edges to discard overlapping
frames. Default: False.
See Also
--------
stack : join two trajectories along the atom axis
"""
if isinstance(other, Trajectory):
other = [other]
if isinstance(other, list):
if not all(isinstance(o, Trajectory) for o in other):
raise TypeError('You can only join Trajectory instances')
if not all(self.n_atoms == o.n_atoms for o in other):
raise ValueError('Number of atoms in self (%d) is not equal '
'to number of atoms in other' % (self.n_atoms))
if check_topology and not all(self.topology == o.topology for o in other):
raise ValueError('The topologies of the Trajectories are not the same')
if not all(self._have_unitcell == o._have_unitcell for o in other):
raise ValueError('Mixing trajectories with and without unitcell')
else:
raise TypeError('`other` must be a list of Trajectory. You supplied %d' % type(other))
# list containing all of the trajs to merge, including self
trajectories = [self] + other
if discard_overlapping_frames:
for i in range(len(trajectories)-1):
# last frame of trajectory i
x0 = trajectories[i].xyz[-1]
# first frame of trajectory i+1
x1 = trajectories[i + 1].xyz[0]
# check that all atoms are within 2e-3 nm
# (this is kind of arbitrary)
if np.all(np.abs(x1 - x0) < 2e-3):
trajectories[i] = trajectories[i][:-1]
xyz = np.concatenate([t.xyz for t in trajectories])
time = np.concatenate([t.time for t in trajectories])
angles = lengths = None
if self._have_unitcell:
angles = np.concatenate([t.unitcell_angles for t in trajectories])
lengths = np.concatenate([t.unitcell_lengths for t in trajectories])
# use this syntax so that if you subclass Trajectory,
# the subclass's join() will return an instance of the subclass
return self.__class__(xyz, deepcopy(self._topology), time=time,
unitcell_lengths=lengths, unitcell_angles=angles)
def stack(self, other):
"""Stack two trajectories along the atom axis
This method joins trajectories along the atom axis, giving a new trajectory
with a number of atoms equal to the sum of the number of atoms in
`self` and `other`.
Notes
-----
The resulting trajectory will have the unitcell and time information
the left operand.
Examples
--------
>>> t1 = md.load('traj1.h5')
>>> t2 = md.load('traj2.h5')
>>> # even when t2 contains no unitcell information
>>> t2.unitcell_vectors = None
>>> stacked = t1.stack(t2)
>>> # the stacked trajectory inherits the unitcell information
>>> # from the first trajectory
>>> np.all(stacked.unitcell_vectors == t1.unitcell_vectors)
True
Parameters
----------
other : Trajectory
The other trajectory to join
See Also
--------
join : join two trajectories along the time/frame axis.
"""
if not isinstance(other, Trajectory):
raise TypeError('You can only stack two Trajectory instances')
if self.n_frames != other.n_frames:
raise ValueError('Number of frames in self (%d) is not equal '
'to number of frames in other (%d)' % (self.n_frames, other.n_frames))
if self.topology is not None:
topology = self.topology.join(other.topology)
else:
topology = None
xyz = np.hstack((self.xyz, other.xyz))
return self.__class__(xyz=xyz, topology=topology, unitcell_angles=self.unitcell_angles,
unitcell_lengths=self.unitcell_lengths, time=self.time)
def __getitem__(self, key):
"Get a slice of this trajectory"
return self.slice(key)
def slice(self, key, copy=True):
"""Slice trajectory, by extracting one or more frames into a separate object
This method can also be called using index bracket notation, i.e
`traj[1] == traj.slice(1)`
Parameters
----------
key : {int, np.ndarray, slice}
The slice to take. Can be either an int, a list of ints, or a slice
object.
copy : bool, default=True
Copy the arrays after slicing. If you set this to false, then if
you modify a slice, you'll modify the original array since they
point to the same data.
"""
xyz = self.xyz[key]
time = self.time[key]
unitcell_lengths, unitcell_angles = None, None
if self.unitcell_angles is not None:
unitcell_angles = self.unitcell_angles[key]
if self.unitcell_lengths is not None:
unitcell_lengths = self.unitcell_lengths[key]
if copy:
xyz = xyz.copy()
time = time.copy()
topology = deepcopy(self._topology)
if self.unitcell_angles is not None:
unitcell_angles = unitcell_angles.copy()
if self.unitcell_lengths is not None:
unitcell_lengths = unitcell_lengths.copy()
newtraj = self.__class__(xyz, topology, time, unitcell_lengths=unitcell_lengths,
unitcell_angles=unitcell_angles)
return newtraj
def __init__(self, xyz, topology, time=None, unitcell_lengths=None, unitcell_angles=None):
# install the topology into the object first, so that when setting
# the xyz, we can check that it lines up (e.g. n_atoms), with the topology
self.topology = topology
self.xyz = xyz
# _rmsd_traces are the inner product of each centered conformation,
# which are required for computing RMSD. Normally these values are
# calculated on the fly in the cython code (rmsd/_rmsd.pyx), but
# optionally, we enable the use precomputed values which can speed
# up the calculation (useful for clustering), but potentially be unsafe
# if self._xyz is modified without a corresponding change to
# self._rmsd_traces. This array is populated computed by
# center_conformations, and no other methods should really touch it.
self._rmsd_traces = None
# box has no default, it'll just be none normally
self.unitcell_lengths = unitcell_lengths
self.unitcell_angles = unitcell_angles
# time will take the default 1..N
if time is None:
time = np.arange(len(self.xyz))
self.time = time
if (topology is not None) and (topology._numAtoms != self.n_atoms):
raise ValueError("Number of atoms in xyz (%s) and "
"in topology (%s) don't match" % (self.n_atoms, topology._numAtoms))
def openmm_positions(self, frame):
"""OpenMM-compatable positions of a single frame.
Examples
--------
>>> t = md.load('trajectory.h5')
>>> context.setPositions(t.openmm_positions(0))
Parameters
----------
frame : int
The index of frame of the trajectory that you wish to extract
Returns
-------
positions : list
The cartesian coordinates of specific trajectory frame, formatted
for input to OpenMM
"""
from simtk.openmm import Vec3