/
spectral_estimation.py
2232 lines (2012 loc) · 93.8 KB
/
spectral_estimation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# Filename: spectral_estimation.py
# Purpose: Various Routines Related to Spectral Estimation
# Author: Tobias Megies
# Email: tobias.megies@geophysik.uni-muenchen.de
#
# Copyright (C) 2011-2012 Tobias Megies
# -----------------------------------------------------------------------------
"""
Various Routines Related to Spectral Estimation
:copyright:
The ObsPy Development Team (devs@obspy.org)
:license:
GNU Lesser General Public License, Version 3
(https://www.gnu.org/copyleft/lesser.html)
"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from future.builtins import * # NOQA
from future.utils import native_str
import bisect
import glob
import math
import os
import warnings
import numpy as np
from matplotlib import mlab
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.ticker import FormatStrFormatter
from matplotlib.patheffects import withStroke
from obspy import Stream, Trace, UTCDateTime, __version__
from obspy.core import Stats
from obspy.imaging.scripts.scan import compress_start_end
from obspy.core.inventory import Inventory
from obspy.core.util import AttribDict, NUMPY_VERSION
from obspy.core.util.base import MATPLOTLIB_VERSION
from obspy.core.util.obspy_types import ObsPyException
from obspy.imaging.cm import obspy_sequential
from obspy.imaging.util import _set_xaxis_obspy_dates
from obspy.io.xseed import Parser
from obspy.signal.invsim import cosine_taper
from obspy.signal.util import prev_pow_2
from obspy.signal.invsim import paz_to_freq_resp, evalresp
# smallest positive normal float; used to avoid log(0)/division-by-zero
dtiny = np.finfo(0.0).tiny
# bundled noise model data (see [Peterson1993]_ high/low noise models)
NOISE_MODEL_FILE = os.path.join(os.path.dirname(__file__),
                                "data", "noise_models.npz")
# Reference earthquake spectra, keyed by (magnitude, distance) tuples.
# Each value is a pair of equal-length lists: [periods, amplitudes].
# NOTE(review): the amplitude units are not stated here -- presumably the
# units expected by the plotting code that consumes this mapping; confirm
# against that consumer before relying on absolute values.
earthquake_models = {
    (1.5, 10): [[7.0700000e-01, 1.4140000e+00, 2.8280000e+00, 5.6600000e+00,
                 1.1300000e+01, 2.2600000e+01],
                [1.1940722e-06, 5.9652811e-06, 4.2022150e-05, 2.1857010e-04,
                 6.4011669e-04, 1.0630361e-03]],
    (1.5, 100): [[1.4140000e+00, 2.8280000e+00, 5.6600000e+00, 1.1300000e+01,
                  2.2600000e+01],
                 [2.0636508e-07, 1.0591860e-06, 4.4487964e-06, 1.1678490e-05,
                  1.2427714e-05]],
    (2.5, 10): [[3.5350000e-01, 7.0700000e-01, 1.4140000e+00, 2.8280000e+00,
                 5.6600000e+00, 1.1300000e+01, 2.2600000e+01],
                [3.1301161e-06, 1.9775227e-05, 1.5482427e-04, 8.3811450e-04,
                 3.7537242e-03, 7.3529155e-03, 6.5422494e-03]],
    (2.5, 100): [[7.0700000e-01, 1.4140000e+00, 2.8280000e+00, 5.6600000e+00,
                  1.1300000e+01, 2.2600000e+01],
                 [7.4170515e-07, 4.9107516e-06, 2.5050832e-05, 8.4981163e-05,
                  9.5891456e-05, 6.5125171e-05]],
    (3.5, 10): [[1.7670000e-01, 3.5350000e-01, 7.0700000e-01, 1.4140000e+00,
                 2.8280000e+00, 5.6600000e+00, 1.1300000e+01, 2.2600000e+01],
                [1.4096634e-05, 8.5860094e-05, 5.4403050e-04, 3.4576905e-03,
                 1.3476040e-02, 3.4295149e-02, 4.7010730e-02, 2.3188069e-02]],
    (3.5, 100): [[3.5350000e-01, 7.0700000e-01, 1.4140000e+00, 2.8280000e+00,
                  5.6600000e+00, 1.1300000e+01, 2.2600000e+01],
                 [2.0677442e-06, 1.4237587e-05, 8.5915636e-05, 3.8997357e-04,
                  1.0533844e-03, 8.7243737e-04, 2.8224063e-04]],
    (4.5, 10): [[1.7670000e-01, 3.5350000e-01, 7.0700000e-01, 1.4140000e+00,
                 2.8280000e+00, 5.6600000e+00, 1.1300000e+01, 2.2600000e+01],
                [4.0994260e-05, 2.9194046e-04, 1.9989323e-03, 1.0610332e-02,
                 3.1188683e-02, 6.5872809e-02, 7.7301339e-02, 2.7431279e-02]],
    (4.5, 100): [[1.7670000e-01, 3.5350000e-01, 7.0700000e-01, 1.4140000e+00,
                  2.8280000e+00, 5.6600000e+00, 1.1300000e+01, 2.2600000e+01],
                 [8.3385376e-06, 4.8130301e-05, 3.1461832e-04, 1.1054180e-03,
                  3.2015584e-03, 5.9504166e-03, 3.4106672e-03, 1.0389085e-03]],
    (5.5, 10): [[4.4187500e-02, 8.8375000e-02, 1.7670000e-01, 3.5350000e-01,
                 7.0700000e-01, 1.4140000e+00, 2.8280000e+00, 5.6600000e+00,
                 1.1300000e+01, 2.2600000e+01],
                [5.1597812e-05, 2.4350754e-04, 1.5861857e-03, 9.1333683e-03,
                 5.4321168e-02, 3.0210031e-01, 4.4505525e-01, 3.3287588e-01,
                 3.0775399e-01, 1.2501407e-01]],
    (5.5, 100): [[4.4187500e-02, 8.8375000e-02, 1.7670000e-01, 3.5350000e-01,
                  7.0700000e-01, 1.4140000e+00, 2.8280000e+00, 5.6600000e+00,
                  1.1300000e+01, 2.2600000e+01],
                 [7.6042044e-06, 5.0568931e-05, 2.6926792e-04, 9.7744507e-04,
                  3.4530700e-03, 9.2832164e-03, 1.5054122e-02, 1.3421025e-02,
                  8.0524871e-03, 2.0691071e-03]],
    (6.5, 10): [[8.8375000e-02, 1.7670000e-01, 3.5350000e-01, 7.0700000e-01,
                 1.4140000e+00, 2.8280000e+00, 5.6600000e+00, 1.1300000e+01,
                 2.2600000e+01],
                [1.7468292e-02, 6.5551361e-02, 1.8302926e-01, 3.9065640e-01,
                 7.1420714e-01, 1.0447672e+00, 1.2770160e+00, 1.0372500e+00,
                 5.0465056e-01]],
    (6.5, 100): [[2.2097000e-02, 4.4187500e-02, 8.8375000e-02, 1.7670000e-01,
                  3.5350000e-01, 7.0700000e-01, 1.4140000e+00, 2.8280000e+00,
                  5.6600000e+00, 1.1300000e+01, 2.2600000e+01],
                 [1.4561822e-05, 1.4224456e-04, 8.7870935e-04, 2.4575511e-03,
                  8.0615599e-03, 2.3743599e-02, 4.8707533e-02, 7.0969056e-02,
                  7.6622487e-02, 2.6998756e-02, 4.6235290e-03]],
    (7, 10): [[4.4187500e-02, 8.8375000e-02, 1.7670000e-01, 3.5350000e-01,
               7.0700000e-01, 1.4140000e+00, 2.8280000e+00, 5.6600000e+00,
               1.1300000e+01, 2.2600000e+01, 4.5200000e+01, 7.0700000e+01],
              [9.1004029e-03, 4.7514014e-02, 1.5787725e-01, 3.1426661e-01,
               6.3241827e-01, 1.0667690e+00, 1.2675411e+00, 1.1326387e+00,
               8.3962478e-01, 4.1229082e-01, 1.6311586e-01, 5.6992659e-02]],
    (7, 100): [[2.2097000e-02, 4.4187500e-02, 8.8375000e-02, 1.7670000e-01,
                3.5350000e-01, 7.0700000e-01, 1.4140000e+00, 2.8280000e+00,
                5.6600000e+00, 1.1300000e+01, 2.2600000e+01],
               [5.8736062e-04, 3.2124167e-03, 1.5314560e-02, 3.3624616e-02,
                6.4082408e-02, 1.3541913e-01, 1.8690009e-01, 1.8862548e-01,
                1.1841223e-01, 4.8641263e-02, 1.3723779e-02]],
    (6, 3000): [[5.0000000e-03, 4.5000000e-02, 3.0000000e-01, 1.3000000e+00,
                 6.0000000e+00],
                [9.8100000e-10, 7.8480000e-07, 4.9050000e-06, 4.9050000e-06,
                 7.8480000e-07]],
    (7, 3000): [[1.5000000e-03, 2.1000000e-02, 4.5000000e-02, 9.0000000e-02,
                 8.0000000e-01, 6.0000000e+00],
                [1.4715000e-09, 7.8480000e-06, 2.9430000e-05, 3.9240000e-05,
                 1.4715000e-05, 9.8100000e-07]],
    (8, 3000): [[1.5000000e-03, 1.0000000e-02, 4.5000000e-02, 9.0000000e-02,
                 3.0000000e+00, 6.0000000e+00],
                [9.8100000e-09, 3.9240000e-06, 1.1772000e-04, 1.9620000e-04,
                 4.9050000e-06, 1.9620000e-06]]}
def fft_taper(data):
    """
    Cosine taper, 10 percent at each end (like done by [McNamara2004]_).

    .. warning::
        Inplace operation, so data should be float.
    """
    taper = cosine_taper(len(data), 0.2)
    data *= taper
    return data
def welch_taper(data):
    """
    Applies a welch window to data. See
    :func:`~obspy.signal.spectral_estimation.welch_window`.

    .. warning::
        Inplace operation, so data should be float.

    :type data: :class:`~numpy.ndarray`
    :param data: Data to apply the taper to. Inplace operation, but also
        returns data for convenience.
    :returns: Tapered data.
    """
    window = welch_window(len(data))
    data *= window
    return data
def welch_window(n):
"""
Return a welch window for data of length n.
Routine is checked against PITSA for both even and odd values, but not for
strange values like n<5.
.. note::
See e.g.:
http://www.cescg.org/CESCG99/TTheussl/node7.html
:type n: int
:param n: Length of window function.
:rtype: :class:`~numpy.ndarray`
:returns: Window function for tapering data.
"""
n = math.ceil(n / 2.0)
taper_left = np.arange(n, dtype=np.float64)
taper_left = 1 - np.power(taper_left / n, 2)
# first/last sample is zero by definition
if n % 2 == 0:
# even number of samples: two ones in the middle, perfectly symmetric
taper_right = taper_left
else:
# odd number of samples: still two ones in the middle, however, not
# perfectly symmetric anymore. right side is shorter by one sample
nn = n - 1
taper_right = np.arange(nn, dtype=np.float64)
taper_right = 1 - np.power(taper_right / nn, 2)
taper_left = taper_left[::-1]
# first/last sample is zero by definition
taper_left[0] = 0.0
taper_right[-1] = 0.0
taper = np.concatenate((taper_left, taper_right))
return taper
class PPSD(object):
"""
Class to compile probabilistic power spectral densities for one combination
of network/station/location/channel/sampling_rate.
Calculations are based on the routine used by [McNamara2004]_.
For information on New High/Low Noise Model see [Peterson1993]_.
.. rubric:: Basic Usage
>>> from obspy import read
>>> from obspy.signal import PPSD
>>> st = read()
>>> tr = st.select(channel="EHZ")[0]
>>> paz = {'gain': 60077000.0,
... 'poles': [-0.037004+0.037016j, -0.037004-0.037016j,
... -251.33+0j, -131.04-467.29j, -131.04+467.29j],
... 'sensitivity': 2516778400.0,
... 'zeros': [0j, 0j]}
>>> ppsd = PPSD(tr.stats, paz)
>>> print(ppsd.id)
BW.RJOB..EHZ
>>> print(ppsd.times_processed)
[]
Now we could add data to the probabilistic psd (all processing like
demeaning, tapering and so on is done internally) and plot it like ...
>>> ppsd.add(st) # doctest: +SKIP
>>> print(ppsd.times_processed) # doctest: +SKIP
[UTCDateTime(...), UTCDateTime(...), ..., UTCDateTime(...)]
>>> ppsd.plot() # doctest: +SKIP
... but the example stream is too short and does not contain enough data.
.. note::
For a real world example see the `ObsPy Tutorial`_.
.. rubric:: Saving and Loading
The PPSD object supports saving to a numpy npz compressed binary file:
>>> ppsd.save_npz("myfile.npz") # doctest: +SKIP
The saved PPSD can then be loaded again using the static method
:func:`~obspy.signal.spectral_estimation.PPSD.load_npz`, e.g. to plot the
results again. If additional data is to be processed (note that another
option is to combine multiple npz files using
:meth:`~obspy.signal.spectral_estimation.PPSD.add_npz`), metadata must be
provided again, since they are not stored in the numpy npz file:
>>> ppsd = PPSD.load_npz("myfile.npz") # doctest: +SKIP
.. note::
When using metadata from an
:class:`~obspy.core.inventory.inventory.Inventory`,
a :class:`~obspy.io.xseed.parser.Parser` instance or from a RESP file,
information on metadata will be correctly picked for the respective
starttime of the data trace. This means that instrument changes are
correctly taken into account during response removal.
This is obviously not the case for a static PAZ dictionary!
.. _`ObsPy Tutorial`: https://docs.obspy.org/tutorial/
"""
# Names of attributes persisted to / restored from npz files, grouped by
# how they have to be (de)serialized.
NPZ_STORE_KEYS_LIST_TYPES = [
    # things related to processed data
    '_times_data',
    '_times_gaps',
    '_times_processed',
    '_binned_psds']
NPZ_STORE_KEYS_VERSION_NUMBERS = [
    # version numbers
    'ppsd_version',
    'obspy_version',
    'numpy_version',
    'matplotlib_version']
NPZ_STORE_KEYS_SIMPLE_TYPES = [
    # things related to Stats passed at __init__
    'id',
    'sampling_rate',
    # kwargs passed during __init__
    'skip_on_gaps',
    'ppsd_length',
    'overlap',
    'special_handling',
    # attributes derived during __init__
    '_len',
    '_nlap',
    '_nfft']
NPZ_STORE_KEYS_ARRAY_TYPES = [
    # attributes derived during __init__
    '_db_bin_edges',
    '_psd_periods',
    '_period_binning']
# full set of persisted attribute names
NPZ_STORE_KEYS = (
    NPZ_STORE_KEYS_ARRAY_TYPES +
    NPZ_STORE_KEYS_LIST_TYPES +
    NPZ_STORE_KEYS_SIMPLE_TYPES +
    NPZ_STORE_KEYS_VERSION_NUMBERS)
# A mapping of values for storing info in the NPZ file. This is needed
# because some types are not loadable without allowing pickle (#2409).
NPZ_SIMPLE_TYPE_MAP = {None: ''}
# reverse lookup of the above, used when reading npz files back in
NPZ_SIMPLE_TYPE_MAP_R = {v: i for i, v in NPZ_SIMPLE_TYPE_MAP.items()}
# Add current version as a class attribute to avoid hard coding it.
_CURRENT_VERSION = 3
def __init__(self, stats, metadata, skip_on_gaps=False,
             db_bins=(-200, -50, 1.), ppsd_length=3600.0, overlap=0.5,
             special_handling=None, period_smoothing_width_octaves=1.0,
             period_step_octaves=0.125, period_limits=None,
             **kwargs):  # @UnusedVariable
    """
    Initialize the PPSD object setting all fixed information on the station
    that should not change afterwards to guarantee consistent spectral
    estimates.
    The necessary instrument response information can be provided in
    several ways using the `metadata` keyword argument:

    * Providing an :class:`~obspy.core.inventory.inventory.Inventory`
      object (e.g. read from a StationXML file using
      :func:`~obspy.core.inventory.inventory.read_inventory` or fetched
      from a :mod:`FDSN <obspy.clients.fdsn>` webservice).
    * Providing an
      :class:`obspy.io.xseed Parser <obspy.io.xseed.parser.Parser>`,
      (e.g. containing metadata from a Dataless SEED file).
    * Providing the filename/path to a local RESP file.
    * Providing a dictionary containing poles and zeros information. Be
      aware that this leads to wrong results if the instrument's response
      is changing over the timespans that are added to the PPSD.
      Use with caution!

    :note: When using `special_handling="ringlaser"` the applied processing
        steps are changed. Differentiation of data (converting velocity
        to acceleration data) will be omitted and a flat instrument
        response is assumed, leaving away response removal and only
        dividing by `metadata['sensitivity']` specified in the provided
        `metadata` dictionary (other keys do not have to be present
        then). For scaling factors that are usually multiplied to the
        data remember to use the inverse as `metadata['sensitivity']`.

    :type stats: :class:`~obspy.core.trace.Stats`
    :param stats: Stats of the station/instrument to process
    :type metadata: :class:`~obspy.core.inventory.inventory.Inventory` or
        :class:`~obspy.io.xseed Parser` or str or dict
    :param metadata: Response information of instrument. See above notes
        for details.
    :type skip_on_gaps: bool, optional
    :param skip_on_gaps: Determines whether time segments with gaps should
        be skipped entirely. [McNamara2004]_ merge gappy
        traces by filling with zeros. This results in a clearly
        identifiable outlier psd line in the PPSD visualization. Select
        `skip_on_gaps=True` for not filling gaps with zeros which might
        result in some data segments shorter than `ppsd_length` not
        used in the PPSD.
    :type db_bins: tuple of three ints/floats
    :param db_bins: Specify the lower and upper boundary and the width of
        the db bins. The bin width might get adjusted to fit a number
        of equally spaced bins in between the given boundaries.
    :type ppsd_length: float, optional
    :param ppsd_length: Length of data segments passed to psd in seconds.
        In the paper by [McNamara2004]_ a value of 3600 (1 hour) was
        chosen. Longer segments increase the upper limit of analyzed
        periods but decrease the number of analyzed segments.
    :type overlap: float, optional
    :param overlap: Overlap of segments passed to psd. Overlap may take
        values between 0 and 1 and is given as fraction of the length
        of one segment, e.g. `ppsd_length=3600` and `overlap=0.5`
        result in an overlap of 1800s of the segments.
    :type special_handling: str, optional
    :param special_handling: Switches on customized handling for
        data other than seismometer recordings. Can be one of: 'ringlaser'
        (no instrument correction, just division by
        `metadata["sensitivity"]` of provided metadata dictionary),
        'hydrophone' (no differentiation after instrument correction).
    :type period_smoothing_width_octaves: float
    :param period_smoothing_width_octaves: Determines over what
        period/frequency range the psd is smoothed around every central
        period/frequency. Given in fractions of octaves (default of ``1``
        means the psd is averaged over a full octave at each central
        frequency).
    :type period_step_octaves: float
    :param period_step_octaves: Step length on frequency axis in fraction
        of octaves (default of ``0.125`` means one smoothed psd value on
        the frequency axis is measured every 1/8 of an octave).
    :type period_limits: tuple/list of two float
    :param period_limits: Set custom lower and upper end of period range
        (e.g. ``(0.01, 100)``). The specified lower end of period range
        will be set as the central period of the first bin (geometric mean
        of left/right edges of smoothing interval). At the upper end of the
        specified period range, no more additional bins will be added after
        the bin whose center frequency exceeds the given upper end for the
        first time.
    """
    # save things related to args
    self.id = "%(network)s.%(station)s.%(location)s.%(channel)s" % stats
    self.sampling_rate = stats.sampling_rate
    self.metadata = metadata
    # save things related to kwargs
    self.skip_on_gaps = skip_on_gaps
    self.db_bins = db_bins
    self.ppsd_length = ppsd_length
    self.overlap = overlap
    # normalize to lower case (None stays None via short-circuit `and`)
    self.special_handling = special_handling and special_handling.lower()
    if self.special_handling == 'ringlaser':
        if not isinstance(self.metadata, dict):
            msg = ("When using `special_handling='ringlaser'`, `metadata` "
                   "must be a plain dictionary with key 'sensitivity' "
                   "stating the overall sensitivity`.")
            raise TypeError(msg)
    elif self.special_handling == 'hydrophone':
        pass
    elif self.special_handling is not None:
        msg = "Unsupported value for 'special_handling' parameter: %s"
        msg = msg % self.special_handling
        raise ValueError(msg)
    # save version numbers
    self.ppsd_version = self._CURRENT_VERSION
    self.obspy_version = __version__
    self.matplotlib_version = ".".join(map(str, MATPLOTLIB_VERSION))
    self.numpy_version = np.__version__
    # calculate derived attributes
    # nfft is determined mimicking the fft setup in McNamara&Buland
    # paper:
    # (they take 13 segments overlapping 75% and truncate to next lower
    # power of 2)
    #  - take number of points of whole ppsd segment (default 1 hour)
    nfft = self.ppsd_length * self.sampling_rate
    #  - make 13 single segments overlapping by 75%
    #    (1 full segment length + 25% * 12 full segment lengths)
    nfft = nfft / 4.0
    #  - go to next smaller power of 2 for nfft
    self._nfft = prev_pow_2(nfft)
    #  - use 75% overlap
    #    (we end up with a little more than 13 segments..)
    self._nlap = int(0.75 * self.nfft)
    # Trace length for one psd segment.
    self._len = int(self.sampling_rate * self.ppsd_length)
    # make an initial dummy psd and to get the array of periods
    _, freq = mlab.psd(np.ones(self.len), self.nfft,
                       self.sampling_rate, noverlap=self.nlap)
    # leave out first entry (offset)
    freq = freq[1:]
    # periods ascending = reciprocal of descending frequencies
    self._psd_periods = 1.0 / freq[::-1]
    if period_limits is None:
        period_limits = (self.psd_periods[0], self.psd_periods[-1])
    self._setup_period_binning(
        period_smoothing_width_octaves, period_step_octaves, period_limits)
    # setup db binning
    # Set up the binning for the db scale.
    num_bins = int((db_bins[1] - db_bins[0]) / db_bins[2])
    self._db_bin_edges = np.linspace(db_bins[0], db_bins[1],
                                     num_bins + 1, endpoint=True)
    # lists related to persistent processed data
    self._times_processed = []
    self._times_data = []
    self._times_gaps = []
    self._binned_psds = []
    # internal attributes for stacks on processed data
    self._current_hist_stack = None
    self._current_hist_stack_cumulative = None
    self._current_times_used = []
    self._current_times_all_details = []
@property
def network(self):
    """Network code part of the SEED id."""
    parts = self.id.split(".")
    return parts[0]

@property
def station(self):
    """Station code part of the SEED id."""
    parts = self.id.split(".")
    return parts[1]

@property
def location(self):
    """Location code part of the SEED id."""
    parts = self.id.split(".")
    return parts[2]

@property
def channel(self):
    """Channel code part of the SEED id."""
    parts = self.id.split(".")
    return parts[3]

@property
def delta(self):
    """Sample spacing in seconds (reciprocal of the sampling rate)."""
    return 1.0 / self.sampling_rate
@property
def len(self):
    """
    Trace length for one psd segment.
    """
    return self._len

@property
def step(self):
    """
    Time step between start times of adjacent psd segments in seconds
    (assuming gap-less data).
    """
    return self.ppsd_length * (1.0 - self.overlap)

@property
def nfft(self):
    """Number of points per FFT segment (derived during __init__)."""
    return self._nfft

@property
def nlap(self):
    """Number of overlapping samples between FFT segments (75% of nfft)."""
    return self._nlap

@property
def merge_method(self):
    """Method argument for Stream.merge: -1 when skipping gappy data, 0
    otherwise (gaps filled with zeros)."""
    if self.skip_on_gaps:
        return -1
    else:
        return 0
@property
def db_bin_edges(self):
    """Edges of the decibel bins used for the histogram."""
    return self._db_bin_edges

@property
def db_bin_centers(self):
    """Centers of the decibel bins (midpoints of adjacent edges)."""
    edges = self.db_bin_edges
    return (edges[:-1] + edges[1:]) / 2.0

@property
def psd_frequencies(self):
    """Frequencies of the raw psd estimate (reciprocal of psd_periods,
    reversed)."""
    periods = self.psd_periods
    return 1.0 / periods[::-1]

@property
def psd_periods(self):
    """Periods of the raw psd estimate in seconds."""
    return self._psd_periods
@property
def period_bin_centers(self):
    """
    Return centers of period bins (geometric mean of left and right edge of
    period smoothing ranges).
    """
    # rows of _period_binning: smoothing-left, xedge-left, center,
    # xedge-right, smoothing-right (see _setup_period_binning)
    _, _, centers, _, _ = self._period_binning
    return centers

@property
def period_xedges(self):
    """
    Returns edges of period histogram bins (one element longer than number
    of bins). These are the edges of the plotted histogram/pcolormesh, but
    not the edges used for smoothing along the period axis of the psd
    (before binning).
    """
    _, left_xedges, _, right_xedges, _ = self._period_binning
    return np.concatenate([left_xedges[:1], right_xedges])

@property
def period_bin_left_edges(self):
    """
    Returns left edges of period bins (same length as number of bins).
    These are the edges used for smoothing along the period axis of the psd
    (before binning), not the edges of the histogram/pcolormesh in the
    plot.
    """
    smoothing_left, _, _, _, _ = self._period_binning
    return smoothing_left

@property
def period_bin_right_edges(self):
    """
    Returns right edges of period bins (same length as number of bins).
    These are the edges used for smoothing along the period axis of the psd
    (before binning), not the edges of the histogram/pcolormesh in the
    plot.
    """
    _, _, _, _, smoothing_right = self._period_binning
    return smoothing_right
@property
def psd_values(self):
    """
    Returns all individual smoothed psd arrays as a list. The corresponding
    times can be accessed as :attr:`PPSD.times_processed`, the
    corresponding central periods in seconds (central frequencies in Hertz)
    can be accessed as :attr:`PPSD.psd_periods`
    (:attr:`PPSD.psd_frequencies`).
    """
    return self._binned_psds

@property
def times_processed(self):
    """Start times of processed segments as UTCDateTime objects."""
    return [UTCDateTime(ns=ns) for ns in self._times_processed]

@property
def times_data(self):
    """(start, end) UTCDateTime pairs of data stretches added so far."""
    return [(UTCDateTime(ns=t1), UTCDateTime(ns=t2))
            for t1, t2 in self._times_data]

@property
def times_gaps(self):
    """(start, end) UTCDateTime pairs of gaps encountered in added data."""
    return [(UTCDateTime(ns=t1), UTCDateTime(ns=t2))
            for t1, t2 in self._times_gaps]

@property
def current_histogram(self):
    # histogram stack of currently selected data, computed lazily
    self.__check_histogram()
    return self._current_hist_stack

@property
def current_histogram_cumulative(self):
    # cumulative counterpart of current_histogram, computed lazily
    self.__check_histogram()
    return self._current_hist_stack_cumulative

@property
def current_histogram_count(self):
    """Number of psd segments in the current histogram stack."""
    self.__check_histogram()
    return len(self._current_times_used)

@property
def current_times_used(self):
    """Times of the psd segments used in the current histogram stack."""
    self.__check_histogram()
    return [UTCDateTime(ns=ns) for ns in self._current_times_used]
def _setup_period_binning(self, period_smoothing_width_octaves,
                          period_step_octaves, period_limits):
    """
    Set up period binning (fills ``self._period_binning``, a 5xN array of
    smoothing-left-edge, plot-left-edge, center, plot-right-edge and
    smoothing-right-edge per period bin).

    :type period_smoothing_width_octaves: float
    :param period_smoothing_width_octaves: Width of the smoothing interval
        around each central period, in fractions of octaves.
    :type period_step_octaves: float
    :param period_step_octaves: Step between adjacent central periods, in
        fractions of octaves.
    :type period_limits: tuple/list of two float
    :param period_limits: Lower/upper end of period range to cover.
    """
    # we step through the period range at step width controlled by
    # period_step_octaves (default 1/8 octave)
    period_step_factor = 2 ** period_step_octaves
    # the width of frequencies we average over for every bin is controlled
    # by period_smoothing_width_octaves (default one full octave)
    period_smoothing_width_factor = \
        2 ** period_smoothing_width_octaves
    # calculate left/right edge and center of first period bin
    # set first smoothing bin's left edge such that the center frequency is
    # the lower limit specified by the user (or the lowest period in the
    # psd)
    per_left = (period_limits[0] /
                (period_smoothing_width_factor ** 0.5))
    per_right = per_left * period_smoothing_width_factor
    per_center = math.sqrt(per_left * per_right)
    # build up lists
    per_octaves_left = [per_left]
    per_octaves_right = [per_right]
    per_octaves_center = [per_center]
    # do this for the whole period range and append the values to our lists
    while per_center < period_limits[1]:
        # move left edge of smoothing bin further
        per_left *= period_step_factor
        # determine right edge of smoothing bin
        per_right = per_left * period_smoothing_width_factor
        # determine center period of smoothing/binning
        per_center = math.sqrt(per_left * per_right)
        # append to lists
        per_octaves_left.append(per_left)
        per_octaves_right.append(per_right)
        per_octaves_center.append(per_center)
    per_octaves_left = np.array(per_octaves_left)
    per_octaves_right = np.array(per_octaves_right)
    per_octaves_center = np.array(per_octaves_center)
    # restrict to bins that overlap the period range of the raw psd
    valid = per_octaves_right > self.psd_periods[0]
    valid &= per_octaves_left < self.psd_periods[-1]
    per_octaves_left = per_octaves_left[valid]
    per_octaves_right = per_octaves_right[valid]
    per_octaves_center = per_octaves_center[valid]
    self._period_binning = np.vstack([
        # left edge of smoothing (for calculating the bin value from psd
        per_octaves_left,
        # left xedge of bin (for plotting)
        per_octaves_center / (period_step_factor ** 0.5),
        # bin center (for plotting)
        per_octaves_center,
        # right xedge of bin (for plotting)
        per_octaves_center * (period_step_factor ** 0.5),
        # right edge of smoothing (for calculating the bin value from psd
        per_octaves_right])
def __sanity_check(self, trace):
    """
    Checks if trace is compatible for use in the current PPSD instance,
    i.e. has the same SEED id and sampling rate.
    Returns True if trace can be used or False if not.

    :type trace: :class:`~obspy.core.trace.Trace`
    """
    return (trace.id == self.id and
            trace.stats.sampling_rate == self.sampling_rate)
def __insert_processed_data(self, utcdatetime, spectrum):
    """
    Inserts the given UTCDateTime and processed/octave-binned spectrum at
    the right position in the lists, keeping the time order intact (the
    two lists stay index-aligned).
    Replaces old :meth:`PPSD.__insert_used_time()` private method and the
    addition to the histogram stack that was performed directly in
    :meth:`PPSD.__process()`.

    :type utcdatetime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :type spectrum: :class:`numpy.ndarray`
    """
    timestamp_ns = utcdatetime._ns
    position = bisect.bisect_right(self._times_processed, timestamp_ns)
    self._times_processed.insert(position, timestamp_ns)
    self._binned_psds.insert(position, spectrum)
def __insert_gap_times(self, stream):
    """
    Gets gap information of stream and adds the encountered gaps to the gap
    list of the PPSD instance (as [start_ns, end_ns] pairs).

    :type stream: :class:`~obspy.core.stream.Stream`
    """
    # indices 4 and 5 of a get_gaps() entry are gap start/end times
    for gap in stream.get_gaps():
        self._times_gaps.append([gap[4]._ns, gap[5]._ns])
def __insert_data_times(self, stream):
    """
    Records the start/end times of all traces in the stream in the list of
    data times of the PPSD instance (as [start_ns, end_ns] pairs).

    (The previous docstring was a copy-paste of ``__insert_gap_times`` and
    wrongly described gap handling.)

    :type stream: :class:`~obspy.core.stream.Stream`
    """
    self._times_data += \
        [[tr.stats.starttime._ns, tr.stats.endtime._ns]
         for tr in stream]
def __check_time_present(self, utcdatetime):
    """
    Checks if the given UTCDateTime is already part of the current PPSD
    instance. That is, checks if from utcdatetime to utcdatetime plus
    ppsd_length there is already data in the PPSD.
    Returns True if adding ppsd_length starting at the given time
    would result in an overlap of the ppsd data base, False if it is OK to
    insert this piece of data.
    """
    # no data processed yet, nothing can overlap
    if not self._times_processed:
        return False
    # new data comes before existing data.
    if utcdatetime._ns < self._times_processed[0]:
        # overlap of new segment's end with first existing segment's start
        # (times are stored as integer nanoseconds)
        overlap_seconds = (
            (utcdatetime._ns + self.ppsd_length * 1e9) -
            self._times_processed[0]) / 1e9
        # the new data is welcome if any overlap that would be introduced
        # is less or equal than the overlap used by default on continuous
        # data.
        if overlap_seconds / self.ppsd_length > self.overlap:
            return True
        else:
            return False
    # new data exactly at start of first data segment
    elif utcdatetime._ns == self._times_processed[0]:
        return True
    # new data comes after existing data.
    elif utcdatetime._ns > self._times_processed[-1]:
        overlap_seconds = (
            (self._times_processed[-1] + self.ppsd_length * 1e9) -
            utcdatetime._ns) / 1e9
        # the new data is welcome if any overlap that would be introduced
        # is less or equal than the overlap used by default on continuous
        # data.
        if overlap_seconds / self.ppsd_length > self.overlap:
            return True
        else:
            return False
    # new data exactly at start of last data segment
    elif utcdatetime._ns == self._times_processed[-1]:
        return True
    # otherwise we are somewhere within the currently already present time
    # range..
    else:
        index1 = bisect.bisect_left(self._times_processed,
                                    utcdatetime._ns)
        index2 = bisect.bisect_right(self._times_processed,
                                     utcdatetime._ns)
        # if bisect left/right gives same result, we are not exactly at one
        # sampling point but in between to timestamps
        if index1 == index2:
            t1 = self._times_processed[index1 - 1]
            t2 = self._times_processed[index1]
            # check if we are overlapping on left side more than the normal
            # overlap specified during init
            overlap_seconds_left = (
                (t1 + self.ppsd_length * 1e9) - utcdatetime._ns) / 1e9
            # check if we are overlapping on right side more than the
            # normal overlap specified during init
            overlap_seconds_right = (
                (utcdatetime._ns + self.ppsd_length * 1e9) - t2) / 1e9
            max_overlap = max(overlap_seconds_left,
                              overlap_seconds_right) / self.ppsd_length
            if max_overlap > self.overlap:
                return True
            else:
                return False
        # if bisect left/right gives different results, we are at exactly
        # one timestamp that is already present
        else:
            return True
    # NOTE(review): unreachable -- every branch above returns.
    raise NotImplementedError('This should not happen, please report on '
                              'github.')
def __check_histogram(self):
    """
    Ensure a histogram stack exists, computing it lazily from the
    processed data; raise if no data has been accumulated yet.
    """
    # a cached stack is still valid, nothing to do
    if self._current_hist_stack is not None:
        return
    if not self._times_processed:
        raise Exception('No data accumulated')
    self.calculate_histogram()
def __invalidate_histogram(self):
    """
    Drop all cached histogram results so they get recomputed on demand.
    """
    # both stacks share the same None sentinel (immutable, safe to chain)
    self._current_hist_stack = self._current_hist_stack_cumulative = None
    # deliberately two distinct lists, not one shared object
    self._current_times_used = []
    self._current_times_all_details = []
def add(self, stream, verbose=False):
    """
    Process all traces with compatible information and add their spectral
    estimates to the histogram containing the probabilistic psd.
    Also ensures that no piece of data is inserted twice.

    :type stream: :class:`~obspy.core.stream.Stream` or
        :class:`~obspy.core.trace.Trace`
    :param stream: Stream or trace with data that should be added to the
        probabilistic psd histogram.
    :type verbose: bool
    :param verbose: If ``True``, print the start time of each successfully
        processed segment.
    :returns: True if appropriate data were found and the ppsd statistics
        were changed, False otherwise.
    :raises Exception: If no metadata are attached to this instance.
    """
    if self.metadata is None:
        msg = ("PPSD instance has no metadata attached, which are needed "
               "for processing the data. When using 'PPSD.load_npz()' use "
               "'metadata' kwarg to provide metadata.")
        raise Exception(msg)
    # return later if any changes were applied to the ppsd statistics
    changed = False
    # prepare the list of traces to go through
    if isinstance(stream, Trace):
        stream = Stream([stream])
    if not stream:
        msg = 'Empty stream object provided to PPSD.add()'
        warnings.warn(msg)
        return False
    # select appropriate traces
    stream = stream.select(id=self.id)
    if not stream:
        msg = 'No traces with matching SEED ID in provided stream object.'
        warnings.warn(msg)
        return False
    stream = stream.select(sampling_rate=self.sampling_rate)
    if not stream:
        msg = ('No traces with matching sampling rate in provided stream '
               'object.')
        warnings.warn(msg)
        return False
    # save information on available data and gaps
    self.__insert_data_times(stream)
    self.__insert_gap_times(stream)
    # merge depending on skip_on_gaps set during __init__
    stream.merge(self.merge_method, fill_value=0)
    for tr in stream:
        # the following check should not be necessary due to the select()..
        if not self.__sanity_check(tr):
            msg = "Skipping incompatible trace."
            warnings.warn(msg)
            continue
        t1 = tr.stats.starttime
        t2 = tr.stats.endtime
        # step through the trace in increments of
        # (1 - overlap) * ppsd_length, processing one segment per step
        while t1 + self.ppsd_length - tr.stats.delta <= t2:
            if self.__check_time_present(t1):
                msg = "Already covered time spans detected (e.g. %s), " + \
                      "skipping these slices."
                msg = msg % t1
                warnings.warn(msg)
            else:
                # throw warnings if trace length is different
                # than ppsd_length..!?!
                # (renamed local from `slice` to avoid shadowing the
                # builtin of that name)
                segment = tr.slice(t1, t1 + self.ppsd_length -
                                   tr.stats.delta)
                # XXX not good, should be working in place somehow
                # XXX how to do it with the padding, though?
                success = self.__process(segment)
                if success:
                    if verbose:
                        print(t1)
                    changed = True
            t1 += (1 - self.overlap) * self.ppsd_length  # advance
        # enforce time limits, pad zeros if gaps
        # tr.trim(t, t+PPSD_LENGTH, pad=True)
    if changed:
        self.__invalidate_histogram()
    return changed
def __process(self, tr):
"""
Processes a segment of data and save the psd information.
Whether `Trace` is compatible (station, channel, ...) has to
checked beforehand.
:type tr: :class:`~obspy.core.trace.Trace`
:param tr: Compatible Trace with data of one PPSD segment
:returns: `True` if segment was successfully processed,
`False` otherwise.
"""
# XXX DIRTY HACK!!
if len(tr) == self.len + 1:
tr.data = tr.data[:-1]
# one last check..
if len(tr) != self.len:
msg = ("Got a piece of data with wrong length. Skipping:\n" +
str(tr))
warnings.warn(msg)
return False
# being paranoid, only necessary if in-place operations would follow
tr.data = tr.data.astype(np.float64)
# if trace has a masked array we fill in zeros
try:
tr.data[tr.data.mask] = 0.0
# if it is no masked array, we get an AttributeError
# and have nothing to do
except AttributeError:
pass
# restitution:
# mcnamara apply the correction at the end in freq-domain,
# does it make a difference?
# probably should be done earlier on bigger chunk of data?!
# Yes, you should avoid removing the response until after you
# have estimated the spectra to avoid elevated lp noise
spec, _freq = mlab.psd(tr.data, self.nfft, self.sampling_rate,
detrend=mlab.detrend_linear, window=fft_taper,
noverlap=self.nlap, sides='onesided',
scale_by_freq=True)
# leave out first entry (offset)
spec = spec[1:]
# working with the periods not frequencies later so reverse spectrum
spec = spec[::-1]
# Here we remove the response using the same conventions
# since the power is squared we want to square the sensitivity
# we can also convert to acceleration if we have non-rotational data
if self.special_handling == "ringlaser":
# in case of rotational data just remove sensitivity
spec /= self.metadata['sensitivity'] ** 2
# special_handling "hydrophone" does instrument correction same as
# "normal" data
else:
# determine instrument response from metadata
try:
resp = self._get_response(tr)
except Exception as e:
msg = ("Error getting response from provided metadata:\n"
"%s: %s\n"
"Skipping time segment(s).")
msg = msg % (e.__class__.__name__, str(e))
warnings.warn(msg)
return False
resp = resp[1:]
resp = resp[::-1]
# Now get the amplitude response (squared)
respamp = np.absolute(resp * np.conjugate(resp))
# Make omega with the same conventions as spec
w = 2.0 * math.pi * _freq[1:]
w = w[::-1]
# Here we do the response removal
# Do not differentiate when `special_handling="hydrophone"`
if self.special_handling == "hydrophone":
spec = spec / respamp
else:
spec = (w ** 2) * spec / respamp
# avoid calculating log of zero
idx = spec < dtiny
spec[idx] = dtiny
# go to dB
spec = np.log10(spec)
spec *= 10
smoothed_psd = []
# do this for the whole period range and append the values to our lists
for per_left, per_right in zip(self.period_bin_left_edges,
self.period_bin_right_edges):
specs = spec[(per_left <= self.psd_periods) &
(self.psd_periods <= per_right)]
smoothed_psd.append(specs.mean())
smoothed_psd = np.array(smoothed_psd, dtype=np.float32)
self.__insert_processed_data(tr.stats.starttime, smoothed_psd)