/
parser.py
1023 lines (968 loc) · 43 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# -*- coding: utf-8 -*-
"""
Main module containing XML-SEED parser.
:copyright:
The ObsPy Development Team (devs@obspy.org)
:license:
GNU Lesser General Public License, Version 3
(http://www.gnu.org/copyleft/lesser.html)
"""
from StringIO import StringIO
from lxml.etree import Element, SubElement, tostring, parse as xmlparse
from obspy.xseed import DEFAULT_XSEED_VERSION, utils, blockette
from obspy.xseed.utils import SEEDParserException
from obspy.core.util import getExampleFile, deprecated_keywords
import math
import os
import warnings
import zipfile
import copy
import urllib2
# Marker in byte 8 of a SEED record header: record continues the previous one.
CONTINUE_FROM_LAST_RECORD = '*'
# Record type codes of the control headers handled by this parser.
HEADERS = ['V', 'A', 'S']
# @see: http://www.iris.edu/manuals/SEEDManual_V2.4.pdf, p. 22-24
HEADER_INFO = {
    'V': {'name': 'Volume Index Control Header',
          'blockettes': [10, 11, 12]},
    'A': {'name': 'Abbreviation Dictionary Control Header',
          'blockettes': [30, 31, 32, 33, 34, 41, 43, 44, 45, 46, 47, 48]},
    'S': {'name': 'Station Control Header',
          'blockettes': [50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62]}
}
# Blockette types that carry response information and end up in RESP output.
RESP_BLOCKETTES = [53, 54, 55, 56, 57, 58, 60, 61, 62]
# XML-SEED versions supported by this parser.
XSEED_VERSIONS = ['1.0', '1.1']
# Index fields of the abbreviation blockettes.
INDEX_FIELDS = {30: 'data_format_identifier_code',
                31: 'comment_code_key',
                32: 'source_lookup_code',
                33: 'abbreviation_lookup_code',
                34: 'unit_lookup_code',
                35: 'beam_lookup_code'
                }
class Parser(object):
"""
The XML-SEED parser class parses dataless or full SEED volumes.
.. seealso::
The SEED file format description can be found at
http://www.iris.edu/manuals/SEEDManual_V2.4.pdf.
The XML-SEED format was proposed in:
* http://www.orfeus-eu.org/Organization/Newsletter/vol6no2/xml.shtml
* http://www.jamstec.go.jp/pacific21/xmlninja/.
"""
def __init__(self, data=None, debug=False, strict=False,
             compact=False):
    """
    Initializes the SEED parser.

    :param data: Filename, URL, XSEED/SEED string, file pointer or StringIO
    :type debug: Boolean.
    :param debug: Enables a verbose debug log during parsing of SEED file.
    :type strict: Boolean.
    :param strict: Parser will raise an exception if SEED files does not
        stay within the SEED specifications.
    :type compact: Boolean.
    :param compact: SEED volume will contain compact data strings. Missing
        time strings will be filled with 00:00:00.0000 if this option is
        disabled.
    """
    # Defaults; overwritten as soon as an actual volume is parsed.
    self.record_length = 4096
    self.version = 2.4
    self.blockettes = {}
    # Remember the flags controlling parsing strictness and output style.
    self.debug = debug
    self.strict = strict
    self.compact = compact
    self._format = None
    # Parsed content ends up in three containers: the volume header
    # blockettes, the abbreviation blockettes and one list per station.
    self.volume = None
    self.abbreviations = None
    self.stations = []
    # Parse right away if anything was handed over.
    if data:
        self.read(data)
def __str__(self):
    """
    Returns a short summary: one line per parsed channel (blockette 52)
    in the form ``NET.STA.LOC.CHA | start - end``.
    """
    try:
        if len(self.stations) == 0:
            return 'No data'
    except Exception:
        # self.stations not usable (e.g. nothing has been read yet)
        return 'No data'
    # Collect one line per channel; join at the end instead of the
    # previous quadratic string concatenation.
    lines = []
    for station in self.stations:
        for blkt in station:
            # Only blockette 52 carries per-channel information; the
            # station's blockette 50 is always the first list entry.
            if blkt.id != 52:
                continue
            lines.append('%s.%s.%s.%s | %s - %s' % (
                station[0].network_code,
                station[0].station_call_letters,
                blkt.location_identifier.strip(),
                blkt.channel_identifier,
                blkt.start_date,
                blkt.end_date))
    return os.linesep.join(lines)
def read(self, data):
    """
    General parser method for XML-SEED and Dataless SEED files.

    :type data: Filename, URL, Basestring or StringIO object.
    :param data: Filename, URL or XSEED/SEED string as file pointer or
        StringIO.
    :raises TypeError: If ``data`` is neither a string nor file-like.
    :raises IOError: If the data is neither SEED nor XML-SEED.
    """
    if getattr(self, "_format", None):
        warnings.warn("Clearing parser before every subsequent read()")
        self.__init__()
    # try to transform everything into StringIO object
    if isinstance(data, basestring):
        # if it starts with /path/to/ try to search in examples
        if data.startswith('/path/to/'):
            try:
                data = getExampleFile(data[9:])
            except Exception:
                # otherwise just try to read the given /path/to folder
                pass
        if "://" in data:
            # some URL
            data = urllib2.urlopen(data).read()
        elif os.path.isfile(data):
            # looks like a file - read it and make sure the file handle
            # is closed again (the previous open(...).read() leaked it)
            fh = open(data, 'rb')
            try:
                data = fh.read()
            finally:
                fh.close()
        # but could also be a big string with data
        data = StringIO(data)
    elif not hasattr(data, "read"):
        raise TypeError("data must be a file name, URL, data string or "
                        "a file-like object")
    # check first byte of data StringIO object
    first_byte = data.read(1)
    data.seek(0)
    if first_byte.isdigit():
        # SEED volumes starts with a number
        self._parseSEED(data)
        self._format = 'SEED'
    elif first_byte == '<':
        # XML files should always starts with an '<'
        self._parseXSEED(data)
        self._format = 'XSEED'
    else:
        raise IOError("Unknown data format: expected SEED (starts with a "
                      "digit) or XML-SEED (starts with '<')")
def getXSEED(self, version=DEFAULT_XSEED_VERSION, split_stations=False):
    """
    Returns a XSEED representation of the current Parser object.

    :type version: float, optional
    :param version: XSEED version string (default is ``1.1``).
    :type split_stations: boolean, optional
    :param split_stations: Splits stations containing multiple channels
        into multiple documents.
    :rtype: str or dict
    :return: Returns either a string or a dict of strings depending
        on the flag ``split_stations``.
    :raises SEEDParserException: On unknown version or incomplete data.
    """
    if version not in XSEED_VERSIONS:
        raise SEEDParserException("Unknown XML-SEED version!")
    doc = Element("xseed", version=version)
    # Nothing to write if not all necessary data is available.
    if not self.volume or not self.abbreviations or \
            len(self.stations) == 0:
        msg = 'No data to be written available.'
        raise SEEDParserException(msg)
    # Check blockettes:
    if not self._checkBlockettes():
        msg = 'Not all necessary blockettes are available.'
        raise SEEDParserException(msg)
    # Add blockettes 11 and 12 only for XSEED version 1.0.
    if version == '1.0':
        self._createBlockettes11and12(blockette12=True)
    # Now start actually filling the XML tree.
    # Volume header:
    sub = SubElement(doc, utils.toTag('Volume Index Control Header'))
    for blkt in self.volume:
        sub.append(blkt.getXML(xseed_version=version))
    # Delete blockettes 11 and 12 if necessary.
    if version == '1.0':
        self._deleteBlockettes11and12()
    # Abbreviations:
    sub = SubElement(doc,
                     utils.toTag('Abbreviation Dictionary Control Header'))
    for blkt in self.abbreviations:
        sub.append(blkt.getXML(xseed_version=version))
    if not split_stations:
        # Don't split stations
        for station in self.stations:
            sub = SubElement(doc, utils.toTag('Station Control Header'))
            for blkt in station:
                sub.append(blkt.getXML(xseed_version=version))
        if version == '1.0':
            # To pass the XSD schema test an empty time span control header
            # is added to the end of the file.
            SubElement(doc, utils.toTag('Timespan Control Header'))
            # Also no data is present in all supported SEED files.
            SubElement(doc, utils.toTag('Data Records'))
        # Return single XML String.
        return tostring(doc, pretty_print=True, xml_declaration=True,
                        encoding='UTF-8')
    else:
        # generate a dict of XML resources for each station
        result = {}
        for station in self.stations:
            cdoc = copy.copy(doc)
            sub = SubElement(cdoc, utils.toTag('Station Control Header'))
            for blkt in station:
                sub.append(blkt.getXML(xseed_version=version))
            if version == '1.0':
                # To pass the XSD schema test an empty time span control
                # header is added to the end of the file. These must be
                # appended to the per-station copy (cdoc) - appending to
                # the shared template (doc) accumulated one pair of empty
                # elements per station in every document.
                SubElement(cdoc, utils.toTag('Timespan Control Header'))
                # Also no data is present in all supported SEED files.
                SubElement(cdoc, utils.toTag('Data Records'))
            # Use the station's end effective date as the dictionary key
            # (renamed from ``id`` to avoid shadowing the builtin).
            key = station[0].end_effective_date
            result[key] = tostring(cdoc, pretty_print=True,
                                   xml_declaration=True, encoding='UTF-8')
        return result
def writeXSEED(self, filename, *args, **kwargs):
    """
    Writes a XML-SEED file with given name.

    All additional args/kwargs are passed through to :meth:`getXSEED`,
    e.g. ``version`` and ``split_stations``.
    """
    result = self.getXSEED(*args, **kwargs)
    if isinstance(result, basestring):
        # Single document - write and close the handle.
        fh = open(filename, 'w')
        try:
            fh.write(result)
        finally:
            fh.close()
        return
    elif isinstance(result, dict):
        for key, value in result.iteritems():
            # Compare by value, not identity: ``key is not ''`` only
            # worked by accident of string interning.
            if key != '':
                # past meta data - append timestamp before the extension.
                # Use the stripped base name (the original formatted with
                # ``filename``, producing e.g. "name.xml.<ts>.xml").
                fn = filename.split('.xml')[0]
                fn = "%s.%s.xml" % (fn, key.timestamp)
            else:
                # current meta data - leave original filename
                fn = filename
            fh = open(fn, 'w')
            try:
                fh.write(value)
            finally:
                fh.close()
        return
    else:
        raise TypeError
def getSEED(self, compact=False):
    """
    Returns a SEED representation of the current Parser object.
    """
    self.compact = compact
    # Refuse to serialize an incomplete volume.
    if not self.volume or not self.abbreviations or not self.stations:
        msg = 'No data to be written available.'
        raise SEEDParserException(msg)
    # Check blockettes:
    if not self._checkBlockettes():
        msg = 'Not all necessary blockettes are available.'
        raise SEEDParserException(msg)
    # Build the volume/abbreviation/station records; blockettes 11 and 12
    # are only needed while creating them.
    volume, abbreviations, stations = self._createBlockettes11and12()
    self._deleteBlockettes11and12()
    # Collect the records with their six digit sequence numbers and join
    # once at the end.
    parts = []
    cur_count = 1
    for rec in volume:
        parts.append('%06i' % cur_count + rec)
        cur_count += 1
    for rec in abbreviations:
        parts.append('%06i' % cur_count + rec)
        cur_count += 1
    # The first entry of every station list is the station name - drop it.
    for station_records in [rec[1:] for rec in stations]:
        for rec in station_records:
            parts.append('%06i' % cur_count + rec)
            cur_count += 1
    return ''.join(parts)
def writeSEED(self, filename, *args, **kwargs):
    """
    Writes a dataless SEED file with given name.
    """
    # Ensure the handle is closed even if serialization fails.
    fh = open(filename, 'wb')
    try:
        fh.write(self.getSEED(*args, **kwargs))
    finally:
        fh.close()
def getRESP(self):
    """
    Returns a RESP representation of the current Parser object.

    It aims to produce the same RESP files as when running rdseed with
    the command: "rdseed -f seed.test -R".

    :rtype: list
    :return: List of ``[filename, StringIO]`` pairs, one per channel.
    """
    # Check if there are any stations at all.
    if len(self.stations) == 0:
        raise Exception('No data to be written.')
    filename = None
    # Channel Response list.
    resp_list = []
    # Loop over all stations.
    for station in self.stations:
        resp = StringIO('')
        blockettes = []
        # Read the current station information and store it.
        cur_station = station[0].station_call_letters.strip()
        cur_network = station[0].network_code.strip()
        # Loop over all blockettes in that station (skipping the leading
        # blockette 50 at index 0).
        for _i in xrange(1, len(station)):
            # Catch all blockette 52: each one starts a new channel.
            if station[_i].id == 52:
                cur_location = station[_i].location_identifier.strip()
                cur_channel = station[_i].channel_identifier.strip()
                # Take old list and send it to the RESP parser.
                # NOTE: ``resp.len`` is the Python 2 StringIO length
                # attribute; non-zero means a previous channel was open.
                if resp.len != 0:
                    # Send the blockettes to the parser and append to list.
                    self._getRESPString(resp, blockettes, cur_station)
                    resp_list.append([filename, resp])
                # Create the filename.
                filename = 'RESP.%s.%s.%s.%s' \
                    % (cur_network, cur_station, cur_location, cur_channel)
                # Create new StringIO and list.
                resp = StringIO('')
                blockettes = []
                blockettes.append(station[_i])
                # Write header and the first two lines to the string.
                header = \
                    '#\t\t<< obspy.xseed, Version 0.1.3 >>\n' + \
                    '#\t\t\n' + \
                    '#\t\t======== CHANNEL RESPONSE DATA ========\n' + \
                    'B050F03 Station: %s\n' % cur_station + \
                    'B050F16 Network: %s\n' % cur_network
                # Write to StringIO.
                resp.write(header)
                continue
            # Any other blockette belongs to the currently open channel.
            blockettes.append(station[_i])
        # It might happen that no blockette 52 is specified,
        if len(blockettes) != 0:
            # One last time for the last channel.
            self._getRESPString(resp, blockettes, cur_station)
            resp_list.append([filename, resp])
    # Combine multiple channels: several epochs of the same channel share
    # one filename and are concatenated into a single StringIO.
    new_resp_list = []
    available_channels = [_i[0] for _i in resp_list]
    channel_set = set(available_channels)
    for channel in channel_set:
        channel_list = [_i for _i in resp_list if _i[0] == channel]
        if len(channel_list) == 1:
            new_resp_list.append(channel_list[0])
        else:
            # Append every further epoch's text to the first one.
            for _i in xrange(1, len(channel_list)):
                channel_list[_i][1].seek(0, 0)
                channel_list[0][1].write(channel_list[_i][1].read())
            new_resp_list.append(channel_list[0])
    return new_resp_list
def _select(self, seed_id, datetime=None):
    """
    Selects all blockettes related to given SEED id and datetime.

    :param seed_id: Full SEED id (``NET.STA.LOC.CHA``) or a bare channel
        code such as ``"EHZ"``.
    :param datetime: If set, restricts matches to blockettes effective
        at the given time.
    :return: List with the matching blockette 50, blockette 52 and every
        blockette following the matched channel.
    :raises SEEDParserException: If no channel, or more than one channel,
        matches.
    """
    # parse blockettes if not SEED: round-trip through getSEED() so the
    # internal state is in SEED form.
    if self._format != 'SEED':
        self.__init__(self.getSEED())
    # split id
    if '.' in seed_id:
        net, sta, loc, cha = seed_id.split('.')
    else:
        # bare channel code: network/station/location are wildcards
        cha = seed_id
        net = sta = loc = None
    # create a copy of station list
    stations = list(self.stations)
    # filter blockettes list by given SEED id. The two flags implement a
    # small state machine: a blockette 50 (station) arms station_flag, a
    # matching blockette 52 (channel) arms channel_flag, and any further
    # blockette is collected only while both flags are set.
    station_flag = False
    channel_flag = False
    blockettes = []
    for station in stations:
        for blk in station:
            if blk.id == 50:
                station_flag = False
                if net is not None and blk.network_code != net:
                    continue
                if sta is not None and blk.station_call_letters != sta:
                    continue
                if datetime is not None:
                    if blk.start_effective_date > datetime:
                        continue
                    if blk.end_effective_date and \
                            blk.end_effective_date < datetime:
                        continue
                station_flag = True
                blockettes.append(blk)
            elif blk.id == 52 and station_flag:
                channel_flag = False
                if loc is not None and blk.location_identifier != loc:
                    continue
                if blk.channel_identifier != cha:
                    continue
                if datetime is not None:
                    if blk.start_date > datetime:
                        continue
                    if blk.end_date and blk.end_date < datetime:
                        continue
                channel_flag = True
                blockettes.append(blk)
            elif channel_flag and station_flag:
                # response etc. blockettes following the matched channel
                blockettes.append(blk)
    # check number of selected channels (equals number of blockette 52)
    b50s = [b for b in blockettes if b.id == 50]
    b52s = [b for b in blockettes if b.id == 52]
    if len(b50s) == 0 or len(b52s) == 0:
        msg = 'No channel found with the given SEED id: %s'
        raise SEEDParserException(msg % (seed_id))
    elif len(b50s) > 1 or len(b52s) > 1:
        msg = 'More than one channel found with the given SEED id: %s'
        raise SEEDParserException(msg % (seed_id))
    return blockettes
@deprecated_keywords({'channel_id': 'seed_id'})
def getPAZ(self, seed_id, datetime=None):
    """
    Return PAZ.

    .. note:: Currently only the Laplace transform is supported, that
        is blockettes 43 and 53. A UserWarning will be raised for
        unsupported response blockettes, however all other values, such
        as overall sensitivity, normalization constant, etc. will be still
        returned if found.

    :type seed_id: str
    :param seed_id: SEED or channel id, e.g. ``"BW.RJOB..EHZ"`` or
        ``"EHE"``.
    :type datetime: :class:`~obspy.core.utcdatetime.UTCDateTime`, optional
    :param datetime: Timestamp of requested PAZ values
    :return: Dictionary containing PAZ as well as the overall
        sensitivity, the gain in the dictionary is the A0 normalization
        constant
    """
    blockettes = self._select(seed_id, datetime)
    data = {}
    for blockette in blockettes:
        if blockette.id == 58:
            # Blockette 58 by stage: 0 = overall sensitivity,
            # 1 = seismometer gain, 2 = digitizer gain.
            if blockette.stage_sequence_number == 0:
                data['sensitivity'] = blockette.sensitivity_gain
            elif blockette.stage_sequence_number == 1:
                data['seismometer_gain'] = blockette.sensitivity_gain
            elif blockette.stage_sequence_number == 2:
                data['digitizer_gain'] = blockette.sensitivity_gain
        elif blockette.id == 53 or blockette.id == 60:
            if blockette.id == 60:
                # Blockette 60 references dictionary responses in the
                # abbreviations via lookup keys instead of storing the
                # response inline.
                abbreviation = blockette.stages[0][1]
                data['seismometer_gain'] = \
                    [blk.sensitivity_gain for blk in self.abbreviations
                     if hasattr(blk, 'response_lookup_key') and \
                     blk.response_lookup_key == abbreviation][0]
                abbreviation = blockette.stages[0][0]
                resp = [blk for blk in self.abbreviations
                        if hasattr(blk, 'response_lookup_key') and \
                        blk.response_lookup_key == abbreviation][0]
                label = 'response_type'
            else:
                resp = blockette
                label = 'transfer_function_types'
            # Check if Laplace transform
            if getattr(resp, label) != "A":
                msg = 'Only supporting Laplace transform response ' + \
                      'type. Skipping other response information.'
                warnings.warn(msg, UserWarning)
                continue
            # A0_normalization_factor
            data['gain'] = resp.A0_normalization_factor
            # Poles
            data['poles'] = []
            for i in range(resp.number_of_complex_poles):
                try:
                    p = complex(resp.real_pole[i], resp.imaginary_pole[i])
                except TypeError:
                    # single pole: the fields are scalars, not lists
                    p = complex(resp.real_pole, resp.imaginary_pole)
                data['poles'].append(p)
            # Zeros
            data['zeros'] = []
            for i in range(resp.number_of_complex_zeros):
                try:
                    z = complex(resp.real_zero[i], resp.imaginary_zero[i])
                except TypeError:
                    # single zero: the fields are scalars, not lists
                    z = complex(resp.real_zero, resp.imaginary_zero)
                data['zeros'].append(z)
    return data
@deprecated_keywords({'channel_id': 'seed_id'})
def getCoordinates(self, seed_id, datetime=None):
    """
    Return Coordinates (from blockette 52)

    :type seed_id: str
    :param seed_id: SEED or channel id, e.g. ``"BW.RJOB..EHZ"`` or
        ``"EHE"``.
    :type datetime: :class:`~obspy.core.utcdatetime.UTCDateTime`, optional
    :param datetime: Timestamp of requested PAZ values
    :return: Dictionary containing Coordinates (latitude, longitude,
        elevation)
    """
    data = {}
    # The first blockette 52 of the selected channel carries the
    # coordinates; ignore everything else.
    for blkt in self._select(seed_id, datetime):
        if blkt.id != 52:
            continue
        data['latitude'] = blkt.latitude
        data['longitude'] = blkt.longitude
        data['elevation'] = blkt.elevation
        break
    return data
def writeRESP(self, folder, zipped=False):
    """
    Writes for each channel a RESP file within a given folder.

    :param folder: Folder name.
    :param zipped: Compresses all files into a single ZIP archive named by
        the folder name extended with the extension '.zip'.
    """
    new_resp_list = self.getRESP()
    if not zipped:
        # Write single files.
        for response in new_resp_list:
            if folder:
                path = os.path.join(folder, response[0])
            else:
                path = response[0]
            # Do not shadow the builtin ``file`` and make sure the handle
            # is closed even if the write fails.
            fh = open(path, 'w')
            try:
                response[1].seek(0, 0)
                fh.write(response[1].read())
            finally:
                fh.close()
    else:
        # Create a ZIP archive; close it even on write errors.
        zip_file = zipfile.ZipFile(folder + os.extsep + "zip", "w")
        try:
            for response in new_resp_list:
                response[1].seek(0, 0)
                zip_file.writestr(response[0], response[1].read())
        finally:
            zip_file.close()
def _parseSEED(self, data):
    """
    Parses through a whole SEED volume.

    It will always parse the whole file and skip any time span data.

    :type data: File pointer or StringIO object.
    """
    # Jump to the beginning of the file.
    data.seek(0)
    # Retrieve some basic data like version and record length.
    temp = data.read(8)
    # Check whether it starts with record sequence number 1 and a volume
    # index control header.
    if temp != '000001V ':
        raise SEEDParserException("Expecting 000001V ")
    # The first blockette has to be Blockette 10.
    temp = data.read(3)
    if temp not in ['010', '008', '005']:
        raise SEEDParserException("Expecting blockette 010, 008 or 005")
    # Skip the next four bytes containing the length of the blockette.
    data.seek(4, 1)
    # Set the version.
    self.version = float(data.read(4))
    # Get the record length (stored as a power of two).
    length = pow(2, int(data.read(2)))
    # Test record length: record number 2 must start exactly one record
    # length into the file.
    data.seek(length)
    temp = data.read(6)
    if temp != '000002':
        msg = "Got an invalid logical record length %d" % length
        raise SEEDParserException(msg)
    self.record_length = length
    if self.debug:
        print("RECORD LENGTH: %d" % (self.record_length))
    # Set all temporary attributes.
    self.temp = {'volume': [], 'abbreviations': [], 'stations': []}
    # Jump back to beginning.
    data.seek(0)
    # Read the first record.
    record = data.read(self.record_length)
    merged_data = ''
    record_type = None
    # Loop through file and pass merged records to _parseMergedData.
    while record:
        # Byte 7 is the continuation flag, byte 6 the record type code.
        record_continuation = (record[7] == CONTINUE_FROM_LAST_RECORD)
        same_record_type = (record[6] == record_type)
        if record_type == 'S' and record[8:11] != '050':
            # Station records not starting with blockette 50 belong to
            # the station started in a previous record.
            record_continuation = True
        if record_continuation and same_record_type:
            # continued record
            merged_data += record[8:]
        else:
            # Flush the finished merged record before starting a new one.
            self._parseMergedData(merged_data.strip(), record_type)
            # first or new type of record
            record_type = record[6]
            merged_data = record[8:]
            if record_type not in HEADERS:
                # only parse headers, no data
                merged_data = ''
                record_type = None
                break
        if self.debug:
            if not record_continuation:
                print("========")
            print(record[0:8])
        record = data.read(self.record_length)
    # Use parse once again for the leftover merged data.
    self._parseMergedData(merged_data.strip(), record_type)
    # Update the internal structure to finish parsing.
    self._updateInternalSEEDStructure()
def _parseXSEED(self, data):
    """
    Parse a XML-SEED string.

    :type data: File pointer or StringIO object.
    """
    data.seek(0)
    root = xmlparse(data).getroot()
    xseed_version = root.get('version')
    headers = root.getchildren()
    # Set all temporary attributes.
    self.temp = {'volume': [], 'abbreviations': [], 'stations': []}
    # Parse volume which is assumed to be the first header. Only parse
    # blockette 10 and discard the rest.
    self.temp['volume'].append(\
        self._parseXMLBlockette(headers[0].getchildren()[0], 'V',
                                xseed_version))
    # Append all abbreviations (assumed to be the second header).
    # NOTE(review): the loop variable shadows the module-level
    # ``blockette`` import inside this method.
    for blockette in headers[1].getchildren():
        self.temp['abbreviations'].append(\
            self._parseXMLBlockette(blockette, 'A', xseed_version))
    # Append all stations.
    for control_header in headers[2:]:
        if not control_header.tag == 'station_control_header':
            continue
        self.temp['stations'].append([])
        for blockette in control_header.getchildren():
            self.temp['stations'][-1].append(\
                self._parseXMLBlockette(blockette, 'S', xseed_version))
    # Update internal values.
    self._updateInternalSEEDStructure()
def _getRESPString(self, resp, blockettes, station):
    """
    Takes a file like object and a list of blockettes containing all
    blockettes for one channel and writes them RESP like to the StringIO.

    :param resp: File-like object the RESP text is written to.
    :param blockettes: Blockette list; the first entry must be the
        channel's blockette 52.
    :param station: Station call letters used for the RESP output.
    """
    blkt52 = blockettes[0]
    # The first blockette in the list always has to be Blockette 52.
    channel_info = {'Location': blkt52.location_identifier,
                    'Channel': blkt52.channel_identifier,
                    'Start date': blkt52.start_date,
                    'End date': blkt52.end_date}
    # Set location and end date default values or convert end time..
    if len(channel_info['Location']) == 0:
        channel_info['Location'] = '??'
    if not channel_info['End date']:
        channel_info['End date'] = 'No Ending Time'
    else:
        channel_info['End date'] = channel_info['End date'].formatSEED()
    # Convert starttime.
    channel_info['Start date'] = channel_info['Start date'].formatSEED()
    # Write Blockette 52 stuff.
    resp.write(\
        'B052F03 Location: %s\n' % channel_info['Location'] + \
        'B052F04 Channel: %s\n' % channel_info['Channel'] + \
        'B052F22 Start date: %s\n' % channel_info['Start date'] + \
        'B052F23 End date: %s\n' % channel_info['End date'] + \
        '#\t\t=======================================\n')
    # Write all other blockettes. Currently now sorting takes place. This
    # is just an experiment to see how rdseed does it. The Blockettes
    # might need to be sorted.
    for blockette in blockettes[1:]:
        # Only response-related blockette types end up in RESP output.
        if blockette.id not in RESP_BLOCKETTES:
            continue
        try:
            resp.write(blockette.getRESP(station, channel_info['Channel'],
                                         self.abbreviations))
        except AttributeError:
            # The blockette class does not provide a getRESP() method.
            msg = 'RESP output for blockette %s not implemented yet.'
            raise AttributeError(msg % blockette.id)
def _parseXMLBlockette(self, XML_blockette, record_type, xseed_version):
    """
    Takes the lxml tree of any blockette and returns a blockette object.
    """
    # The blockette number is stored in the element's first attribute.
    blockette_id = int(XML_blockette.values()[0])
    allowed = HEADER_INFO[record_type].get('blockettes', [])
    if blockette_id not in allowed:
        # Blockette 0 is silently ignored; any other unexpected type is
        # an error.
        if blockette_id != 0:
            msg = "Unknown blockette type %d found" % blockette_id
            raise SEEDParserException(msg)
        return
    class_name = 'Blockette%03d' % blockette_id
    if not hasattr(blockette, class_name):
        raise SEEDParserException('Blockette %d not implemented!' %
                                  blockette_id)
    # Instantiate the matching Blockette class and let it parse itself.
    blockette_class = getattr(blockette, class_name)
    blockette_obj = blockette_class(debug=self.debug,
                                    strict=self.strict,
                                    compact=self.compact,
                                    version=self.version,
                                    record_type=record_type,
                                    xseed_version=xseed_version)
    blockette_obj.parseXML(XML_blockette)
    return blockette_obj
def _createCutAndFlushRecord(self, blockettes, record_type):
    """
    Takes all blockettes of a record and return a list of finished records.

    If necessary it will cut the record and return two or more flushed
    records.

    The returned records also include the control header type code and the
    record continuation code. Therefore the returned record will have the
    length self.record_length - 6. Other methods are responsible for
    writing the sequence number.

    It will always return a list with records.
    """
    # Eight bytes per record are reserved for sequence number, type code
    # and continuation flag.
    length = self.record_length - 8
    return_records = []
    # Loop over all blockettes.
    record = ''
    for blockette in blockettes:
        blockette.compact = self.compact
        rec_len = len(record)
        # Never split a blockette's "length/blockette type" section across
        # records.
        if rec_len + 7 > length:
            # Flush the rest of the record if necessary.
            record += ' ' * (length - rec_len)
            return_records.append(record)
            record = ''
            rec_len = 0
        blockette_str = blockette.getSEED()
        # Calculate how much of the blockette is too long.
        overhead = rec_len + len(blockette_str) - length
        # If negative overhead: Write blockette.
        if overhead <= 0:
            record += blockette_str
        # Otherwise finish the record and start one or more new ones.
        else:
            record += blockette_str[:len(blockette_str) - overhead]
            # The record so far not written.
            rest_of_the_record = blockette_str[(len(blockette_str) - \
                overhead):]
            # Loop over the number of records to be written.
            for _i in xrange(int(math.ceil(len(rest_of_the_record) / \
                    float(length)))):
                return_records.append(record)
                record = ''
                # It doesn't hurt to index a string more than its length.
                record = record + \
                    rest_of_the_record[_i * length: (_i + 1) * length]
    if len(record) > 0:
        return_records.append(record)
    # Flush last record: pad it with spaces to the full record length.
    return_records[-1] = return_records[-1] + ' ' * \
        (length - len(return_records[-1]))
    # Add control header and continuation code: the first record gets the
    # plain type code, every follow-up record the '*' continuation marker.
    return_records[0] = record_type + ' ' + return_records[0]
    for _i in range(len(return_records) - 1):
        return_records[_i + 1] = record_type + '*' + return_records[_i + 1]
    return return_records
def _checkBlockettes(self):
    """
    Checks if all blockettes necessary for creating a SEED String are
    available.

    :rtype: bool
    """
    # Blockette 10 (volume identifier) is mandatory.
    if not 10 in [_i.id for _i in self.volume]:
        return False
    abb_blockettes = [_i.id for _i in self.abbreviations]
    # NOTE(review): with ``and`` this only fails when *none* of 30/33/34
    # is present; if all three are required the condition would need
    # ``or`` — confirm intended semantics before changing.
    if not 30 in abb_blockettes and not 33 in abb_blockettes and \
            not 34 in abb_blockettes:
        return False
    # Check every station:
    for _i in self.stations:
        stat_blockettes = [_j.id for _j in _i]
        # Same pattern as above: fails only if none of 50/52/58 exists.
        if not 50 in stat_blockettes and not 52 in stat_blockettes and \
                not 58 in stat_blockettes:
            return False
    return True
def _compareBlockettes(self, blkt1, blkt2):
    """
    Compares two blockettes.

    Returns True if every non-meta attribute of ``blkt1`` equals the
    corresponding attribute of ``blkt2``.
    """
    # Attributes listed in utils.IGNORE_ATTR are pure meta data and are
    # excluded from the comparison.
    return all(value == blkt2.__dict__[key]
               for key, value in blkt1.__dict__.items()
               if key not in utils.IGNORE_ATTR)
def _updateInternalSEEDStructure(self):
    """
    Takes everything in the self.temp dictionary and writes it into the
    volume, abbreviations and stations attributes of the class.

    The self.temp dictionary can only contain one seed volume with a
    correct structure.

    This method will try to merge everything, discard double entries and
    adjust abbreviations.

    It will also discard unnecessary blockettes that will be created
    again when writing SEED or XSEED.
    """
    # If called without a filled temporary dictionary do nothing.
    if not self.temp:
        return
    # Check if everything is empty.
    if not self.volume and not self.abbreviations and \
            len(self.stations) == 0:
        # First volume parsed: simply take the temporary data over.
        # Delete Blockette 11 and 12 (recreated on writing anyway).
        self.volume = [i for i in self.temp['volume']
                       if i.id not in [11, 12]]
        self.abbreviations = self.temp['abbreviations']
        self.stations.extend(self.temp['stations'])
        del self.temp
    else:
        msg = 'Merging is an experimental feature and still contains ' + \
              'a lot of errors!'
        warnings.warn(msg, UserWarning)
        # XXX: Sanity check for multiple Blockettes. Remove duplicates.
        # self._removeDuplicateAbbreviations()
        # Check the abbreviations.
        for blkt in self.temp['abbreviations']:
            id = blkt.blockette_type
            # Loop over all existing abbreviations and find those with the
            # same id and content.
            cur_index = 1
            # Helper variable.
            blkt_done = False
            for ex_blkt in self.abbreviations:
                if id != ex_blkt.blockette_type:
                    continue
                # Raise the current index if it is the same blockette.
                cur_index += 1
                if not self._compareBlockettes(blkt, ex_blkt):
                    continue
                # Identical abbreviation already present: repoint the new
                # stations at the existing index instead of appending.
                self._updateTemporaryStations(id, getattr(ex_blkt,
                                              INDEX_FIELDS[id]))
                blkt_done = True
                break
            if not blkt_done:
                # No identical abbreviation found: register a fresh index
                # and append the blockette.
                self._updateTemporaryStations(id, cur_index)
                # Append abbreviation.
                setattr(blkt, INDEX_FIELDS[id], cur_index)
                self.abbreviations.append(blkt)
        # Update the stations.
        self.stations.extend(self.temp['stations'])
        #XXX Update volume control header!
        # Also make the version of the format 2.4.
        self.volume[0].version_of_format = 2.4
def _updateTemporaryStations(self, blkt_id, index_nr):
    """
    Loops over all stations, finds the corresponding blockettes and changes
    all abbreviation lookup codes.

    :param blkt_id: Type of the abbreviation blockette (30/31/33/34).
    :param index_nr: New abbreviation index to write into the affected
        station blockette fields.
    """
    # Blockette dictionary which maps abbreviation IDs and and fields.
    index = {
        # Abbreviation Blockette : {Station Blockette: (Fields)}
        30: {52: (16,)},
        31: {51: (5,), 59: (5,)},
        33: {50: (10,), 52: (6,)},
        34: {52: (8, 9), 53: (5, 6), 54: (5, 6), 55: (4, 5)}
    }
    blockettes = index[blkt_id]
    # Loop over all stations.
    stations = self.temp['stations']
    for station in stations:
        for blkt in station:
            try:
                fields = blockettes[blkt.blockette_type]
            except (KeyError, AttributeError):
                # Narrowed from a bare except: this blockette type is not
                # affected by the given abbreviation (or has no
                # blockette_type attribute at all).
                continue
            for field in fields:
                # Translate the SEED field number into the index of the
                # blockette's getFields() list.
                setattr(blkt, blkt.getFields()[field - 2].field_name,
                        index_nr)
def _parseMergedData(self, data, record_type):
    """
    This method takes any merged SEED record and writes its blockettes
    in the corresponding dictionary entry of self.temp.
    """
    if not data:
        return
    # Create StringIO for easier access.
    data = StringIO(data)
    # Do not do anything if no data is passed or if a time series header
    # is passed.
    if record_type not in HEADERS:
        return
    # Set standard values.
    blockette_length = 0
    blockette_id = -1
    # Find out what kind of record is being parsed and pick the matching
    # target list in self.temp.
    if record_type == 'S':
        # Create new station blockettes list.
        self.temp['stations'].append([])
        root_attribute = self.temp['stations'][-1]
    elif record_type == 'V':
        # Just one Volume header per file allowed.
        if len(self.temp['volume']):
            msg = 'More than one Volume index control header found!'
            raise SEEDParserException(msg)
        root_attribute = self.temp['volume']
    else:
        # Just one abbreviations header allowed!
        if len(self.temp['abbreviations']):
            msg = 'More than one Abbreviation Dictionary Control ' + \
                  'Headers found!'
            warnings.warn(msg, UserWarning)
        root_attribute = self.temp['abbreviations']
    # Loop over all blockettes in data.
    while blockette_id != 0:
        # remove spaces between blockettes
        while data.read(1) == ' ':
            continue
        data.seek(-1, 1)
        try:
            # 3 characters blockette type, 4 characters blockette length.
            blockette_id = int(data.read(3))
            blockette_length = int(data.read(4))
        except:
            # Not enough (or non-numeric) data left - stop parsing.
            break
        # Rewind so the blockette class parses its own header again.
        data.seek(-7, 1)
        if blockette_id in HEADER_INFO[record_type].get('blockettes', []):
            class_name = 'Blockette%03d' % blockette_id
            if not hasattr(blockette, class_name):
                raise SEEDParserException('Blockette %d not implemented!' %
                                          blockette_id)
            blockette_class = getattr(blockette, class_name)
            blockette_obj = blockette_class(debug=self.debug,
                                            strict=self.strict,
                                            compact=self.compact,
                                            version=self.version,
                                            record_type=record_type)
            blockette_obj.parseSEED(data, blockette_length)
            root_attribute.append(blockette_obj)
            # Also register the blockette in the global per-type registry.
            self.blockettes.setdefault(blockette_id,
                                       []).append(blockette_obj)
        elif blockette_id != 0:
            msg = "Unknown blockette type %d found" % blockette_id
            raise SEEDParserException(msg)
    # check if everything is parsed
    if data.len != data.tell():
        warnings.warn("There exist unparsed elements!")
def _createBlockettes11and12(self, blockette12=False):
"""
Creates blockettes 11 and 12 for SEED writing and XSEED version 1.1
writing.
"""
# All the following unfortunately is necessary to get a correct
# Blockette 11:
# Start with the station strings to be able to write Blockette 11
# later on. The created list will contain lists with the first item
# being the corresponding station identifier code and each part of the
# record being a separate item.
stations = []
# Loop over all stations.
for _i in self.stations:
station = []
# Blockette 50 always should be the first blockette
station.append(_i[0].station_call_letters)
# Loop over blockettes.
station.extend(self._createCutAndFlushRecord(_i, 'S'))
stations.append(station)
# Make abbreviations.
abbreviations = self._createCutAndFlushRecord(self.abbreviations, 'A')
abbr_lenght = len(abbreviations)
cur_count = 1 + abbr_lenght
while True:
blkt11 = blockette.Blockette011()
blkt11.number_of_stations = len(self.stations)
stations_lengths = [cur_count + 1]
for _i in [len(_i) - 1 for _i in stations][:-1]: