Skip to content

Commit

Permalink
Merge 749a8aa into 2786cb1
Browse files Browse the repository at this point in the history
  • Loading branch information
krischer committed Aug 4, 2016
2 parents 2786cb1 + 749a8aa commit 8971429
Show file tree
Hide file tree
Showing 10 changed files with 272 additions and 68 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.txt
@@ -1,3 +1,7 @@
1.0.3:
- obspy.io.mseed:
* ObsPy can now also read (Mini)SEED files with noise records. (see #1495)

1.0.2: (doi: 10.5281/zenodo.49636)
- obspy.core:
* Added workaround for numpy issue where many FFTs of various lengths fill
Expand Down
8 changes: 8 additions & 0 deletions obspy/io/mseed/__init__.py
Expand Up @@ -146,6 +146,14 @@
from future.builtins import * # NOQA


class InternalMSEEDReadingError(Exception):
pass


class InternalMSEEDReadingWarning(UserWarning):
pass


if __name__ == '__main__':
import doctest
doctest.testmod(exclude_empty=True)
147 changes: 93 additions & 54 deletions obspy/io/mseed/core.py
Expand Up @@ -8,6 +8,7 @@
from future.utils import native_str

import ctypes as C
import io
import os
import warnings
from struct import pack
Expand All @@ -19,21 +20,13 @@
from obspy.core.util import NATIVE_BYTEORDER
from obspy.core.util.deprecation_helpers import (
DynamicAttributeImportRerouteModule, ObsPyDeprecationWarning)
from . import util
from . import util, InternalMSEEDReadingError, InternalMSEEDReadingWarning
from .headers import (DATATYPES, ENCODINGS, HPTERROR, HPTMODULUS, SAMPLETYPE,
SEED_CONTROL_HEADERS, UNSUPPORTED_ENCODINGS,
VALID_CONTROL_HEADERS, VALID_RECORD_LENGTHS, Selections,
SelectTime, Blkt100S, Blkt1001S, clibmseed)


class InternalMSEEDReadingError(Exception):
pass


class InternalMSEEDReadingWarning(UserWarning):
pass


def _is_mseed(filename):
"""
Checks whether a file is Mini-SEED/full SEED or not.
Expand All @@ -52,58 +45,104 @@ def _is_mseed(filename):
Thus it cannot be used to validate a Mini-SEED or SEED file.
"""
with open(filename, 'rb') as fp:
header = fp.read(7)
# File has less than 7 characters
if len(header) != 7:
return False
# Sequence number must contains a single number or be empty
seqnr = header[0:6].replace(b'\x00', b' ').strip()
if not seqnr.isdigit() and seqnr != b'':
return False
# Check for any valid control header types.
if header[6:7] in [b'D', b'R', b'Q', b'M']:
return True
# Check if Full-SEED
if not header[6:7] == b'V':
return False
# Parse the whole file and check whether it has has a data record.
fp.seek(1, 1)
_i = 0
# search for blockettes 010 or 008
while True:
if fp.read(3) in [b'010', b'008']:
break
# the next for bytes are the record length
# as we are currently at position 7 (fp.read(3) fp.read(4))
# we need to subtract this first before we seek
# to the appropriate position
# Open filehandler or use an existing file like object.
if not hasattr(filename, 'read'):
file_size = os.path.getsize(filename)
with io.open(filename, 'rb') as fh:
return __is_mseed(fh, file_size=file_size)
else:
initial_pos = filename.tell()
try:
if hasattr(filename, "getbuffer"):
file_size = filename.getbuffer().nbytes
try:
fp.seek(int(fp.read(4)) - 7, 1)
file_size = os.fstat(filename.fileno()).st_size
except:
return False
_i += 1
# break after 3 cycles
if _i == 3:
return False
# Try to get a record length.
fp.seek(8, 1)
_p = filename.tell()
filename.seek(0, 2)
file_size = filename.tell()
filename.seek(_p, 0)
return __is_mseed(filename, file_size)
finally:
# Reset pointer.
filename.seek(initial_pos, 0)


def __is_mseed(fp, file_size): # NOQA
"""
Internal version of _is_mseed working only with open file-like object.
"""
header = fp.read(7)
# File has less than 7 characters
if len(header) != 7:
return False
# Sequence number must contains a single number or be empty
seqnr = header[0:6].replace(b'\x00', b' ').strip()
if not seqnr.isdigit() and seqnr != b'':
# This might be a completely empty sequence - in that case jump 128
# bytes and try again.
fp.seek(-7, 1)
try:
record_length = pow(2, int(fp.read(2)))
_t = fp.read(128).decode().strip()
except:
return False
file_size = os.path.getsize(filename)
# Jump to the second record.
fp.seek(record_length + 6)
# Loop over all records and return True if one record is a data
# record
while fp.tell() < file_size:
flag = fp.read(1)
if flag in [b'D', b'R', b'Q', b'M']:
return True
fp.seek(record_length - 1, 1)
if not _t:
return __is_mseed(fp=fp, file_size=file_size)
return False
# Check for any valid control header types.
if header[6:7] in [b'D', b'R', b'Q', b'M']:
return True
elif header[6:7] == b" ":
# If empty, it might be a noise record. Check the rest of 128 bytes
# (min record size) and try again.
try:
_t = fp.read(128 - 7).decode().strip()
except:
return False
if not _t:
return __is_mseed(fp=fp, file_size=file_size)
return False
# Check if Full-SEED
elif header[6:7] != b'V':
return False
# Parse the whole file and check whether it has has a data record.
fp.seek(1, 1)
_i = 0
# search for blockettes 010 or 008
while True:
if fp.read(3) in [b'010', b'008']:
break
# the next for bytes are the record length
# as we are currently at position 7 (fp.read(3) fp.read(4))
# we need to subtract this first before we seek
# to the appropriate position
try:
fp.seek(int(fp.read(4)) - 7, 1)
except:
return False
_i += 1
# break after 3 cycles
if _i == 3:
return False

# Try to get a record length.
fp.seek(8, 1)
try:
record_length = pow(2, int(fp.read(2)))
except:
return False

# Jump to the second record.
fp.seek(record_length + 6, 0)
# Loop over all records and return True if one record is a data
# record
while fp.tell() < file_size:
flag = fp.read(1)
if flag in [b'D', b'R', b'Q', b'M']:
return True
fp.seek(record_length - 1, 1)
return False


def _read_mseed(mseed_object, starttime=None, endtime=None, headonly=False,
sourcename=None, reclen=None, details=False,
Expand Down
3 changes: 2 additions & 1 deletion obspy/io/mseed/headers.py
Expand Up @@ -42,7 +42,8 @@
# Valid control headers in ASCII numbers.
SEED_CONTROL_HEADERS = [ord('V'), ord('A'), ord('S'), ord('T')]
MINI_SEED_CONTROL_HEADERS = [ord('D'), ord('R'), ord('Q'), ord('M')]
VALID_CONTROL_HEADERS = SEED_CONTROL_HEADERS + MINI_SEED_CONTROL_HEADERS
VALID_CONTROL_HEADERS = SEED_CONTROL_HEADERS + MINI_SEED_CONTROL_HEADERS + \
[ord(' ')]

# expected data types for libmseed id: (numpy, ctypes)
DATATYPES = {b"a": C.c_char, b"i": C.c_int32, b"f": C.c_float,
Expand Down
34 changes: 33 additions & 1 deletion obspy/io/mseed/src/obspy-readbuffer.c
Expand Up @@ -17,6 +17,32 @@
#include "libmseed/unpackdata.h"


// Similar to MS_ISVALIDBLANK but also works for blocks consisting only of
// spaces.
#define OBSPY_ISVALIDBLANK(X) ( \
(isdigit ((int) *(X)) || !*(X) || *(X) == ' ') && \
(isdigit ((int) *(X+1)) || !*(X+1) || *(X+1) == ' ') && \
(isdigit ((int) *(X+2)) || !*(X+2) || *(X+2) == ' ') && \
(isdigit ((int) *(X+3)) || !*(X+3) || *(X+3) == ' ') && \
(isdigit ((int) *(X+4)) || !*(X+4) || *(X+4) == ' ') && \
(isdigit ((int) *(X+5)) || !*(X+5) || *(X+5) == ' ') && \
(*(X+6) ==' ') && (*(X+7) ==' ') && (*(X+8) ==' ') && \
(*(X+9) ==' ') && (*(X+10)==' ') && (*(X+11)==' ') && \
(*(X+12)==' ') && (*(X+13)==' ') && (*(X+14)==' ') && \
(*(X+15)==' ') && (*(X+16)==' ') && (*(X+17)==' ') && \
(*(X+18)==' ') && (*(X+19)==' ') && (*(X+20)==' ') && \
(*(X+21)==' ') && (*(X+22)==' ') && (*(X+23)==' ') && \
(*(X+24)==' ') && (*(X+25)==' ') && (*(X+26)==' ') && \
(*(X+27)==' ') && (*(X+28)==' ') && (*(X+29)==' ') && \
(*(X+30)==' ') && (*(X+31)==' ') && (*(X+32)==' ') && \
(*(X+33)==' ') && (*(X+34)==' ') && (*(X+35)==' ') && \
(*(X+36)==' ') && (*(X+37)==' ') && (*(X+38)==' ') && \
(*(X+39)==' ') && (*(X+40)==' ') && (*(X+41)==' ') && \
(*(X+42)==' ') && (*(X+43)==' ') && (*(X+44)==' ') && \
(*(X+45)==' ') && (*(X+46)==' ') && (*(X+47)==' ') )



// Dummy wrapper around malloc.
void * allocate_bytes(int count) {
return malloc(count);
Expand Down Expand Up @@ -268,7 +294,7 @@ readMSEEDBuffer (char *mseed, int buflen, Selections *selections, flag
// Otherwise assume the smallest possible record length and assure that enough
// data is present.
else {
if (offset + 256 > buflen) {
if (offset + MINRECLEN > buflen) {
ms_log(1, "readMSEEDBuffer(): Last record only has %i byte(s) which "
"is not enough to constitute a full SEED record. Corrupt data? "
"Record will be skipped.\n", buflen - offset);
Expand All @@ -277,6 +303,12 @@ readMSEEDBuffer (char *mseed, int buflen, Selections *selections, flag
}
}

// Skip empty or noise records.
if (OBSPY_ISVALIDBLANK(mseed + offset)) {
offset += MINRECLEN;
continue;
}

// Pass (buflen - offset) because msr_parse() expects only a single record. This
// way libmseed can take care to not overstep bounds.
retcode = msr_parse ( (mseed+offset), buflen - offset, &msr, reclen, dataflag, verbose);
Expand Down
Binary file not shown.
Binary file not shown.
73 changes: 68 additions & 5 deletions obspy/io/mseed/tests/test_mseed_reading_and_writing.py
Expand Up @@ -17,9 +17,8 @@
from obspy import Stream, Trace, UTCDateTime, read
from obspy.core import AttribDict
from obspy.core.util import CatchOutput, NamedTemporaryFile
from obspy.io.mseed import util
from obspy.io.mseed.core import _is_mseed, _read_mseed, _write_mseed, \
InternalMSEEDReadingWarning
from obspy.io.mseed import util, InternalMSEEDReadingWarning
from obspy.io.mseed.core import _is_mseed, _read_mseed, _write_mseed
from obspy.io.mseed.headers import ENCODINGS, clibmseed
from obspy.io.mseed.msstruct import _MSStruct

Expand Down Expand Up @@ -228,10 +227,17 @@ def test_is_mseed(self):
# Mini-SEED file names.
mseed_filenames = ['BW.BGLD.__.EHE.D.2008.001.first_10_records',
'gaps.mseed', 'qualityflags.mseed', 'test.mseed',
'timingquality.mseed']
'timingquality.mseed', 'blockette008.mseed',
'fullseed.mseed', 'various_noise_records.mseed']

# Non Mini-SEED file names.
non_mseed_filenames = ['test_mseed_reading_and_writing.py',
'__init__.py']
'__init__.py',
os.path.join('data', 'not.mseed'),
os.path.join('data', 'not2.mseed'),
os.path.join('data', 'not3.mseed'),
os.path.join('data', 'not4.mseed')]

# Loop over Mini-SEED files
for _i in mseed_filenames:
filename = os.path.join(self.path, 'data', _i)
Expand All @@ -243,6 +249,34 @@ def test_is_mseed(self):
is_mseed = _is_mseed(filename)
self.assertFalse(is_mseed)

# Also test it from an open file.
for _i in mseed_filenames:
filename = os.path.join(self.path, 'data', _i)
with io.open(filename, "rb") as fh:
is_mseed = _is_mseed(fh)
self.assertTrue(is_mseed)
for _i in non_mseed_filenames:
filename = os.path.join(self.path, _i)
with io.open(filename, "rb") as fh:
is_mseed = _is_mseed(fh)
self.assertFalse(is_mseed)

# And from a BytesIO.
for _i in mseed_filenames:
filename = os.path.join(self.path, 'data', _i)
with io.open(filename, "rb") as fh:
with io.BytesIO(fh.read()) as buf:
buf.seek(0, 0)
is_mseed = _is_mseed(buf)
self.assertTrue(is_mseed)
for _i in non_mseed_filenames:
filename = os.path.join(self.path, _i)
with io.open(filename, "rb") as fh:
with io.BytesIO(fh.read()) as buf:
buf.seek(0, 0)
is_mseed = _is_mseed(buf)
self.assertFalse(is_mseed)

def test_read_single_record_to_msr(self):
"""
Tests readSingleRecordtoMSR against start and end times.
Expand Down Expand Up @@ -280,6 +314,34 @@ def test_read_file_via_mseed(self):
for _i in range(5):
self.assertEqual(stream[0].data[_i], data[_i])

# Make sure it can also read from open files.
with io.open(testfile, "rb") as fh:
stream = _read_mseed(fh)
stream.verify()
self.assertEqual(stream[0].stats.network, 'NL')
self.assertEqual(stream[0].stats['station'], 'HGN')
self.assertEqual(stream[0].stats.get('location'), '00')
self.assertEqual(stream[0].stats.npts, 11947)
self.assertEqual(stream[0].stats['sampling_rate'], 40.0)
self.assertEqual(stream[0].stats.get('channel'), 'BHZ')
for _i in range(5):
self.assertEqual(stream[0].data[_i], data[_i])

# And from BytesIO.
with io.open(testfile, "rb") as fh:
with io.BytesIO(fh.read()) as buf:
buf.seek(0, 0)
stream = _read_mseed(buf)
stream.verify()
self.assertEqual(stream[0].stats.network, 'NL')
self.assertEqual(stream[0].stats['station'], 'HGN')
self.assertEqual(stream[0].stats.get('location'), '00')
self.assertEqual(stream[0].stats.npts, 11947)
self.assertEqual(stream[0].stats['sampling_rate'], 40.0)
self.assertEqual(stream[0].stats.get('channel'), 'BHZ')
for _i in range(5):
self.assertEqual(stream[0].data[_i], data[_i])

def test_read_partial_time_window_from_file(self):
"""
Uses obspy.io.mseed.mseed._read_mseed to read only read a certain time
Expand Down Expand Up @@ -917,6 +979,7 @@ def test_is_valid_mseed(self):
# fullseed starting with blockette 010
file = os.path.join(self.path, 'data', 'fullseed.mseed')
self.assertTrue(_is_mseed(file))
return
# fullseed starting with blockette 008
file = os.path.join(self.path, 'data', 'blockette008.mseed')
self.assertTrue(_is_mseed(file))
Expand Down

0 comments on commit 8971429

Please sign in to comment.