Skip to content

Commit

Permalink
Merge 1e4d579 into ba0d5b0
Browse files Browse the repository at this point in the history
  • Loading branch information
rannof committed Jul 2, 2016
2 parents ba0d5b0 + 1e4d579 commit 0eca638
Show file tree
Hide file tree
Showing 12 changed files with 531 additions and 15 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.txt
@@ -1,4 +1,7 @@
master:
- General:
* Read support for Guralp Compressed Format (GCF) waveform data,
obspy.io.gcf (see #1449)
- obspy.core:
* Stream/Trace.write() can now autodetect file format from file extension.
* New convenience property `.matplotlib_date` for `UTCDateTime` objects to
Expand Down
1 change: 1 addition & 0 deletions misc/docs/source/packages/index.rst
Expand Up @@ -39,6 +39,7 @@ The functionality is provided through the following packages:
obspy.io.ascii
obspy.io.css
obspy.io.datamark
obspy.io.gcf
obspy.io.gse2
obspy.io.kinemetrics
obspy.io.mseed
Expand Down
15 changes: 15 additions & 0 deletions misc/docs/source/packages/obspy.io.gcf.rst
@@ -0,0 +1,15 @@
.. currentmodule:: obspy.io.gcf
.. automodule:: obspy.io.gcf

.. comment to end block
Modules
-------
.. autosummary::
:toctree: autogen
:nosignatures:

core
libgcf

.. comment to end block
16 changes: 9 additions & 7 deletions obspy/core/util/base.py
Expand Up @@ -35,12 +35,13 @@
# defining ObsPy modules currently used by runtests and the path function
DEFAULT_MODULES = ['clients.filesystem', 'core', 'db', 'geodetics', 'imaging',
'io.ah', 'io.ascii', 'io.cmtsolution', 'io.cnv', 'io.css',
'io.datamark', 'io.gse2', 'io.json', 'io.kinemetrics',
'io.kml', 'io.mseed', 'io.ndk', 'io.nied', 'io.nlloc',
'io.pdas', 'io.pde', 'io.quakeml', 'io.sac', 'io.seg2',
'io.segy', 'io.seisan', 'io.sh', 'io.shapefile',
'io.seiscomp', 'io.stationtxt', 'io.stationxml', 'io.wav',
'io.xseed', 'io.y', 'io.zmap', 'realtime', 'signal', 'taup']
'io.datamark', 'io.gcf', 'io.gse2', 'io.json',
'io.kinemetrics', 'io.kml', 'io.mseed', 'io.ndk',
'io.nied', 'io.nlloc', 'io.pdas', 'io.pde', 'io.quakeml',
'io.sac', 'io.seg2', 'io.segy', 'io.seisan', 'io.sh',
'io.shapefile', 'io.seiscomp', 'io.stationtxt',
'io.stationxml', 'io.wav', 'io.xseed', 'io.y', 'io.zmap',
'realtime', 'signal', 'taup']
NETWORK_MODULES = ['clients.arclink', 'clients.earthworm', 'clients.fdsn',
'clients.iris', 'clients.neic', 'clients.seedlink',
'clients.seishub', 'clients.syngine']
Expand All @@ -50,7 +51,8 @@
WAVEFORM_PREFERRED_ORDER = ['MSEED', 'SAC', 'GSE2', 'SEISAN', 'SACXY', 'GSE1',
'Q', 'SH_ASC', 'SLIST', 'TSPAIR', 'Y', 'PICKLE',
'SEGY', 'SU', 'SEG2', 'WAV', 'DATAMARK', 'CSS',
'NNSA_KB_CORE', 'AH', 'PDAS', 'KINEMETRICS_EVT']
'NNSA_KB_CORE', 'AH', 'PDAS', 'KINEMETRICS_EVT',
'GCF']
EVENT_PREFERRED_ORDER = ['QUAKEML', 'NLLOC_HYP']
# waveform plugins accepting a byteorder keyword
WAVEFORM_ACCEPT_BYTEORDER = ['MSEED', 'Q', 'SAC', 'SEGY', 'SU']
Expand Down
33 changes: 33 additions & 0 deletions obspy/io/gcf/__init__.py
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
"""
obspy.io.gcf - Guralp Compressed Format read support for ObsPy
==============================================================
This module provides read support for GCF waveform data and header info.
Most methods are based on info from Guralp site
http://geophysics.eas.gatech.edu/GTEQ/Scream4.4/GCF_Specification.htm
:copyright:
The ObsPy Development Team (devs@obspy.org) & Ran Novitsky Nof
:license:
GNU Lesser General Public License, Version 3
(https://www.gnu.org/copyleft/lesser.html)
Reading
-------
Similar to reading any other waveform data format using :mod:`obspy.core`
The format will be determined automatically.
>>> from obspy import read
>>> st = read("/path/to/20160603_1955n.gcf")
You can also specify the following keyword arguments that change the
behavior of reading the file:
* ``headonly=True``: Read only the header part, not the actual data
"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from future.builtins import * # NOQA

if __name__ == '__main__':
    # Run the doctests embedded in this module when executed directly;
    # exclude_empty skips objects that have no doctest at all.
    import doctest
    doctest.testmod(exclude_empty=True)
103 changes: 103 additions & 0 deletions obspy/io/gcf/core.py
@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
"""
GCF bindings to ObsPy core module.
"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from future.builtins import * # NOQA

from obspy import Stream, Trace, UTCDateTime

from . import libgcf


def merge_gcf_stream(st):
    """
    Merges GCF stream (replacing Stream.merge(-1) for headonly=True).

    Traces with the same id and sampling interval whose gap is positive
    and at most one sample are combined by adjusting ``npts`` (and
    ``starttime`` when the new trace precedes an existing one) instead of
    touching data arrays, which do not exist in headonly mode.

    :type st: :class:`~obspy.core.stream.Stream`
    :param st: GCF Stream object with no data.
    :rtype: :class:`~obspy.core.stream.Stream`
    :returns: Stream object with contiguous headonly traces merged.
    """
    traces = []
    for tr in st:
        delta = tr.stats.delta
        starttime = tr.stats.starttime
        endtime = tr.stats.endtime
        for trace in traces:
            if (tr.id == trace.id and delta == trace.stats.delta
                    and starttime != trace.stats.starttime):
                if 0 < starttime - trace.stats.endtime <= delta:
                    # new trace directly follows an existing one: extend it
                    trace.stats.npts += tr.stats.npts
                    break
                elif 0 < trace.stats.starttime - endtime <= delta:
                    # new trace directly precedes an existing one: prepend
                    trace.stats.starttime = UTCDateTime(starttime)
                    trace.stats.npts += tr.stats.npts
                    break
        else:
            # no mergeable neighbour found, keep as separate trace
            traces.append(tr)
    return Stream(traces=traces)


def _is_gcf(filename):
    """
    Checks whether a file is GCF or not.

    A file is considered GCF if at least one data block can be parsed
    from it without error.

    :type filename: str
    :param filename: GCF file to be checked.
    :rtype: bool
    :return: ``True`` if a GCF file.
    """
    try:
        with open(filename, 'rb') as f:
            libgcf.is_gcf(f)
    except Exception:
        # any parse/IO failure means "not a GCF file"; a bare ``except:``
        # would also swallow KeyboardInterrupt/SystemExit, so catch
        # Exception explicitly
        return False
    return True


def _read_gcf(filename, headonly=False, **kwargs):  # @UnusedVariable
    """
    Reads a GCF file and returns a Stream object.

    only GCF files containing data records are supported.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :type filename: str
    :param filename: GCF file to be read.
    :type headonly: bool, optional
    :param headonly: If True read only head of GCF file.
    :type channel_prefix: str, optional
    :param channel_prefix: Channel band and instrument codes.
        Defaults to ``HH``.
    :rtype: :class:`~obspy.core.stream.Stream`
    :returns: Stream object containing header and data.

    .. rubric:: Example

    >>> from obspy import read
    >>> st = read("/path/to/20160603_1955n.gcf", format="GCF")
    """
    traces = []
    with open(filename, 'rb') as f:
        # libgcf raises EOFError once the file is exhausted; non-data
        # blocks come back as None and are skipped
        try:
            while True:
                if headonly:
                    header = libgcf.read_header(f, **kwargs)
                    if header:
                        traces.append(Trace(header=header))
                else:
                    block = libgcf.read(f, **kwargs)
                    if block:
                        traces.append(Trace(header=block[0], data=block[1]))
        except EOFError:
            pass
    stream = Stream(traces=traces)
    if headonly:
        # no data arrays to merge; use the header-only merger instead
        return merge_gcf_stream(stream)
    stream.merge(-1)
    return stream
161 changes: 161 additions & 0 deletions obspy/io/gcf/libgcf.py
@@ -0,0 +1,161 @@
# -*- coding: utf-8 -*-
# reads Guralp Compressed Format (GCF) Files
# By Ran Novitsky Nof @ BSL, 2016
# ran.nof@gmail.com
# Based on Guralp's GCF reference (GCF-RFC-GCFR, Issue C, 2011-01-05)
# more details available from: http://www.guralp.com/apps/ok?doc=GCF_Intro
# last access: June, 2016
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from future.builtins import * # NOQA

import numpy as np

from obspy import UTCDateTime

# Lookup tables from the GCF specification (GCF-RFC-GCFR).
SPS_D = {  # Table 3.1: special sample rates
    # SPS byte values that encode non-literal sample rates (fractional
    # rates below 1 Hz and high rates above the 8 bit range).
    157: 0.1,
    161: 0.125,
    162: 0.2,
    164: 0.25,
    167: 0.5,
    171: 400,
    174: 500,
    176: 1000,
    179: 2000,
    181: 4000}
TIME_OFFSETS_D = {  # Table 3.1: Time fractional offset denominator
    # for high sample rates the start time carries a fractional offset;
    # keys match the special SPS codes above
    171: 8.,
    174: 2.,
    176: 4.,
    179: 8.,
    181: 16.}
COMPRESSION_D = {  # Table 3.2: format field to data type
    # compression code -> big-endian numpy dtype of the differences
    1: '>i4',
    2: '>i2',
    4: '>i1'}


def is_gcf(f):
    """
    Test if file is GCF by reading at least 1 data block.

    Returns nothing on success; callers rely on the absence of an
    exception (e.g. ``EOFError`` or a parse error) as the positive signal.
    """
    # NOTE(review): read_data_block returns None for non-data (SPS=0)
    # blocks, which would make this unpack raise TypeError even for a
    # valid GCF file starting with a status block — confirm intended.
    header, data = read_data_block(f)


def decode36(data):
    """
    Converts an integer into a base36 string.

    Digits 0-9 map to '0'-'9', digits 10-35 map to 'A'-'Z'.
    Returns the empty string for 0.
    """
    # http://geophysics.eas.gatech.edu/GTEQ/Scream4.4/Decoding_Base_36_numbers_C.htm
    result = ''
    while data:
        data, rem = divmod(data, 36)
        if rem > 9:
            digit = chr(ord('A') + rem - 10)
        else:
            digit = chr(ord('0') + rem)
        result = digit + result
    return result


def decode_date_time(data):
    """
    Decode date and time field.

    The date code is a 32 bit value specifying the start time of the
    block: bits 0-16 hold the number of seconds since midnight, bits
    17-31 the number of days since 17th November 1989.
    """
    # splitting at bit 17 == divmod by 2**17 for unsigned input
    days, seconds = divmod(data, 0x20000)
    return UTCDateTime('1989-11-17') + days * 86400 + seconds


def read_data_block(f, headonly=False, channel_prefix="HH", **kwargs):
    """
    Read one data block from GCF file.

    more details can be found here:
    http://geophysics.eas.gatech.edu/GTEQ/Scream4.4/GCF_Specification.htm

    f - file object to read from
    if headonly is True, only the header dict is returned.
    if not a data block (SPS=0) - returns None.
    Otherwise returns a ``(header, data)`` tuple.
    Raises EOFError at end of file and ValueError if the reconstructed
    last sample does not match the block's RIC.
    """
    # get ID (first 32 bit word of the 16 byte block header)
    sysid = f.read(4)
    if not sysid:
        raise EOFError  # got to EOF
    sysid = np.frombuffer(sysid, count=1, dtype='>u4')
    if sysid >> 31 & 0b1 > 0:
        # top bit set: only the low 26 bits carry the base-36 system ID
        sysid = (sysid << 6) >> 6
    sysid = decode36(sysid)
    # get Stream ID
    stid = np.frombuffer(f.read(4), count=1, dtype='>u4')
    stid = decode36(stid)
    # get Date & Time
    data = np.frombuffer(f.read(4), count=1, dtype='>u4')
    starttime = decode_date_time(data)
    # get data format
    # get reserved, SPS, data type compression,
    # number of 32bit records (num_records)
    reserved, sps, compress, num_records = np.frombuffer(f.read(4), count=4,
                                                         dtype='>u1')
    compression = compress & 0b00001111  # get compression code
    t_offset = compress >> 4  # get time offset
    if t_offset > 0:
        # high-rate blocks carry a fractional start-time offset
        starttime = starttime + t_offset / TIME_OFFSETS_D[sps]
    if sps in SPS_D:
        sps = SPS_D[sps]  # get special SPS value if needed
    if not sps:
        # SPS == 0 marks a status (non-data) block
        f.seek(num_records * 4, 1)  # skip if not a data block
        if 1008 - num_records * 4 > 0:
            # keep skipping to get 1008 record
            # (1008 bytes of payload follow the 16 byte header)
            f.seek(1008 - num_records * 4, 1)
        return None
    npts = num_records * compression  # number of samples
    header = {}
    header['starttime'] = starttime
    # first four characters of the decoded stream ID are the station code,
    # the fifth is the component letter appended to the channel prefix
    header['station'] = stid[:4]
    header['channel'] = (channel_prefix[:2] + stid[4]).upper()
    header['sampling_rate'] = float(sps)
    header['npts'] = npts
    if headonly:
        f.seek(4 * (num_records + 2), 1)  # skip data part (inc. FIC and RIC)
        # skip to end of block if only partly filled with data
        # (1000 = 1008 payload bytes minus the 8 bytes of FIC and RIC)
        if 1000 - num_records * 4 > 0:
            f.seek(1000 - num_records * 4, 1)
        return header
    else:
        # get FIC (forward integration constant = first absolute sample)
        fic = np.frombuffer(f.read(4), count=1, dtype='>i4')
        # get incremental data (first differences of the time series)
        data = np.frombuffer(f.read(4 * num_records), count=npts,
                             dtype=COMPRESSION_D[compression])
        # construct time series by integrating the differences onto FIC
        data = (fic + np.cumsum(data)).astype('i4')
        # get RIC (reverse integration constant = last absolute sample)
        ric = np.frombuffer(f.read(4), count=1, dtype='>i4')
        # skip to end of block if only partly filled with data
        if 1000 - num_records * 4 > 0:
            f.seek(1000 - num_records * 4, 1)
        # verify last data sample matches RIC
        if not data[-1] == ric:
            raise ValueError("Last sample mismatch with RIC")
        return header, data


def read_header(f, **kwargs):
    """
    Reads header only from GCF file.

    :param f: binary file-like object positioned at a block boundary.
    :returns: header dict, or ``None`` if the block is not a data block.
    :raises EOFError: when ``f`` is exhausted.
    """
    return read_data_block(f, headonly=True, **kwargs)


def read(f, **kwargs):
    """
    Reads header and data from GCF file.

    :param f: binary file-like object positioned at a block boundary.
    :returns: ``(header, data)`` tuple, or ``None`` for non-data blocks.
    :raises EOFError: when ``f`` is exhausted.
    """
    return read_data_block(f, headonly=False, **kwargs)
22 changes: 22 additions & 0 deletions obspy/io/gcf/tests/__init__.py
@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from future.builtins import * # NOQA

import unittest

from obspy.core.util import add_doctests, add_unittests


MODULE_NAME = "obspy.io.gcf"


def suite():
    """Build the combined doctest + unittest suite for obspy.io.gcf."""
    # use a distinct local name so the function itself is not shadowed
    testsuite = unittest.TestSuite()
    add_doctests(testsuite, MODULE_NAME)
    add_unittests(testsuite, MODULE_NAME)
    return testsuite


if __name__ == '__main__':
    # Allow running this test package directly; 'suite' names the
    # suite() factory above.
    unittest.main(defaultTest='suite')
Binary file added obspy/io/gcf/tests/data/20160603_1910n.gcf
Binary file not shown.
Binary file added obspy/io/gcf/tests/data/20160603_1955n.gcf
Binary file not shown.

0 comments on commit 0eca638

Please sign in to comment.