In [None]:
%matplotlib ipympl

import h5py
import matplotlib.pyplot as plt
import numpy as np
import obspy
import os

# Element types of the HDF5 datasets created by FastMapTSLibrary:
# DTYPE_INT is used for the "/pivot" dataset, DTYPE_REAL for "/image".
DTYPE_INT  = np.int32
DTYPE_REAL = np.float64

In [None]:
def test_data(path="../data/CI.CLC.HHZ.2019.185.msd"):
    """
    Cut a set of hand-picked, labelled windows out of a waveform file.

    Arguments
    path - File to read with obspy.read(). Defaults to the CI.CLC HHZ
           recording this notebook was written against.

    Returns
    A tuple (data, label, sampling_rate) of three equally long lists:
    the sample array of each window, its label string ("P", "S" or
    "N" -- presumably P phase, S phase and noise) and the sampling rate
    reported by the trace the window was cut from.

    Raises
    IndexError if a window does not overlap any data in the file.
    """
    stream = obspy.read(path)

    # (window start, window length, label). The length is added to the
    # start time to obtain the window end.
    obs = [
        (obspy.UTCDateTime("2019-07-04T20:00:37.5"), 1.5, "P"),
        (obspy.UTCDateTime("2019-07-04T20:00:39.5"), 2, "S"),
        (obspy.UTCDateTime("2019-07-04T20:01:35.5"), 1.5, "P"),
        (obspy.UTCDateTime("2019-07-04T20:01:37.5"), 2, "S"),
        (obspy.UTCDateTime("2019-07-04T20:01:57.5"), 1.5, "P"),
        (obspy.UTCDateTime("2019-07-04T20:02:05.5"), 1.5, "P"),
        (obspy.UTCDateTime("2019-07-04T20:02:07"), 2, "S"),
        (obspy.UTCDateTime("2019-07-04T20:02:18.5"), 1.5, "P"),
        (obspy.UTCDateTime("2019-07-04T20:02:20.25"), 2, "S"),
        (obspy.UTCDateTime("2019-07-04T20:02:37"), 1.5, "P"),
        (obspy.UTCDateTime("2019-07-04T20:02:39"), 2, "S"),
        (obspy.UTCDateTime("2019-07-04T20:02:50"), 2, "N"),
        (obspy.UTCDateTime("2019-07-04T20:03:07"), 2, "N"),
        (obspy.UTCDateTime("2019-07-04T20:03:22"), 2, "N"),
        (obspy.UTCDateTime("2019-07-04T20:03:27"), 2, "N")
    ]

    data, label, sampling_rate = [], [], []
    for starttime, duration, phase in obs:
        endtime = starttime + duration
        # Copy the slice so the returned arrays are independent of
        # `stream`.
        window = stream.slice(starttime=starttime, endtime=endtime).copy()
        if len(window) == 0:
            # Same exception type as indexing the empty stream would
            # raise, but with a message that names the offending window.
            raise IndexError(
                f"no data in {path} between {starttime} and {endtime}"
            )
        trace = window[0]
        data.append(trace.data)
        label.append(phase)
        sampling_rate.append(trace.stats.sampling_rate)

    return (data, label, sampling_rate)

# Build the labelled example windows once; the library-building cell
# further down reads `data`, `label` and `sampling_rate` as globals.
data, label, sampling_rate = test_data()

In [None]:
class FastMapTSLibrary(object):
    """
    HDF5-backed library of time series that can be embedded into a
    k-dimensional Euclidean space with the FastMap algorithm.

    Layout of the backing file:
        /waveforms/<id>  One dataset per appended time series; <id> is
                         the zero-padded, 9-digit insertion index.
        /image           (library_size, kdim) embedded coordinates,
                         NaN where nothing has been written yet.
        /pivot           (2, kdim) ids of the two pivot objects chosen
                         for each embedding dimension.

    The object can be used as a context manager; the file is closed on
    exit from the `with` block.
    """


    def __init__(self, path, kdim, sampling_rate=100, mode="w", overwrite=False):
        """
        Arguments
        path - Path of the HDF5 file backing the library.
        kdim - Number of dimensions of the embedding.
        sampling_rate - Stored and exposed through the `sampling_rate`
                        property; not otherwise used by this class.
        mode - File mode handed to h5py.File.
        overwrite - Allow mode="w" to replace an existing file.

        Raises
        FileExistsError (an IOError) if `path` exists, mode is "w" and
        `overwrite` is false.
        """
        # Defined before the file is opened so that __del__ still works
        # when _init_hdf5 raises.
        self._hdf5 = None
        self._kdim = kdim
        self._sampling_rate = sampling_rate
        self._init_hdf5(path, mode, overwrite=overwrite)
        # Continue counting after any waveforms already stored in the
        # file, so that appending to a reopened library does not reuse
        # existing ids.
        self._library_size = (
            len(self._hdf5["/waveforms"]) if "/waveforms" in self._hdf5 else 0
        )


    @property
    def image(self):
        """
        [Read only] The (library_size, kdim) dataset of embedded
        coordinates. It is created on first access and grown along the
        first axis when the library has grown since.
        """
        if "/image" in self.hdf5:
            image = self.hdf5["/image"]
            if image.shape[0] != self.library_size:
                image.resize(self.library_size, axis=0)
            return (image)
        return (
            self.hdf5.create_dataset(
                "/image",
                shape=(self.library_size, self.kdim),
                maxshape=(None, self.kdim),
                dtype=DTYPE_REAL,
                fillvalue=np.nan
            )
        )

    @property
    def kdim(self):
        """
        [Read only] The number of dimensions of the embedding.
        """
        return (self._kdim)

    @property
    def hdf5(self):
        """
        [Read only] The open h5py.File backing the library.
        """
        return (self._hdf5)

    @property
    def library_size(self):
        """
        [Read only] The number of time series in the library.
        """
        return (self._library_size)

    @property
    def pivot(self):
        """
        [Read only] The (2, kdim) dataset holding, per embedding
        dimension, the integer ids of the two pivot objects.
        """
        return (
            self.hdf5.require_dataset(
                "/pivot",
                shape=(2, self.kdim),
                dtype=DTYPE_INT
            )
        )

    @property
    def sampling_rate(self):
        """
        [Read only] The sampling rate given to the constructor.
        """
        return (self._sampling_rate)

    @property
    def waveforms(self):
        """
        [Read only] The "/waveforms" group, created if missing.
        """
        return (self.hdf5.require_group("/waveforms"))


    def __del__(self):
        # Exceptions raised here cannot be handled by anyone and may
        # occur during interpreter shutdown, so they are dropped.
        try:
            self.close()
        except Exception:
            pass


    def __enter__(self):
        return (self)


    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Close the file when the `with` block ends; returning False
        # lets any exception from the block propagate.
        self.close()
        return (False)


    @staticmethod
    def _residual_distance(d_ab, xa, xb):
        """
        Return the distance between two objects after projecting them
        onto the hyperplane perpendicular to the current pivot line.

        d_ab is their distance before the projection, xa and xb their
        coordinates along the pivot line. The squared residual is
        clamped at zero because it can turn slightly negative when the
        input distance is not exactly Euclidean.
        """
        return (np.sqrt(max(d_ab**2 - (xa - xb)**2, 0.0)))


    def _embed(self, k, dist):
        """
        Recursive function to embed the library data into k-dimensional
        Euclidean space.

        Each call fills column self._icol of the image and recurses
        with a distance function that has the new coordinate projected
        out. `dist` is called with two waveform datasets.
        """

        if k <= 0:
            return (True)

        self._icol += 1
        icol = self._icol
        waveforms = self.waveforms

        # Choose the pivot objects: start from a random object, take
        # the object furthest from it, then the object furthest from
        # that one.
        keys = list(waveforms.keys())
        b = waveforms[np.random.choice(keys)]
        a_name = self.furthest(b, dist)
        a = waveforms[a_name]
        b_name = self.furthest(a, dist)
        b = waveforms[b_name]

        # Record the names of the pivot objects.
        self.pivot[0, icol] = int(a_name)
        self.pivot[1, icol] = int(b_name)

        image = self.image
        d_ab = dist(a, b)
        if d_ab == 0:
            # All remaining inter-object distances are zero.
            image[:, icol] = 0
            return (True)

        # Project all objects onto line between objects a and b. The
        # row is derived from the dataset name, the same way new_dist
        # below locates rows.
        def update_image(name, i):
            d_ai = dist(a, i)
            d_bi = dist(b, i)
            xi = (d_ai**2 + d_ab**2 - d_bi**2) / (2 * d_ab)
            image[int(name.split("/")[-1]), icol] = xi

        waveforms.visititems(update_image)

        # Project all objects onto the hyperplane perpendicular to the
        # line between objects a and b.
        def new_dist(a, b, icol=icol, old_dist=dist):
            i = int(a.name.split("/")[-1])
            j = int(b.name.split("/")[-1])
            return (
                self._residual_distance(
                    old_dist(a, b),
                    image[i, icol],
                    image[j, icol]
                )
            )

        return (self._embed(k-1, new_dist))


    def _init_hdf5(self, path, mode, overwrite=False):
        """
        Initialize the HDF5 backend.

        Raises FileExistsError (a subclass of IOError) instead of
        silently truncating an existing file, unless `overwrite` is
        set.
        """

        if os.path.exists(path) and mode == "w" and not overwrite:
            raise (FileExistsError(f"{path} already exists."))
        self._hdf5 = h5py.File(path, mode=mode)


    def append(self, data, labels, sampling_rate):
        """
        Append data to the library inventory. This does not perform the
        actual embedding into k-dimensional Euclidean space.

        Arguments
        data - The samples of one time series; stored as one dataset.
        labels - Stored in the "labels" attribute of the dataset.
        sampling_rate - Stored in the "sampling_rate" attribute of the
                        dataset unless it is None.

        Returns True.
        """

        name = f"{self.library_size:09d}"
        dataset = self.waveforms.create_dataset(name, data=data)
        dataset.attrs["labels"] = labels
        if sampling_rate is not None:
            dataset.attrs["sampling_rate"] = sampling_rate
        # Count the entry only once it has actually been written.
        self._library_size += 1

        return (True)


    def close(self):
        """
        Close the backing HDF5 file if one was opened.
        """
        hdf5 = getattr(self, "_hdf5", None)
        if hdf5 is not None:
            hdf5.close()


    def embed(self, dist=None):
        """
        Embed the library data into k-dimensional Euclidean space.

        Arguments
        dist - Callable taking two waveform datasets and returning
               their distance. Defaults to the module-level
               `distance` function.

        Raises
        ValueError if the library is empty.
        """

        if dist is None:
            dist = distance
        if self.library_size == 0:
            raise (ValueError("cannot embed an empty library"))

        self._icol = -1
        try:
            return (self._embed(self.kdim, dist))
        finally:
            # Do not leave the column cursor behind, even on error.
            del (self._icol)


    def furthest(self, b, dist):
        """
        Return the name of the object furthest from b.

        The first object visited is always a candidate, so a name is
        returned even when every distance is zero; ties keep the object
        visited first. Returns None only for an empty library.
        """

        furthest_name, furthest_dist = None, None

        def _furthest(name, a, b=b):
            nonlocal furthest_name, furthest_dist
            d = dist(a, b)
            if furthest_name is None or d > furthest_dist:
                furthest_name, furthest_dist = name, d

        self.waveforms.visititems(_furthest)

        return (furthest_name)

def correlate(a, b):
    """
    Return the normalized cross-correlation of a and b.

    Both inputs are demeaned and divided by their standard deviation;
    a is additionally divided by its length, so that the zero-lag value
    for two identical signals is 1. The result is the 'full'
    cross-correlation, of length len(a) + len(b) - 1.

    If either input has zero variance (e.g. a constant signal) the
    normalization is undefined; an all-zero correlation is returned
    instead of NaN/inf values.
    """

    a = np.asarray(a, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)

    std_a, std_b = np.std(a), np.std(b)
    if std_a == 0 or std_b == 0:
        return (np.zeros(a.size + b.size - 1))

    a = (a - np.mean(a)) / (std_a * len(a))
    b = (b - np.mean(b)) / std_b
    corr = np.correlate(a, b, 'full')

    return (corr)


def distance(a, b):
    """
    Return the distance between a and b.

    The distance is one minus the largest value of the normalized
    cross-correlation of the two signals over all lags.
    """

    peak = np.max(correlate(a, b))

    return (1 - peak)

# Re-running this cell would otherwise try to truncate fastmap_test.h5
# while the library created by the previous run still holds it open.
try:
    fastmap_lib.hdf5.close()
except NameError:
    pass

# Build the library from the labelled example windows. overwrite=True
# replaces any existing fastmap_test.h5.
fastmap_lib = FastMapTSLibrary("fastmap_test.h5", 4, overwrite=True)
for _waveform, _phase, _rate in zip(data, label, sampling_rate):
    fastmap_lib.append(_waveform, _phase, _rate)

# The embedding picks its first pivot candidates with np.random.choice;
# fix the seed so the result is the same on every run.
np.random.seed(0)
fastmap_lib.embed()

In [None]:
import mpl_toolkits.mplot3d.axes3d

# Scatter the first three embedding coordinates, one series per label.
plt.close("all")
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection="3d")

# Read the embedding into memory once instead of hitting the HDF5
# dataset three times per label.
_image = fastmap_lib.image[...]
_waveforms = fastmap_lib.waveforms

# The loop variable is deliberately not called `label`: that name holds
# the list of labels returned by test_data(), and overwriting it would
# break a re-run of the cell that builds the library.
for _phase in ("P", "S", "N"):
    _idxs = np.array(
        [
            int(key)
            for key in _waveforms.keys()
            if _waveforms[key].attrs["labels"] == _phase
        ],
        dtype=int
    )
    ax.scatter(
        _image[_idxs, 0],
        _image[_idxs, 1],
        _image[_idxs, 2],
        label=_phase
    )

ax.set_xlabel("dimension 0")
ax.set_ylabel("dimension 1")
ax.set_zlabel("dimension 2")
ax.legend();