In [None]:
%matplotlib ipympl

import fastmap as fm
import h5py
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pathlib
import seaborn as sns
import sklearn.ensemble
import sklearn.metrics
import sklearn.svm

# Default figure size handed to seaborn as a matplotlib rc override, so every
# figure created below picks it up.
params = {"figure.figsize": (12.65, 5.71)}
sns.set_theme(context="talk", rc=params)

In [None]:
# Read the four datasets fully into memory while the file is open; the `[:]`
# slice copies each dataset out of the file so the arrays stay usable after
# the `with` block closes it.
with h5py.File("../data/test_database_00.hdf5", mode="r") as f5:
    X_train, y_train, X_test, y_test = (
        f5[name][:] for name in ("X_train", "y_train", "X_test", "y_test")
    )

In [None]:
def plot_wiggles(array):
    """Draw every trace in ``array`` on a single axes as offset wiggles.

    ``array`` is walked as two nested levels (outer index ``ista``, inner
    index ``ichan``); each innermost item is plotted as one line shifted
    upward by ``2 * ista + 0.5 * ichan``.  Line colors come from a
    three-entry palette indexed by ``ichan``, and the x-axis is limited to
    ``0 .. array.shape[-1]``.

    NOTE(review): a later cell defines ``plot_wiggles`` again with a
    different signature; once that cell runs, it replaces this definition.
    """
    station_gap, channel_gap = 2, 0.5
    palette = ("tab:grey", "tab:olive", "tab:brown")

    _, axis = plt.subplots()
    for ista, gather in enumerate(array):
        station_shift = ista * station_gap
        for ichan, trace in enumerate(gather):
            axis.plot(
                trace + station_shift + ichan * channel_gap,
                color=palette[ichan],
                linewidth=0.5,
            )

    axis.set_xlim(0, array.shape[-1])
    axis.set_xlabel("Sample index")
    axis.set_ylabel("Channel index")
    plt.tight_layout()
    

def plot_colormesh(array):
    """Show the transpose of ``array`` as a color mesh with the y-axis flipped.

    A new figure is created, ``array.T`` is drawn with ``Axes.pcolorfast``,
    the y-axis direction is inverted, and tight layout is applied.
    """
    _, axis = plt.subplots()
    transposed = array.T
    axis.pcolorfast(transposed)
    axis.invert_yaxis()
    plt.tight_layout()

In [None]:
%%time

# Delete any file left by a previous run — presumably so fm.FastMap starts
# from a fresh file; confirm against the fastmap documentation.
path = pathlib.Path("fastmap.hdf5")
path.unlink(missing_ok=True)


def _percentile_distance(a, b):
    """Distance handed to FastMap: ``fm.distance`` in "same" mode, reduced
    with the 90th percentile along axis 0."""
    return fm.distance(
        a, b, mode="same", reduce=lambda c: np.percentile(c, 90, axis=0)
    )


fastmap = fm.FastMap(
    X_train,
    y_train,
    _percentile_distance,
    6,  # presumably the number of embedding dimensions — TODO confirm
    "fastmap.hdf5",  # same file name as `path` above
)
fastmap.embed_database()

# Wrap the embedded training set in a DataFrame (columns x0, x1, ...) with the
# labels attached as "target".
W_train = fastmap.image[:]
df_train = pd.DataFrame(
    W_train, columns=[f"x{i}" for i in range(W_train.shape[-1])]
).assign(target=y_train)

In [None]:
# Close every open figure, then draw the pairwise scatter matrix of columns
# x0..x3 of the training frame, colored by "target".
plt.close("all")
sns.pairplot(df_train[["x0", "x1", "x2", "x3", "target"]], hue="target")

In [None]:
# Apply tight layout to the current figure — presumably the pair plot drawn by
# the previous cell; confirm this separate cell is still needed.
plt.tight_layout()

In [None]:
%%time

# Embed each test example individually and stack the results row-wise.
W_test = np.vstack(list(map(fastmap.embed, X_test)))

# Same layout as the training frame: columns x0, x1, ... plus "target".
df_test = pd.DataFrame(
    W_test, columns=[f"x{i}" for i in range(W_test.shape[-1])]
).assign(target=y_test)

In [None]:
# Pairwise scatter matrix of columns x0..x3 of the test frame, colored by "target".
sns.pairplot(df_test[["x0", "x1", "x2", "x3", "target"]], hue="target")

In [None]:
# Fit a support-vector classifier on the leading `best_ndim` columns of the
# training embedding.
# NOTE(review): `best_params` and `best_ndim` are not defined in any cell
# visible here, so this cell raises NameError on a fresh kernel unless they
# are defined elsewhere — confirm, and restore the cell that selects them.
clf = sklearn.svm.SVC(**best_params)
clf.fit(W_train[:, :best_ndim], y_train)

In [None]:
# Confusion matrix of the fitted classifier on the test embedding, restricted
# to the same leading `best_ndim` columns used for fitting.
fig, ax = plt.subplots()
# `sklearn.metrics.plot_confusion_matrix` was deprecated in scikit-learn 1.0
# and removed in 1.2; prefer ConfusionMatrixDisplay.from_estimator when it
# exists and fall back to the old function on older installations.
_cm_display = getattr(sklearn.metrics, "ConfusionMatrixDisplay", None)
if _cm_display is not None and hasattr(_cm_display, "from_estimator"):
    _cm_display.from_estimator(clf, W_test[:, :best_ndim], y_test, ax=ax)
else:
    sklearn.metrics.plot_confusion_matrix(clf, W_test[:, :best_ndim], y_test, ax=ax)
# Turn off the grid lines on this axes so they do not overlay the matrix cells.
ax.grid(False)

In [None]:
# Predicted labels for the test embedding, using the same leading `best_ndim`
# columns the classifier was fitted on.
y_pred = clf.predict(W_test[:, :best_ndim])

In [None]:
def _draw_wiggles(ax, gathers, hspace_station=2, hspace_channel=0.5,
                  colors=("tab:grey", "tab:olive", "tab:brown")):
    """Plot every trace of ``gathers`` on ``ax``, offset vertically per station and channel."""
    for ista, sgthr in enumerate(gathers):
        for ichan, trace in enumerate(sgthr):
            ax.plot(
                trace + ista * hspace_station + ichan * hspace_channel,
                linewidth=0.5,
                # Cycle the palette so gathers with more channels than colors
                # still plot instead of raising IndexError.
                color=colors[ichan % len(colors)],
            )


def plot_wiggles(noise, microseisms=None):
    """Plot one or two sets of traces as vertically offset wiggles.

    Parameters
    ----------
    noise : array
        Traces for the first (or only) panel.  It is iterated as two nested
        levels (outer index ``ista``, inner index ``ichan``) and its last
        axis length sets the x-limit of every panel.
    microseisms : array, optional
        Traces for a second panel.  When omitted, a single untitled panel is
        drawn, so the function also accepts the one-argument call form used
        elsewhere in this notebook.

    Returns
    -------
    None
        The figure is created through ``plt.subplots`` and left open.
    """
    if microseisms is None:
        panels = [(noise, None)]
    else:
        panels = [(noise, "Noise"), (microseisms, "Microseism")]

    # squeeze=False keeps `axes` two-dimensional for any panel count, so row 0
    # is always an indexable sequence of Axes.
    fig, axes = plt.subplots(
        ncols=len(panels), sharex=True, sharey=True, squeeze=False
    )
    axes = axes[0]

    for ax, (gathers, title) in zip(axes, panels):
        _draw_wiggles(ax, gathers)
        ax.set_xlim(0, noise.shape[-1])
        ax.set_xlabel("Sample index")
        if title is not None:
            ax.set_title(title)
    # The y-axis is shared, so only the left-most panel carries the label.
    axes[0].set_ylabel("Channel index")
    fig.tight_layout()

In [None]:
# Close every open figure before drawing the example comparison.
plt.close("all")
# NOTE(review): assumes X_test[0] is a noise example and X_test[72] a
# microseism, matching the (noise, microseisms) argument order — TODO confirm
# against y_test rather than relying on hard-coded indices.
plot_wiggles(X_test[0], X_test[72])

In [None]:
# Walk through every test example whose prediction differs from its label:
# print (predicted, true) and plot its traces.
# NOTE(review): plot_wiggles is called with a single array here — make sure
# the definition in scope accepts that call form.
misclassified = np.nonzero(y_pred != y_test)[0]
for i in misclassified:
    print(y_pred[i], y_test[i])
    plot_wiggles(X_test[i])