In [59]:
import pandas as pd
import h5py
import numpy as np
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt

In [60]:
def bandpass(x, freqmin=1, freqmax=20, sampling_freq=100):
    """Zero-phase brick-wall band-pass filter applied along the first axis.

    The signal is transformed with an FFT along axis 0, every frequency bin
    whose absolute frequency lies outside ``[freqmin, freqmax]`` is set to
    zero, and the result is transformed back.

    Parameters
    ----------
    x : array_like
        Samples along axis 0. May be 1-D (a single trace) or N-D (e.g. one
        column per channel); every trailing axis is filtered independently.
    freqmin, freqmax : float
        Pass-band edges, in the same unit as ``sampling_freq``. Bins exactly
        at an edge are kept.
    sampling_freq : float
        Sampling frequency of ``x`` along axis 0.

    Returns
    -------
    numpy.ndarray
        Filtered signal with the same shape as ``x``, as ``float32``.
    """
    spectrum = np.fft.fft(x, axis=0)
    freqs = np.fft.fftfreq(np.shape(x)[0], 1.0 / sampling_freq)
    stop = (np.abs(freqs) < freqmin) | (np.abs(freqs) > freqmax)

    # Index with the mask on the first axis only so that 1-D input works too;
    # for N-D input this is equivalent to ``spectrum[stop, :] = 0``.
    spectrum[stop] = 0

    # The mask is symmetric in +/- frequency, so the imaginary part of the
    # inverse transform is only round-off for real input; drop it.
    return np.real(np.fft.ifft(spectrum, axis=0)).astype(np.float32)

def plot_waveform(x, spec, title, t=np.linspace(0, 120, 12000)):
    """Plot every channel of ``x`` as its own row inside the grid cell ``spec``.

    Parameters
    ----------
    x : array_like
        Either a 1-D trace or a 2-D array indexed as ``x[sample, channel]``.
    spec : matplotlib.gridspec.SubplotSpec
        Cell of an outer grid; it is subdivided into one row per channel.
    title : str
        Title placed (left-aligned) above the first channel only.
    t : array_like
        Horizontal-axis values passed to ``ax.plot``; its length has to match
        the first axis of ``x``.

    The axes are created on the current pyplot figure. Tick labels and axis
    lines are hidden on both axes. Nothing is returned.
    """
    # Promote a single trace to a one-column array so both cases share a path.
    if len(np.shape(x)) == 1:
        x = np.expand_dims(x, axis=1)
        n_rows = 1
    else:
        n_rows = np.shape(x)[1]

    # Nested grid: one stacked row per channel inside the caller's cell.
    rows = gridspec.GridSpecFromSubplotSpec(
        n_rows, 1, subplot_spec=spec, wspace=0.1, hspace=0.1
    )

    for ch in range(n_rows):
        ax = plt.subplot(rows[ch, 0])
        if ch == 0:
            ax.set_title(title, loc="left")

        for axis in (ax.xaxis, ax.yaxis):
            axis.set_visible(False)

        ax.plot(t, x[:, ch], linewidth=0.6)

In [61]:
# Locations of the preprocessed dataset: the metadata table (CSV) and the
# matching waveform store (HDF5) that later cells open and compare against
# the raw recordings.
data_csv_path = "/home/onur/dataset_preprocessed/instance/subsampled_100percent.csv"
data_hdf5_path = "/home/onur/dataset_preprocessed/instance/subsampled_100percent.hdf5"

In [62]:
# Locations of the raw (not preprocessed) recordings, grouped per source:
# each source has a metadata CSV and an HDF5 waveform store.
pick_metadata_csv_path, pick_hdf5_path = (
    "/home/onur/instance/events/metadata.csv",
    "/home/onur/instance/events/waveforms.hdf5",
)
noise_metadata_csv_path, noise_hdf5_path = (
    "/home/onur/instance/noise/metadata.csv",
    "/home/onur/instance/noise/waveforms.hdf5",
)

In [63]:
# Metadata table of the preprocessed dataset. Later cells read the columns
# "label", "trace_name", "chunk_idx", "crop_offset" and "p_arrival_sample"
# from it.
df_preprocessed = pd.read_csv(data_csv_path)

In [69]:
def is_test_sample(row, window_len=3000, margin=300):
    """Decide whether a sampled metadata row should be kept for plotting.

    Rows whose label is not ``"eq"`` are always accepted. For ``"eq"`` rows
    the crop window ``[crop_offset, crop_offset + window_len)`` must contain
    the P arrival strictly more than ``margin`` samples away from both ends
    of the window, i.e.::

        p_arrival + margin - window_len < crop_offset < p_arrival - margin

    Parameters
    ----------
    row : pandas.DataFrame
        A single-row frame (e.g. the result of ``df.sample(1)``) with the
        columns ``"label"``, ``"p_arrival_sample"`` and ``"crop_offset"``.
        Only the first row is inspected.
    window_len : int
        Length of the crop window in samples.
    margin : int
        Minimum distance, in samples, between the P arrival and either end
        of the crop window.

    Returns
    -------
    bool
        ``True`` when the row passes the check, ``False`` otherwise. A NaN
        arrival sample on an ``"eq"`` row yields ``False``.
    """
    if row["label"].values[0] != "eq":
        return True

    p_arrival = row["p_arrival_sample"].values[0]
    crop_offset = row["crop_offset"].values[0]

    lower = p_arrival + margin - window_len
    upper = p_arrival - margin
    # Wrap in bool() so both branches return the same (builtin) type.
    return bool(lower < crop_offset < upper)

In [70]:
# Metadata tables of the raw event and noise recordings.
# NOTE(review): neither frame is referenced by the cells visible in this
# notebook excerpt -- confirm they are still needed before keeping the load.
# NOTE(review): the captured cell output echoes the first read_csv line, which
# looks like the tail of a pandas warning (possibly a mixed-dtype warning) --
# confirm, and consider passing an explicit `dtype=` if so.
df_eq = pd.read_csv(pick_metadata_csv_path)
df_no = pd.read_csv(noise_metadata_csv_path)

  df_eq = pd.read_csv(pick_metadata_csv_path)


In [71]:
# Open the three waveform stores read-only. The handles are kept at module
# level because the plotting cell below reads from them.
# NOTE(review): no matching `.close()` is visible in this notebook excerpt;
# close the files (or reopen them in a `with` block) once plotting is done.
f_preprocessed = h5py.File(data_hdf5_path, "r")
f_eq = h5py.File(pick_hdf5_path, "r")
f_no = h5py.File(noise_hdf5_path, "r")

In [72]:
# Visual sanity check of the preprocessing: for randomly drawn metadata rows,
# plot the stored preprocessed window (left) next to the same window rebuilt
# from the raw recording with `bandpass` + cropping (right), and save the
# figure to ../plots/<label>_<trace_name>.png.

N_DRAWS = 100      # random draws; rejected draws are skipped, not retried
BATCH_SIZE = 256   # rows per batch dataset inside a chunk (index // 256, index % 256)
CROP_LEN = 3000    # samples per preprocessed window
CROP_SECONDS = 30  # span of the time axis used for plotting a window

# Identical for every panel, so build it once instead of twice per iteration.
time_axis = np.linspace(0, CROP_SECONDS, CROP_LEN)

for _ in range(N_DRAWS):
    row = df_preprocessed.sample(1)
    if not is_test_sample(row):
        continue

    trace_name = row["trace_name"].values[0]
    label = row["label"].values[0]
    chunk_idx = row["chunk_idx"].values[0]
    crop_offset = row["crop_offset"].values[0]

    # Position of the trace among the rows of its chunk. A fresh 0..n-1 index
    # is built without mutating the filtered slice in place, and the lookup is
    # done once and split into (batch dataset, row within batch).
    df_chunk = df_preprocessed[df_preprocessed["chunk_idx"] == chunk_idx].reset_index(drop=True)
    pos_in_chunk = df_chunk.index[df_chunk["trace_name"] == trace_name][0]
    batch_offset, waveform_idx = divmod(pos_in_chunk, BATCH_SIZE)
    print(batch_offset, waveform_idx)

    # One explicit figure per sample; closing it afterwards avoids reusing the
    # implicit global figure and leaving an empty figure behind after the loop.
    fig = plt.figure()
    gs = gridspec.GridSpec(1, 2)

    # Left panel: window as stored in the preprocessed file. Direct indexing
    # raises KeyError naming the missing dataset, whereas `.get()` would return
    # None and fail later with an opaque TypeError.
    wf_stored = f_preprocessed[f"data/x/chunk{chunk_idx}/{batch_offset}"][waveform_idx, :, :]
    plot_waveform(wf_stored, gs[0, 0], trace_name, t=time_axis)

    # Right panel: the same window rebuilt from the raw recording. The raw
    # array is transposed so that samples run along axis 0 before filtering.
    f_raw = f_eq if label == "eq" else f_no
    wf_raw = f_raw[f"data/{trace_name}"][...]
    wf_raw = bandpass(np.transpose(wf_raw, axes=(1, 0)))
    wf_raw = wf_raw[crop_offset:crop_offset + CROP_LEN, :]
    plot_waveform(wf_raw, gs[0, 1], trace_name, t=time_axis)

    fig.savefig(f"../plots/{label}_{trace_name}.png")
    plt.close(fig)

134 125
73 26
174 184
109 67
151 72
8 20
122 142
10 135
134 91
20 38
90 37
97 9
164 96
251 100
100 151
220 20
40 156
78 92
157 211
72 102
226 44
107 148
94 23
55 159
4 63
24 100
14 133
145 248
54 209
79 204
51 223
155 16
96 19
56 150
34 175
43 58
35 122
147 73
212 115
249 242
111 92
165 78
27 174
0 70
80 141
159 174
25 98
36 100
52 111
76 70
66 123
154 75
57 118
167 129
80 161
77 27
48 93
111 196
18 218
110 228
75 69
22 230
214 206
210 27
126 83
90 33
34 173
103 61
132 175
78 197
185 252


<Figure size 640x480 with 0 Axes>