In [1]:
import numpy as np
import matplotlib.pyplot as plt
from seismic_purifier import RepresentationLearningAutoencoder, RepresentationLearningDenoisingAutoencoder
from directory import get_checkpoint_path
from config import BATCH_SIZE, N_CHANNELS
from kfold_environment import KFoldEnvironment
from matplotlib.gridspec import GridSpec

2024-09-23 16:40:45.245165: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-23 16:40:45.264083: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-23 16:40:45.264113: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-23 16:40:45.264128: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-23 16:40:45.267828: I tensorflow/core/platform/cpu_feature_g

In [2]:
# -------------------------------
# Configuration and Setup
# -------------------------------

# Experiment name identifier
EXP_NAME = "exp_test"

# Choose the representation learning model class
REPRESENTATION_LEARNING_MODEL_CLASS = RepresentationLearningDenoisingAutoencoder

# Specify training and testing datasets ('stead' or 'instance')
TRAIN_DATASET = "stead"
TEST_DATASET = "stead"

# Number of training epochs
EPOCH = 6

# Data split identifier
SPLIT = 0

# Num of samples to plot.
NUM_SAMPLES = 10

In [15]:
def compute_autocovariance(data):
    """
    Computes the autocovariance for each channel and averages them.
    
    Args:
        data (np.ndarray): 2D array with shape (timesteps, channels)
    
    Returns:
        lags (np.ndarray): Lag values
        avg_autocov (np.ndarray): Averaged autocovariance
    """
    num_timesteps, num_channels = data.shape
    autocovariances = []
    for c in range(num_channels):
        channel_data = data[:, c]
        channel_data = channel_data - np.mean(channel_data)  # Zero-mean
        autocov = np.correlate(channel_data, channel_data, mode='full')
        autocovariances.append(autocov)
    autocovariances = np.array(autocovariances)
    avg_autocov = np.mean(autocovariances, axis=0)
    lags = np.arange(-num_timesteps + 1, num_timesteps)
    return lags, avg_autocov

def plot_waveform_channel(ax, timesteps, waveform, channel_idx, color='blue', show_xticks=True):
    """
    Plots a single waveform channel on the given axes.
    
    Args:
        ax (matplotlib.axes.Axes): Axes to plot on
        timesteps (np.ndarray): Array of timesteps
        waveform (np.ndarray): Waveform data for one channel
        channel_idx (int): Channel index (0-based)
        color (str): Color for the plot
        show_xticks (bool): Whether to show x-axis tick labels and label
    """
    channels =['E', 'N', 'Z']
    ax.plot(timesteps, waveform, color=color, linewidth=1)
    ax.set_title(channels[channel_idx], fontsize=12, pad=10)
    
    if show_xticks:
        ax.set_xlabel('Timesteps', fontsize=10)
        ax.set_ylabel('Amplitude', fontsize=10)
        ax.tick_params(axis='x', labelsize=8)
    else:
        ax.set_xlabel('')
        ax.set_ylabel('Amplitude', fontsize=10)
        ax.set_xticklabels([])
        ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    
    ax.grid(True)

def plot_heatmap(ax, heatmap):
    """
    Plots the heatmap on the given axes.
    
    Args:
        ax (matplotlib.axes.Axes): Axes to plot on
        heatmap (np.ndarray): Shape (94, 64)
    """
    cax = ax.imshow(heatmap, aspect='auto', cmap='magma', origin='lower')
    ax.set_title('Heatmap (Feature Map)')
    ax.set_xlabel('Channels')
    ax.set_ylabel('Timesteps')
    plt.colorbar(cax, ax=ax, orientation='vertical', fraction=0.046, pad=0.04)

def plot_autocovariance(ax, lags, autocov, title):
    """
    Plots the autocovariance function on the given axes.
    
    Args:
        ax (matplotlib.axes.Axes): Axes to plot on
        lags (np.ndarray): Lag values
        autocov (np.ndarray): Autocovariance values
        title (str): Title of the plot
    """
    ax.plot(lags, autocov)
    ax.set_title(title)
    ax.set_xlabel('Lag')
    ax.set_ylabel('Autocovariance')

def load_model():
    # Initialize the representation learning model
    model = REPRESENTATION_LEARNING_MODEL_CLASS()
    model.compile()

    # Perform a forward pass with random input to initialize model weights
    model(np.random.normal(size=[BATCH_SIZE, 3000, N_CHANNELS]))

    # Construct the checkpoint path for the model weights
    cp_path = get_checkpoint_path(
        EXP_NAME,
        REPRESENTATION_LEARNING_MODEL_CLASS().name,
        TRAIN_DATASET,
        SPLIT,
        EPOCH
    )

    # Load the pre-trained weights into the model
    model.load_weights(cp_path)
    return model

def load_sample_data():
    """
    Generates sample waveform and heatmap data.
    
    Returns:
        waveform (np.ndarray): Shape (NUM_SAMPLES, 3000, 3)
        labels (np.ndarray): Shape (NUM_SAMPLES)
        metadata (dataframe)
    """
    # Create a K-Fold environment for the specified test dataset
    kenv = KFoldEnvironment(TEST_DATASET)

    # Retrieve metadata for training, validation, and testing splits
    __, __, test_metadata = kenv.get_split_metadata(SPLIT)

    # Retrieve data generators for training, validation, and testing
    __, __, test_gen, __ = kenv.get_generators(SPLIT)
    
    # -------------------------------
    # Data Preparation
    # -------------------------------

    # Initialize lists to hold batches of data
    X = []
    Y = []

    num_batches = 1 + (NUM_SAMPLES // BATCH_SIZE)
    
    # Iterate over the test generator.
    for i in range(num_batches):
        x_batch, y_batch = test_gen.__getitem__(i)
        X.append(x_batch)
        Y.append(y_batch)

    # Concatenate all batches into single numpy arrays
    X = np.concatenate(X, axis=0)
    Y = np.concatenate(Y, axis=0)
    
    return X, Y, test_metadata

In [19]:
# Generate sample data
waveforms, labels, metadata = load_sample_data()
model = load_model()
feature_maps, __ = model(waveforms)

feature_maps = feature_maps.numpy()
NUM_SAMPLES = 50  # Adjust as needed
WAVEFORM_COLORS = ['blue', 'green', 'red']  # Adjust based on actual channels

# Separate earthquake and noise indices
earthquake_indices = [i for i, label in enumerate(labels) if label > 0.5]
noise_indices = [i for i, label in enumerate(labels) if label <= 0.5]

# Ensure equal number of earthquake and noise samples
NUM_PLOTS = min(len(earthquake_indices), len(noise_indices), NUM_SAMPLES)

for plot_idx in range(NUM_PLOTS):
    eq_idx = earthquake_indices[plot_idx]
    noise_idx = noise_indices[plot_idx]
    
    # Extract earthquake data
    eq_waveform = waveforms[eq_idx]
    eq_feature_map = feature_maps[eq_idx]
    lags_waveform_eq, autocov_waveform_eq = compute_autocovariance(eq_waveform)  # Averaging over channels
    lags_heatmap_eq, autocov_heatmap_eq = compute_autocovariance(eq_feature_map)
    
    # Extract noise data
    noise_waveform = waveforms[noise_idx]
    noise_feature_map = feature_maps[noise_idx]
    lags_waveform_noise, autocov_waveform_noise = compute_autocovariance(noise_waveform)  # Averaging over channels
    lags_heatmap_noise, autocov_heatmap_noise = compute_autocovariance(noise_feature_map)
    
    # Create a figure with a 1x2 grid: left for earthquake, right for noise
    fig = plt.figure(figsize=(20, 10))  # Adjust size as needed
    main_gs = GridSpec(1, 2, figure=fig, wspace=0.3)
    
    # --- Earthquake Column ---
    eq_gs = main_gs[0, 0].subgridspec(2, 2, wspace=0.3, hspace=0.3)
    
    # Top-Left: Waveform Channels
    eq_waveform_gs = eq_gs[0, 0].subgridspec(eq_waveform.shape[1], 1, hspace=0.1)
    timesteps_eq = np.arange(eq_waveform.shape[0])
    
    for channel in range(eq_waveform.shape[1]):
        ax = fig.add_subplot(eq_waveform_gs[channel, 0])
        show_xticks = (channel == eq_waveform.shape[1] - 1)
        plot_waveform_channel(ax, timesteps_eq, eq_waveform[:, channel], channel, 
                              color=WAVEFORM_COLORS[channel % len(WAVEFORM_COLORS)], 
                              show_xticks=show_xticks)
    
    # Top-Right: Heatmap
    ax_heatmap_eq = fig.add_subplot(eq_gs[0, 1])
    plot_heatmap(ax_heatmap_eq, eq_feature_map.T)
    
    # Bottom-Left: Autocovariance of Waveform
    ax_autocov_waveform_eq = fig.add_subplot(eq_gs[1, 0])
    plot_autocovariance(ax_autocov_waveform_eq, lags_waveform_eq, autocov_waveform_eq, 
                        'Autocovariance of Earthquake Waveform\n(Averaged over channels axis)')
    
    # Bottom-Right: Autocovariance of Heatmap
    ax_autocov_heatmap_eq = fig.add_subplot(eq_gs[1, 1])
    plot_autocovariance(ax_autocov_heatmap_eq, lags_heatmap_eq, autocov_heatmap_eq, 
                        'Autocovariance of Earthquake Representation\n(Averaged over channel axis)')
    
    # --- Noise Column ---
    noise_gs = main_gs[0, 1].subgridspec(2, 2, wspace=0.3, hspace=0.3)
    
    # Top-Left: Waveform Channels
    noise_waveform_gs = noise_gs[0, 0].subgridspec(noise_waveform.shape[1], 1, hspace=0.3)
    timesteps_noise = np.arange(noise_waveform.shape[0])
    
    for channel in range(noise_waveform.shape[1]):
        ax = fig.add_subplot(noise_waveform_gs[channel, 0])
        show_xticks = (channel == noise_waveform.shape[1] - 1)
        plot_waveform_channel(ax, timesteps_noise, noise_waveform[:, channel], channel, 
                              color=WAVEFORM_COLORS[channel % len(WAVEFORM_COLORS)], 
                              show_xticks=show_xticks)
    
    # Top-Right: Heatmap
    ax_heatmap_noise = fig.add_subplot(noise_gs[0, 1])
    plot_heatmap(ax_heatmap_noise, noise_feature_map.T)
    
    # Bottom-Left: Autocovariance of Waveform
    ax_autocov_waveform_noise = fig.add_subplot(noise_gs[1, 0])
    plot_autocovariance(ax_autocov_waveform_noise, lags_waveform_noise, autocov_waveform_noise, 
                        'Autocovariance of Noise Waveform\n(Averaged over channels axis)')
    
    # Bottom-Right: Autocovariance of Heatmap
    ax_autocov_heatmap_noise = fig.add_subplot(noise_gs[1, 1])
    plot_autocovariance(ax_autocov_heatmap_noise, lags_heatmap_noise, autocov_heatmap_noise, 
                        'Autocovariance of Noise Heatmap\n(Averaged over channels axis)')
    
    # Adjust overall layout and save the figure
    plt.tight_layout(rect=[0, 0.03, 0.03, 0.75])  # Adjust rect to accommodate the main title
    plt.savefig(f"latent_plot_pair_{plot_idx + 1}.png")
    plt.close(fig)  # Close the figure to free memory

  plt.tight_layout(rect=[0, 0.03, 0.03, 0.75])  # Adjust rect to accommodate the main title
  plt.tight_layout(rect=[0, 0.03, 0.03, 0.75])  # Adjust rect to accommodate the main title
  plt.tight_layout(rect=[0, 0.03, 0.03, 0.75])  # Adjust rect to accommodate the main title
  plt.tight_layout(rect=[0, 0.03, 0.03, 0.75])  # Adjust rect to accommodate the main title
  plt.tight_layout(rect=[0, 0.03, 0.03, 0.75])  # Adjust rect to accommodate the main title
  plt.tight_layout(rect=[0, 0.03, 0.03, 0.75])  # Adjust rect to accommodate the main title
  plt.tight_layout(rect=[0, 0.03, 0.03, 0.75])  # Adjust rect to accommodate the main title
  plt.tight_layout(rect=[0, 0.03, 0.03, 0.75])  # Adjust rect to accommodate the main title
  plt.tight_layout(rect=[0, 0.03, 0.03, 0.75])  # Adjust rect to accommodate the main title
  plt.tight_layout(rect=[0, 0.03, 0.03, 0.75])  # Adjust rect to accommodate the main title
  plt.tight_layout(rect=[0, 0.03, 0.03, 0.75])  # Adjust rect to accommodate the