In [3]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import os
from seismic_purifier.config import BATCH_SIZE
from seismic_purifier.representation_learning_models import (
    RepresentationLearningAutoencoder,
    RepresentationLearningDenoisingAutoencoder,
    RepresentationLearningAutoencoderEnsemble
)
from seismic_purifier.classifier_models import (
    ClassifierAutocovariance, 
    ClassifierAugmentationCrossCovariances, 
    ClassifierRepresentationCrossCovariances
)

In [4]:
# ============================
# 1. Configuration
# ============================

# Input arrays in .npy format. These are placeholders: point them at your own files.
TRAIN_DATA_PATH: str = 'data/train_data.npy'
VAL_DATA_PATH: str = 'data/val_data.npy'
TEST_DATA_PATH: str = 'data/test_data.npy'

# Output locations: the final saved model and the checkpoint directory.
MODEL_SAVE_PATH: str = 'saved_models/autoencoder_model'
CHECKPOINT_DIR: str = 'checkpoints'

# Optimisation settings.
LEARNING_RATE: float = 1e-4
EPOCHS: int = 50

In [5]:
# Ensure output directories exist.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
# os.path.dirname() returns '' when MODEL_SAVE_PATH has no directory component,
# and os.makedirs('') raises FileNotFoundError, so only create a parent
# directory when there actually is one.
model_save_parent = os.path.dirname(MODEL_SAVE_PATH)
if model_save_parent:
    os.makedirs(model_save_parent, exist_ok=True)

# ============================
# 2. Data loading
# ============================
# Fail fast and report every missing input at once, instead of stopping at the
# first np.load() that cannot find its file.
missing_data_files = [
    path
    for path in (TRAIN_DATA_PATH, VAL_DATA_PATH, TEST_DATA_PATH)
    if not os.path.isfile(path)
]
if missing_data_files:
    raise FileNotFoundError(
        "Missing data file(s): "
        + ", ".join(str(path) for path in missing_data_files)
        + ". Update TRAIN_DATA_PATH / VAL_DATA_PATH / TEST_DATA_PATH "
        "in the configuration cell."
    )

X_train = np.load(TRAIN_DATA_PATH)  # Expected shape: (num_samples, 3000, 3)
print(f"Training data shape: {X_train.shape}")

X_val = np.load(VAL_DATA_PATH)  # Expected shape: (num_samples, 3000, 3)
print(f"Validation data shape: {X_val.shape}")

# NOTE(review): presumably the same (num_samples, 3000, 3) layout as above -- confirm.
X_test = np.load(TEST_DATA_PATH)
print(f"Test data shape: {X_test.shape}")

FileNotFoundError: [Errno 2] No such file or directory: 'data/train_data.npy'

In [6]:
# ============================
# 3. Representation Learning Model Instantiation
# ============================

# Pick ONE representation learning model to train. The plain autoencoder is
# the active choice; the commented-out alternatives below can be swapped in.
model = RepresentationLearningAutoencoder(name="rep_learning_autoencoder")

# Denoising autoencoder variant:
# model = RepresentationLearningDenoisingAutoencoder(
#     name="rep_learning_denoising_autoencoder",
#     input_noise_std=1e-6,
#     denoising_noise_std=2e-1
# )
#
# Autoencoder ensemble variant:
# model = RepresentationLearningAutoencoderEnsemble(
#     name="rep_learning_autoencoder_ensemble",
#     input_noise_std=1e-6,
#     eps=1e-27
# )

2024-09-21 17:58:05.777043: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-21 17:58:06.921134: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-21 17:58:06.921252: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [None]:
# ============================
# 4. Model Compilation
# ============================
# Adam with the learning rate from the configuration cell. compile() receives
# only the optimizer here; no loss or metrics are passed.
# NOTE(review): presumably the model defines its own loss internally -- confirm.
optimizer = keras.optimizers.Adam(
    learning_rate=LEARNING_RATE,
)
model.compile(
    optimizer=optimizer,
)

In [None]:
# ============================
# 5. Callbacks Setup
# ============================
# Two callbacks are handed to fit(): per-epoch weight checkpoints and early stopping.

# Weights-only checkpoint written every epoch; the epoch number is formatted
# into the file name through the {epoch:02d} placeholder.
checkpoint_path_template = os.path.join(CHECKPOINT_DIR, 'autoencoder_epoch_{epoch:02d}.ckpt')
weights_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path_template,
    save_weights_only=True,
    save_freq='epoch',
    verbose=1,
)

# Watches 'val_loss' with a patience of 2 epochs and restores the best weights
# seen when training is stopped early.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
    verbose=1,
)

callbacks = [weights_checkpoint, early_stopping]

In [None]:
# ============================
# 6. Training the Representation Learning Model and Save.
# ============================
# Only input arrays are passed to fit(); no target array is given.
#
# validation_data is wrapped in a 1-tuple. Keras documents validation_data as
# a tuple (x_val, ...) or a dataset, and TF 2.x fit() truth-tests the argument,
# which raises "truth value of an array ... is ambiguous" for a bare
# multi-element NumPy array. A 1-tuple unpacks to x=X_val with no targets.
fit_result = model.fit(
    X_train,
    validation_data=(X_val,),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=callbacks,
    # NOTE(review): shuffling is disabled, so batches keep the on-disk sample
    # order every epoch -- confirm this is intentional.
    shuffle=False,
)

# Persist the trained model to the path set in the configuration cell.
model.save(MODEL_SAVE_PATH)
print(f"Model saved to {MODEL_SAVE_PATH}")

In [None]:
# ============================
# 7. Classifier Model Instantiation
# ============================

# Choose the model for classification. This is just for convenience: these
# classifiers are wrappers around representation learning models.
# For example, using ClassifierAutocovariance:
model_classifier = ClassifierAutocovariance(model)

# Alternatively, you can choose other wrappers.
# model_classifier = ClassifierAugmentationCrossCovariances(model)
# model_classifier = ClassifierRepresentationCrossCovariances(model)

# Note: be careful about the compatibility of the classifier wrappers with the
# models.
#   - RepresentationLearningAutoencoder and
#     RepresentationLearningDenoisingAutoencoder are compatible with
#     ClassifierAutocovariance and ClassifierAugmentationCrossCovariances.
#   - RepresentationLearningAutoencoderEnsemble is only compatible with
#     ClassifierRepresentationCrossCovariances.
# (Written as comments rather than a bare string literal so the note is not
# echoed as the cell's output.)

In [None]:
# ============================
# 8. Obtain earthquake probabilities (unnormalized)
# ============================
# Score the validation set by calling the classifier wrapper directly on X_val.
# NOTE(review): the whole array is passed in a single call; whether the wrapper
# batches internally is not visible here -- confirm memory use for large inputs.
# NOTE(review): X_test is loaded earlier but is not scored here -- confirm that
# X_val is the intended input.
earthquake_scores = model_classifier(X_val)