In [None]:
import os

# CUDA_VISIBLE_DEVICES is only honoured when it is set before TensorFlow
# initialises CUDA, so export it before TensorFlow is imported or queried.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import tensorflow.compat.v1 as tf1
tf1.disable_v2_behavior()

import tensorflow as tf

# Restrict TensorFlow to the first GPU it can see. On a CPU-only host the list
# is empty, so guard the index instead of crashing with an IndexError.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_visible_devices(physical_devices[0], 'GPU')
else:
    print("No GPU detected; TensorFlow will run on the CPU.")

In [None]:
from __future__ import print_function, division

import os
import sys
import math
import csv
import json
import time
import random
import pickle
import shutil
import logging
import warnings
import platform
import contextlib
from glob import glob
from datetime import datetime, timedelta
from os import listdir
from os.path import join

import faulthandler
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy import signal
from matplotlib.lines import Line2D

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import callbacks, constraints, initializers, layers, models, optimizers, regularizers
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing import sequence, text
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import (
    Callback,
    EarlyStopping,
    LearningRateScheduler,
    ModelCheckpoint,
    ReduceLROnPlateau,
)
from tensorflow.keras.layers import (
    Activation,
    Add,
    Average,
    AveragePooling2D,
    BatchNormalization,
    Bidirectional,
    Conv1D,
    Conv2D,
    Conv2DTranspose,
    Dense,
    Dropout,
    Embedding,
    Flatten,
    GlobalAveragePooling1D,
    GlobalAveragePooling2D,
    GlobalMaxPooling1D,
    GRU,
    Input,
    Lambda,
    LSTM,
    MaxPooling1D,
    MaxPooling2D,
    Permute,
    Reshape,
    SeparableConv1D,
    SpatialDropout1D,
    UpSampling1D,
    UpSampling2D,
    concatenate,
    multiply,
)

from sklearn.metrics import accuracy_score, log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from numpy.random import seed

# Environment / runtime settings
# NOTE(review): this is assigned after `tensorflow.keras` has already been
# imported in this cell, so it probably has no effect here — confirm whether
# it is still needed.
os.environ["KERAS_BACKEND"] = "tensorflow"
# Select matplotlib's "agg" backend (pyplot is already imported above).
matplotlib.use("agg")
# Have the interpreter dump Python tracebacks on fatal errors such as segfaults.
faulthandler.enable()
# Suppress every Python warning for the rest of the session; this also hides
# warnings that may be useful while debugging.
warnings.filterwarnings("ignore")

# Silence TF deprecation warnings (internal API; keep only if you really need it)
# `_PRINT_DEPRECATION_WARNINGS` is a private module attribute and may change
# between TensorFlow releases.
from tensorflow.python.util import deprecation  # noqa: E402
deprecation._PRINT_DEPRECATION_WARNINGS = False


def recall(y_true, y_pred):
    """Recall metric: rounded true positives over rounded actual positives.

    The element-wise product ``y_true * y_pred`` and ``y_true`` itself are
    clipped to [0, 1] and rounded before being summed. ``K.epsilon()`` is added
    to the denominator so that a batch without positives does not divide by
    zero.
    """
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_count = K.sum(K.round(K.clip(y_true, 0, 1)))
    denominator = actual_count + K.epsilon()
    return hit_count / denominator


def precision(y_true, y_pred):
    """Precision metric: rounded true positives over rounded predicted positives.

    The element-wise product ``y_true * y_pred`` and ``y_pred`` itself are
    clipped to [0, 1] and rounded before being summed. ``K.epsilon()`` is added
    to the denominator so that a batch without positive predictions does not
    divide by zero.
    """
    hit_count = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_count = K.sum(K.round(K.clip(y_pred, 0, 1)))
    denominator = predicted_count + K.epsilon()
    return hit_count / denominator


def f1(y_true, y_pred):
    """F1 metric: harmonic mean of ``precision`` and ``recall``.

    ``K.epsilon()`` in the denominator keeps the result finite when both
    precision and recall are zero.
    """
    prec = precision(y_true, y_pred)
    rec = recall(y_true, y_pred)
    half_f1 = (prec * rec) / (prec + rec + K.epsilon())
    return 2 * half_f1

## Foundation Model (U-Trans)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv1D, MaxPooling1D, UpSampling1D, concatenate

# -----------------------------
# U-Trans (Foundation) settings
# -----------------------------
# Upper end of the per-layer drop probabilities built in `create_cct_model1`
# (linearly spaced from 0 up to this value across the transformer layers).
STOCHASTIC_DEPTH_RATE = 0.1
# When True, `create_cct_model1` raises: the positional-embedding path is
# currently commented out there.
POSITIONAL_EMB = False
# Default number of Conv1D layers stacked inside `CCTTokenizer1`.
CONV_LAYERS = 1

# NOTE(review): NUM_CLASSES and INPUT_SHAPE are not referenced by any code
# visible in this notebook section — confirm whether they are still needed.
NUM_CLASSES = 1
INPUT_SHAPE = (375, 256)

# Channel width of the tokens; also used as `key_dim` of each attention head.
PROJECTION_DIM = 80
# Number of attention heads per MultiHeadAttention layer.
NUM_HEADS = 4
# Widths of the Dense layers in each transformer MLP block. The last width
# must equal PROJECTION_DIM because the MLP output is added back to the tokens.
TRANSFORMER_UNITS = [PROJECTION_DIM, PROJECTION_DIM]
# Number of transformer encoder blocks stacked in `create_cct_model1`.
TRANSFORMER_LAYERS = 4


def mlp(x, hidden_units, dropout_rate: float):
    """Apply a stack of GELU Dense layers, each followed by Dropout.

    Args:
        x: Input tensor.
        hidden_units: Iterable of Dense widths, applied in order.
        dropout_rate: Rate passed to every Dropout layer.

    Returns:
        The tensor produced by the last Dropout layer, or ``x`` unchanged when
        ``hidden_units`` is empty.
    """
    out = x
    for width in hidden_units:
        dense = layers.Dense(width, activation=tf.nn.gelu)
        out = dense(out)
        out = layers.Dropout(dropout_rate)(out)
    return out


# Referred from: github.com:rwightman/pytorch-image-models
class StochasticDepth(layers.Layer):
    """Stochastic depth (a.k.a. DropPath) layer.

    During training every sample of the batch is zeroed as a whole with
    probability ``drop_prob``; surviving samples are scaled by
    ``1 / (1 - drop_prob)``. Outside training the input is returned unchanged.

    Args:
        drop_prob: Probability of dropping a sample; must satisfy
            ``0 <= drop_prob < 1``.
        **kwargs: Forwarded to ``keras.layers.Layer``.

    Raises:
        ValueError: If ``drop_prob`` is outside ``[0, 1)``.
    """

    def __init__(self, drop_prob: float, **kwargs):
        super().__init__(**kwargs)
        drop_prob = float(drop_prob)
        # drop_prob == 1 would make keep_prob zero and divide by zero in call().
        if not 0.0 <= drop_prob < 1.0:
            raise ValueError(f"drop_prob must be in [0, 1), got {drop_prob}.")
        self.drop_prob = drop_prob

    def call(self, x, training=None):
        if training:
            keep_prob = 1.0 - self.drop_prob
            # Use the static rank (len of the TensorShape) rather than len() of
            # the symbolic tf.shape tensor, which only works under autograph.
            # The batch size stays dynamic so variable batch sizes are supported.
            shape = (tf.shape(x)[0],) + (1,) * (len(x.shape) - 1)
            # Draw the noise in the input's dtype so non-float32 activations work.
            random_tensor = keep_prob + tf.random.uniform(shape, 0, 1, dtype=x.dtype)
            # floor() yields 1 with probability keep_prob and 0 otherwise.
            binary_tensor = tf.floor(random_tensor)
            return (x / keep_prob) * binary_tensor
        return x

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised."""
        config = super().get_config()
        config.update({"drop_prob": self.drop_prob})
        return config


class CCTTokenizer1(layers.Layer):
    """Compact Convolutional Transformer (CCT) tokenizer for 1D signals.

    A stack of ``num_conv_layers`` bias-free Conv1D layers with ReLU activation
    and "same" padding. The output keeps the ``(batch, length, channels)``
    layout and is used directly as the token sequence.

    Args:
        kernel_size: Kernel size of every Conv1D layer.
        stride: Stride of every Conv1D layer.
        num_conv_layers: Number of Conv1D layers to stack.
        num_output_channels: Filter count per layer. Defaults to
            ``PROJECTION_DIM`` for every layer. Must provide at least
            ``num_conv_layers`` entries; extra entries are ignored.
        positional_emb: Whether ``positional_embedding`` builds an embedding.
        **kwargs: Forwarded to ``keras.layers.Layer``.

    Raises:
        ValueError: If ``num_output_channels`` has fewer entries than
            ``num_conv_layers``.
    """

    def __init__(
        self,
        kernel_size: int = 3,
        stride: int = 1,
        num_conv_layers: int = CONV_LAYERS,
        num_output_channels=None,
        positional_emb: bool = POSITIONAL_EMB,
        **kwargs,
    ):
        super().__init__(**kwargs)

        if num_output_channels is None:
            # One entry per layer, so any num_conv_layers works (the previous
            # fixed-length default broke beyond eight layers).
            num_output_channels = [int(PROJECTION_DIM)] * num_conv_layers
        else:
            num_output_channels = list(num_output_channels)

        if len(num_output_channels) < num_conv_layers:
            raise ValueError(
                f"num_output_channels has {len(num_output_channels)} entries but "
                f"num_conv_layers={num_conv_layers} layers were requested."
            )

        # Kept so get_config() can reproduce the constructor call.
        self.kernel_size = kernel_size
        self.stride = stride
        self.num_conv_layers = num_conv_layers
        self.num_output_channels = num_output_channels
        self.positional_emb = positional_emb

        conv_stack = [
            layers.Conv1D(
                filters=num_output_channels[i],
                kernel_size=kernel_size,
                strides=stride,
                padding="same",
                use_bias=False,
                activation="relu",
                kernel_initializer="he_normal",
            )
            for i in range(num_conv_layers)
        ]
        self.conv_model = tf.keras.Sequential(conv_stack)

    def call(self, inputs):
        # Output shape: (batch, length, channels) -> already token-like for transformer
        return self.conv_model(inputs)

    def positional_embedding(self, image_size: int):
        """Optional positional embeddings (kept for parity with original code).

        Returns:
            ``None`` when positional embeddings are disabled, otherwise a tuple
            ``(embedding_layer, sequence_length)``.
        """
        if not self.positional_emb:
            return None

        # NOTE(review): the dummy input has a single channel; if the conv stack
        # has already been built for a different channel count this call will
        # probably fail — confirm before enabling positional embeddings.
        dummy_inputs = tf.ones((1, image_size, 1))
        dummy_outputs = self.call(dummy_inputs)
        seq_length = int(dummy_outputs.shape[1])
        proj_dim = int(dummy_outputs.shape[-1])

        embed_layer = layers.Embedding(input_dim=seq_length, output_dim=proj_dim)
        return embed_layer, seq_length

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised."""
        config = super().get_config()
        config.update(
            {
                "kernel_size": self.kernel_size,
                "stride": self.stride,
                "num_conv_layers": self.num_conv_layers,
                "num_output_channels": [int(c) for c in self.num_output_channels],
                "positional_emb": self.positional_emb,
            }
        )
        return config


def create_cct_model1(inputs):
    """Run ``inputs`` through the CCT tokenizer and a stack of transformer blocks.

    Each of the ``TRANSFORMER_LAYERS`` blocks is pre-norm: LayerNorm ->
    multi-head self-attention -> stochastic depth -> residual add, followed by
    LayerNorm -> MLP -> stochastic depth -> residual add. A final
    LayerNormalization is applied to the result.

    Raises:
        ValueError: If ``POSITIONAL_EMB`` is True (that path is not wired up).
    """
    tokenizer = CCTTokenizer1()
    tokens = tokenizer(inputs)

    if POSITIONAL_EMB:
        # NOTE: image_size must be defined by the caller if enabling positional embeddings.
        # pos_embed, seq_length = tokenizer.positional_embedding(image_size)
        # positions = tf.range(start=0, limit=seq_length, delta=1)
        # tokens += pos_embed(positions)
        raise ValueError("POSITIONAL_EMB=True requires defining image_size and enabling the code block.")

    # One drop probability per block, growing linearly from 0 to STOCHASTIC_DEPTH_RATE.
    drop_rates = np.linspace(0, STOCHASTIC_DEPTH_RATE, TRANSFORMER_LAYERS).tolist()

    for drop_rate in drop_rates:
        # --- self-attention sub-block ---
        normed_tokens = layers.LayerNormalization(epsilon=1e-5)(tokens)
        attention = layers.MultiHeadAttention(
            num_heads=NUM_HEADS,
            key_dim=PROJECTION_DIM,
            dropout=0.1,
        )
        attended = attention(normed_tokens, normed_tokens)
        attended = StochasticDepth(drop_rate)(attended)
        after_attention = layers.Add()([attended, tokens])

        # --- feed-forward sub-block ---
        hidden = layers.LayerNormalization(epsilon=1e-5)(after_attention)
        hidden = mlp(hidden, hidden_units=TRANSFORMER_UNITS, dropout_rate=0.1)
        hidden = StochasticDepth(drop_rate)(hidden)
        tokens = layers.Add()([hidden, after_attention])

    final_norm = layers.LayerNormalization(epsilon=1e-5)
    return final_norm(tokens)


def UNET(inputs, d1: int):
    """1D U-Net with a CCT transformer bottleneck (U-Trans).

    The encoder has five levels with ``d1, 2*d1, 4*d1, 8*d1, 16*d1`` filters
    and pooling factors ``2, 2, 2, 2, 5``, so the time axis shrinks by 80 before
    the bottleneck. The decoder mirrors it with up-sampling factors
    ``5, 2, 2, 2, 2`` and concatenates the matching encoder feature map at each
    level.

    Args:
        inputs: Tensor of shape ``(batch, length, channels)``.
        d1: Number of filters at the first (full-resolution) level.

    Returns:
        Tensor of shape ``(batch, length, d1)``.

    Raises:
        ValueError: If the static input length is known and is not a multiple
            of 80; the skip concatenations would otherwise fail with a shape
            mismatch deep inside the decoder.
    """
    d2, d3, d4, d5 = d1 * 2, d1 * 4, d1 * 8, d1 * 16

    # Every pooling step must divide the length exactly for the up-sampled
    # decoder tensors to line up with their skip connections.
    total_pool = 2 * 2 * 2 * 2 * 5
    steps = tf.compat.dimension_value(inputs.shape[1])
    if steps is not None and steps % total_pool != 0:
        raise ValueError(
            f"UNET requires an input length divisible by {total_pool}, got {steps}."
        )

    def _conv(filters, kernel_size=3):
        """Create the ReLU / same-padding / he_normal Conv1D used throughout."""
        return Conv1D(
            filters,
            kernel_size,
            activation="relu",
            padding="same",
            kernel_initializer="he_normal",
        )

    def _double_conv(x, filters):
        """Apply two consecutive kernel-3 convolutions with ``filters`` channels."""
        x = _conv(filters)(x)
        return _conv(filters)(x)

    def _up_merge(x, skip, filters, size, kernel_size):
        """Up-sample ``x``, convolve it, and concatenate it after ``skip``."""
        up_conv = _conv(filters, kernel_size)
        upsampled = up_conv(UpSampling1D(size=size)(x))
        return concatenate([skip, upsampled], axis=-1)

    # Encoder: four levels pooled by 2, keeping pre-pool features as skips.
    skips = []
    x = inputs
    for filters in (d1, d2, d3, d4):
        x = _double_conv(x, filters)
        skips.append(x)
        x = MaxPooling1D(pool_size=2)(x)
    conv1, conv2, conv3, conv4 = skips
    pool4 = x

    # Fifth encoder level, pooled by 5.
    x = _double_conv(pool4, d5)
    x = MaxPooling1D(pool_size=5)(x)

    # Bottleneck + Transformer
    x = _double_conv(x, d5)
    x = create_cct_model1(x)

    # Decoder. The deepest level merges with `pool4` (the pooled level-4
    # output), not with the level-5 convolution output.
    x = _up_merge(x, pool4, d5, size=5, kernel_size=3)
    x = _double_conv(x, d5)

    # The up-convolution kernel sizes (3, 2, 2, 2) are kept exactly as in the
    # original architecture so existing weights stay compatible.
    for skip, filters, kernel_size in (
        (conv4, d4, 3),
        (conv3, d3, 2),
        (conv2, d2, 2),
        (conv1, d1, 2),
    ):
        x = _up_merge(x, skip, filters, size=2, kernel_size=kernel_size)
        x = _double_conv(x, filters)

    return x

In [None]:
import os
import time
import shutil
import datetime
import multiprocessing
import warnings

import h5py
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Conv1D
from tensorflow.keras.callbacks import (
    EarlyStopping,
    ReduceLROnPlateau,
    LearningRateScheduler,
    ModelCheckpoint,
)

from EqT_utils_Recon import DataGenerator, _lr_schedule
# Module-level model input shape (time steps, channels); referenced by
# `_build_model` below.
input_shape=(6000, 3)
# Select matplotlib's "agg" backend.
matplotlib.use("agg")
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Silence TensorFlow deprecation messages. `_PRINT_DEPRECATION_WARNINGS` is a
# private module attribute and may change between TensorFlow releases.
from tensorflow.python.util import deprecation  # noqa: E402
deprecation._PRINT_DEPRECATION_WARNINGS = False


def trainer1(
    input_hdf5=None,
    input_csv=None,
    output_name=None,
    input_dimention=(6000, 3),
    cnn_blocks=5,
    lstm_blocks=2,
    padding="same",
    activation="relu",
    drop_rate=0.1,
    shuffle=True,
    loss_types = 'mse',
    normalization_mode="std",
    augmentation=True,
    add_event_r=0.6,
    shift_event_r=0.99,
    add_noise_r=0.3,
    drop_channel_r=0.5,
    add_gap_r=0.2,
    scale_amplitude_r=None,
    pre_emphasis=False,
    train_valid_test_split=(0.85, 0.05, 0.10),
    mode="generator",
    batch_size=200,
    epochs=200,
    monitor="val_loss",
    patience=12,
    multi_gpu=False,
    number_of_gpus=4,
    gpuid=None,
    gpu_limit=None,
    use_multiprocessing=True,
):
    """Train the reconstruction model to recover clean signals from corrupted time/frequency data.

    All arguments are collected into one configuration dict that is handed to
    the helpers ``_make_dir``, ``_split``, ``_make_callback`` and
    ``_build_model``, and to the ``DataGenerator`` instances.

    Args:
        input_hdf5: Passed (as ``str``) to ``DataGenerator`` as ``file_name``.
        input_csv: Stored in the configuration only.
        output_name: Passed to ``_make_dir`` and available to ``_make_callback``
            for naming the output directory and checkpoints.
        input_dimention: ``(length, channels)``; the first entry becomes the
            generators' ``dim`` and the last their ``n_channels``.
        cnn_blocks, lstm_blocks, padding, activation, drop_rate, loss_types:
            Stored in the configuration that ``_build_model`` receives; this
            function does not read them itself.
        shuffle, normalization_mode, augmentation, add_event_r, shift_event_r,
        add_noise_r, drop_channel_r, add_gap_r, scale_amplitude_r, pre_emphasis:
            Forwarded to the training ``DataGenerator``. The validation
            generator always uses ``shuffle=False`` and ``augmentation=False``
            and only receives ``normalization_mode`` from this group.
        train_valid_test_split: Stored in the configuration only.
        mode: Must be ``"generator"``.
        batch_size: Batch size of both generators.
        epochs: Number of epochs passed to ``model.fit``.
        monitor, patience: Available to ``_make_callback`` via the configuration.
        multi_gpu, number_of_gpus: Stored in the configuration only.
        gpuid: When not None, exported as ``CUDA_VISIBLE_DEVICES`` and a TF1
            session with ``allow_growth`` is installed before the model is built.
        gpu_limit: Optional per-process GPU memory fraction for that session.
        use_multiprocessing: Forwarded to ``model.fit``.

    Returns:
        None.

    Raises:
        ValueError: If ``mode`` is not ``"generator"``; raised before any
            output directory is created or removed.
    """

    args = {
        "input_hdf5": input_hdf5,
        "input_csv": input_csv,
        "output_name": output_name,
        "input_dimention": input_dimention,
        "cnn_blocks": cnn_blocks,
        "lstm_blocks": lstm_blocks,
        "padding": padding,
        "loss_types":loss_types,
        "activation": activation,
        "drop_rate": drop_rate,
        "shuffle": shuffle,
        "normalization_mode": normalization_mode,
        "augmentation": augmentation,
        "add_event_r": add_event_r,
        "shift_event_r": shift_event_r,
        "add_noise_r": add_noise_r,
        "add_gap_r": add_gap_r,
        "drop_channel_r": drop_channel_r,
        "scale_amplitude_r": scale_amplitude_r,
        "pre_emphasis": pre_emphasis,
        "train_valid_test_split": train_valid_test_split,
        "mode": mode,
        "batch_size": batch_size,
        "epochs": epochs,
        "monitor": monitor,
        "patience": patience,
        "multi_gpu": multi_gpu,
        "number_of_gpus": number_of_gpus,
        "gpuid": gpuid,
        "gpu_limit": gpu_limit,
        "use_multiprocessing": use_multiprocessing,
    }

    def _configure_gpu(cfg):
        """Select the GPU and install a memory-growth session, if requested."""
        if cfg["gpuid"] is None:
            return
        os.environ["CUDA_VISIBLE_DEVICES"] = str(cfg["gpuid"])
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
        if cfg["gpu_limit"] is not None:
            config.gpu_options.per_process_gpu_memory_fraction = float(cfg["gpu_limit"])
        sess = tf.compat.v1.Session(config=config)
        tf.compat.v1.keras.backend.set_session(sess)

    def train(cfg):
        """Build the model and generators, then run ``model.fit``."""
        # Fail fast: _make_dir() deletes an existing output directory, so an
        # unsupported mode must be rejected before it runs.
        if cfg["mode"] != "generator":
            raise ValueError("Only mode='generator' is supported in this cell.")

        save_dir, save_models = _make_dir(cfg["output_name"])
        training, validation = _split(cfg, save_dir)
        cb = _make_callback(cfg, save_models)

        # The device/session setup has to be in place before any layer or
        # variable is created.
        _configure_gpu(cfg)

        model = _build_model(cfg)
        model.summary()

        start_training = time.time()

        # Settings shared by the training and validation generators.
        params_validation = {
            "file_name": str(cfg["input_hdf5"]),
            "dim": cfg["input_dimention"][0],
            "batch_size": cfg["batch_size"],
            "n_channels": cfg["input_dimention"][-1],
            "shuffle": False,
            "norm_mode": cfg["normalization_mode"],
            "augmentation": False,
        }

        params_training = dict(params_validation)
        params_training.update(
            {
                "shuffle": cfg["shuffle"],
                "augmentation": cfg["augmentation"],
                "add_event_r": cfg["add_event_r"],
                "add_gap_r": cfg["add_gap_r"],
                "shift_event_r": cfg["shift_event_r"],
                "add_noise_r": cfg["add_noise_r"],
                # The key spelling must match the DataGenerator keyword —
                # presumably `drop_channe_r`; verify against EqT_utils_Recon.
                "drop_channe_r": cfg["drop_channel_r"],
                "scale_amplitude_r": cfg["scale_amplitude_r"],
                "pre_emphasis": cfg["pre_emphasis"],
            }
        )

        training_generator = DataGenerator(training, **params_training)
        validation_generator = DataGenerator(validation, **params_validation)

        history = model.fit(
            training_generator,
            validation_data=validation_generator,
            use_multiprocessing=cfg["use_multiprocessing"],
            workers=multiprocessing.cpu_count(),
            callbacks=cb,
            epochs=cfg["epochs"],
            verbose=1,
        )

        end_training = time.time()
        return history, model, start_training, end_training, save_dir, save_models, len(training), len(validation)

    # The training artefacts are not returned, so the function still yields
    # None exactly as before.
    train(args)



def _make_dir(output_name):
    """Create output directories (overwrite if exists)."""
    if output_name is None:
        raise ValueError("Please specify output_name.")

    save_dir = os.path.join(os.getcwd(), f"{output_name}_outputs")
    save_models = os.path.join(save_dir, "models")

    if os.path.isdir(save_dir):
        shutil.rmtree(save_dir)
    os.makedirs(save_models, exist_ok=True)

    return save_dir, save_models


def _build_model(args):
    """Build and compile the reconstruction model.

    The network is ``UNET`` followed by two linear Conv1D layers; the last one
    (named ``"reconstruction"``) has 3 output channels.

    Args:
        args: Configuration dict. ``args["input_dimention"]`` gives the input
            shape and ``args["loss_types"]`` the training loss. When a key is
            missing or None, the module-level ``input_shape`` and ``"mae"``
            are used respectively.

    Returns:
        A compiled ``keras.Model`` using the Adam optimizer and an MAE metric.
    """
    d1 = 5  # number of filters at the first U-Net level
    n_outputs = 3  # channels of the reconstructed signal

    # Honour the caller's configuration instead of silently ignoring it.
    model_input_shape = args.get("input_dimention")
    if model_input_shape is None:
        model_input_shape = input_shape
    loss = args.get("loss_types")
    if loss is None:
        loss = "mae"

    inp = Input(shape=tuple(model_input_shape), name="input")
    x = UNET(inp, d1)

    x = Conv1D(d1, 3, strides=1, padding="same", kernel_initializer="he_normal")(x)
    out = Conv1D(n_outputs, 3, strides=1, padding="same", kernel_initializer="he_normal", name="reconstruction")(x)

    model = Model(inp, out)
    model.compile(optimizer="adam", loss=loss, metrics=["mae"])
    return model


def _split(args, save_dir):
    """Load pre-defined train/valid/test IDs and return train/valid splits."""
    training = np.load("IDS_Collected_Data_train.npy")    
    validation = np.load("IDS_Collected_Data_valid.npy")
    _ = np.load("IDS_Collected_Data_test.npy")  # loaded to keep parity; not used here
    return training, validation


def _make_callback(args, save_models):
    """Callbacks: checkpoint, LR reducer, scheduler, and early stopping.

    Args:
        args: Configuration dict; reads ``output_name``, ``monitor`` and
            ``patience``.
        save_models: Directory that receives the checkpoint files.

    Returns:
        List ``[checkpoint, lr_reducer, lr_scheduler, early_stop]``.
    """
    # "{epoch:03d}" is left unformatted on purpose: ModelCheckpoint fills it in.
    model_name = f"{args['output_name']}" + "_{epoch:03d}.h5"
    filepath = os.path.join(save_models, model_name)

    # Only the weights of the best epoch so far (by the monitored metric) are saved.
    checkpoint = ModelCheckpoint(
        filepath=filepath,
        monitor=args["monitor"],
        mode="auto",
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
    )

    # Watch the same metric as the checkpoint and early stopping; previously
    # the reducer always fell back to its default monitor.
    # The reducer acts two epochs before early stopping would trigger.
    lr_reducer = ReduceLROnPlateau(
        monitor=args["monitor"],
        factor=np.sqrt(0.1),
        cooldown=0,
        patience=args["patience"] - 2,
        min_lr=0.5e-6,
    )

    # NOTE(review): LearningRateScheduler sets the learning rate at the start of
    # every epoch, which may override the reducer's changes depending on how
    # `_lr_schedule` is written — confirm that both are intended.
    lr_scheduler = LearningRateScheduler(_lr_schedule)
    early_stop = EarlyStopping(monitor=args["monitor"], patience=args["patience"])

    return [checkpoint, lr_reducer, lr_scheduler, early_stop]


def _pre_loading(args, training, validation):
    """Collect HDF5 entries for both splits and wrap them in pre-load generators.

    For every trace ID the result of ``File.get(str(trace_id))`` is stored in a
    dict keyed by the stringified ID.

    NOTE(review): the stored values are whatever ``h5py.File.get`` returns, and
    the file is left open — presumably because the generators still read from
    those handles; confirm.
    NOTE(review): ``PreLoadGenerator`` is not imported in the visible part of
    this notebook; calling this function will raise NameError unless it is
    defined elsewhere.

    Args:
        args: Configuration dict (same keys as used by the trainer).
        training: Iterable of training trace IDs (must support ``len``).
        validation: Iterable of validation trace IDs (must support ``len``).

    Returns:
        Tuple ``(training_generator, validation_generator)``.
    """
    h5_file = h5py.File(args["input_hdf5"], "r")

    def _gather(trace_ids):
        """Map each stringified trace ID to its entry in the open HDF5 file."""
        collected = {}
        progress = tqdm(total=len(trace_ids))
        for trace_id in trace_ids:
            progress.update()
            key = str(trace_id)
            collected[key] = h5_file.get(key)
        return collected

    print("Loading the training data into memory ...")
    training_set = _gather(training)

    print("Loading the validation data into memory ...", flush=True)
    validation_set = _gather(validation)

    signal_length = args["input_dimention"][0]
    channel_count = args["input_dimention"][-1]

    params_training = dict(
        dim=signal_length,
        batch_size=args["batch_size"],
        n_channels=channel_count,
        shuffle=args["shuffle"],
        norm_mode=args["normalization_mode"],
        augmentation=args["augmentation"],
        add_event_r=args["add_event_r"],
        add_gap_r=args["add_gap_r"],
        shift_event_r=args["shift_event_r"],
        add_noise_r=args["add_noise_r"],
        drop_channe_r=args["drop_channel_r"],
        scale_amplitude_r=args["scale_amplitude_r"],
        pre_emphasis=args["pre_emphasis"],
    )

    # Validation data is never shuffled or augmented.
    params_validation = dict(
        dim=signal_length,
        batch_size=args["batch_size"],
        n_channels=channel_count,
        shuffle=False,
        norm_mode=args["normalization_mode"],
        augmentation=False,
    )

    training_generator = PreLoadGenerator(training, training_set, **params_training)  # expects PreLoadGenerator
    validation_generator = PreLoadGenerator(validation, validation_set, **params_validation)
    return training_generator, validation_generator

In [None]:
# Launch a training run. Every value below is passed to `trainer1` unchanged.
# NOTE(review): `input_hdf5` is an absolute, machine-specific path; consider
# moving it into a configurable constant.
trainer1(
    # Data source and output naming
    input_hdf5=r'/scratch/sadalyom/DataCollected',
    input_csv='',
    output_name='test_trainer_2',
    # Generator / augmentation settings
    mode='generator',
    shuffle=True,
    normalization_mode='std',
    augmentation=True,
    add_event_r=0.5,
    shift_event_r=0.99,
    add_noise_r=0.5,
    drop_channel_r=False,
    add_gap_r=None,
    scale_amplitude_r=None,
    pre_emphasis=False,
    # Optimisation settings
    loss_types='mae',
    batch_size=40,
    epochs=50,
    patience=10,
    # Device selection (None leaves the session configuration untouched)
    gpuid=None,
    gpu_limit=None,
)