In [3]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv1D, Dense, Concatenate, TimeDistributed, Reshape
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import numpy as np
from helper_funcs import gen_samples

In [None]:
# First navigate to our directory
transfer_directory_path = os.path.join("Data", "synth_transfer_df.parquet")
general_directory_path = os.path.join("Data", "synth_general_df.parquet")
# Load the dataframes
synth_transfer_df = pd.read_parquet(transfer_directory_path)
synth_general_df = pd.read_parquet(general_directory_path)
# Concatenate (after making sure they share columns) and then reset indices
assert list(synth_transfer_df.columns) == list(synth_general_df.columns), "Column names do not match!"
df = pd.concat([synth_transfer_df, synth_general_df], axis=0)
df.reset_index(drop=True, inplace=True)

In [None]:
# Split into train (70%) and temp (30%) with stratification
train_df, temp_df = train_test_split(
    df,
    test_size=0.3,
    stratify=df['species'],  # Stratify based on the 'species' column
    random_state=42
)

# Split temp into test (15%) and validation (15%)
test_df, val_df = train_test_split(
    temp_df,
    test_size=0.5,
    stratify=temp_df['species'],  # Stratify again to maintain balance
    random_state=42
)

In [None]:
# Prepare samples
X_train, y_train = gen_samples(train_df)
X_test, y_test = gen_samples(test_df)
X_val, y_val = gen_samples(val_df)

In [62]:
def weight_func(snr, k=3):
    return ((snr/k)**k) / (1 + (snr/k)**k)


def custom_loss(y_true, y_pred):
    """
    Custom loss function for (batch_size, N, 3):
    - Binary cross-entropy for the first output node.
    - MSE for the second and third output nodes, masked by the first node's true labels.
    - Each bin in each sample is weighted by f(SNR), where SNR is the 3rd node label.
    
    Args:
    y_true: Tensor of true labels, shape (batch_size, N, 3).
    y_pred: Tensor of predicted values, shape (batch_size, N, 3).
    
    Returns:
    A scalar tensor representing the combined loss.
    """

    # Mean squared error for the second and third nodes
    mse_loss_2 = tf.square(y_true[..., 1] - y_pred[..., 1])
    mse_loss_3 = tf.square(y_true[..., 2] - y_pred[..., 2])
    mse_loss = mse_loss_2 + mse_loss_3  # Shape (batch_size, N)

    # Mask the MSE loss where the first node's true label is 0
    mask = tf.cast(y_true[..., 0] > 0, tf.float32)  # Shape (batch_size, N)
    masked_mse_loss = mse_loss * mask  # Shape (batch_size, N)
    
    # Manually calculate binary cross-entropy for the first node
    epsilon = 1e-7  # Small constant to prevent log(0)
    y_pred_clipped = tf.clip_by_value(y_pred[..., 0], epsilon, 1.0 - epsilon)
    bce_loss = -(y_true[..., 0] * tf.math.log(y_pred_clipped) + (1 - y_true[..., 0]) * tf.math.log(1 - y_pred_clipped))  # Shape (batch_size, N)

    # Weighting each bin by weight_func(SNR), where SNR is the 3rd node label
    snr = y_true[..., 2]  # SNR is the 3rd node label, shape (batch_size, N)
    weights = tf.where(snr < 0, tf.ones_like(snr), weight_func(snr))  # If SNR < 0, weight is 1 (fully weight the BCE loss for non-peak bins), else apply weight_func
    print(weights)

    # Apply weights to the masked MSE loss
    weighted_mse_loss = masked_mse_loss * weights  # Shape (batch_size, N)
    
    # Apply weights to the BCE loss
    weighted_bce_loss = bce_loss * weights  # Shape (batch_size, N)

    # Average weighted MSE, BCE losses across bins (N) for each sample
    mean_mse_loss_per_sample = tf.reduce_mean(weighted_mse_loss, axis=1)  # Mean over N for shape (batch_size,)
    mean_bce_loss_per_sample = tf.reduce_mean(weighted_bce_loss, axis=1)

    # Combine and average across the batch
    total_loss = tf.reduce_mean(mean_bce_loss_per_sample + mean_mse_loss_per_sample)  # Mean over batch size

    return total_loss



In [78]:
# Example data: batch_size=4, N=5, nodes=3
y_true = np.array([
    [[0, 0.5, 0.7], [0, 0.2, -1], [0, 10000, -1000], [0, 0.3, 10], [0, 0.1, 10]],  # Sample 1
    [[0, 0.6, 0.3], [1, 0.1, -1], [1, 0.3, 10], [0, 0.4, 10], [0, 0.7, 10]],  # Sample 2
    [[0, 0.4, 1.5], [1, 0.8, -1], [1, 0.6, 10], [1, 0.2, 10], [0, 0.9, 10]],  # Sample 3
    [[0, 0.5, 0.6], [0, 0.3, -1], [0, 0.7, 10], [1, 0.1, 10], [0, 0.8, 10]],  # Sample 4
])

y_pred = np.array([
    [[0.9, 0.6, 0.8], [0.9, 0.3, 0.5], [0.5, 100000, 1000], [0.7, 0.4, 0.6], [0.2, 0.1, 0.3]],  # Sample 1
    [[0.7, 0.5, 0.4], [0.9, 0.2, 0.3], [0.9, 0.4, 0.6], [0.6, 0.7, 0.9], [0.8, 0.7, 0.8]],  # Sample 2
    [[0.8, 0.4, 0.5], [0.9, 0.6, 0.8], [0.9, 0.7, 0.5], [0.9, 0.3, 0.6], [0.2, 0.9, 0.7]],  # Sample 3
    [[0.9, 0.4, 0.3], [0.9, 0.6, 0.8], [0.6, 0.9, 0.7], [0.9, 0.3, 0.5], [0.8, 0.7, 0.6]],  # Sample 4
])

# Convert to tensors
y_true_tensor = tf.convert_to_tensor(y_true, dtype=tf.float32)
y_pred_tensor = tf.convert_to_tensor(y_pred, dtype=tf.float32)

loss_value = custom_loss(y_true_tensor, y_pred_tensor)
print("Loss Value:", loss_value.numpy())

tf.Tensor(
[[1.2544345e-02 1.0000000e+00 1.0000000e+00 9.7370982e-01 9.7370982e-01]
 [9.9900097e-04 1.0000000e+00 9.7370982e-01 9.7370982e-01 9.7370982e-01]
 [1.1111111e-01 1.0000000e+00 9.7370982e-01 9.7370982e-01 9.7370982e-01]
 [7.9365084e-03 1.0000000e+00 9.7370982e-01 9.7370982e-01 9.7370982e-01]], shape=(4, 5), dtype=float32)
Loss Value: 18.277445


In [None]:
# Define name for this model
model_version = "PP V1"

# Define the input length (N)
N = 8192

# Input layer
input_layer = Input(shape=(N, 1), name="Input")

# Inception-like layer with 1D convolutions
conv2 = Conv1D(16, kernel_size=2, activation='relu', padding='same', name="Conv_2")(input_layer)
conv4 = Conv1D(32, kernel_size=4, activation='relu', padding='same', name="Conv_4")(input_layer)
conv8 = Conv1D(64, kernel_size=8, activation='relu', padding='same', name="Conv_8")(input_layer)
conv32 = Conv1D(16, kernel_size=32, activation='relu', padding='same', name="Conv_32")(input_layer)
conv64 = Conv1D(8, kernel_size=64, activation='relu', padding='same', name="Conv_64")(input_layer)
conv128 = Conv1D(4, kernel_size=128, activation='relu', padding='same', name="Conv_128")(input_layer)
conv256 = Conv1D(4, kernel_size=256, activation='relu', padding='same', name="Conv_256")(input_layer)

# Concatenate the outputs of all convolutional layers
concat_layer = Concatenate(name="Inception_Concat")([conv2, conv4, conv8, conv32, conv64, conv128, conv256])

# Time Distributed Dense Layers
td_dense64 = TimeDistributed(Dense(64, activation='relu'), name="Dense_64")(concat_layer)
td_dense32 = TimeDistributed(Dense(32, activation='relu'), name="Dense_32")(td_dense64)
td_dense16 = TimeDistributed(Dense(16, activation='relu'), name="Dense_16")(td_dense32)

# Final layer with 4 outputs per input bin
output = TimeDistributed(Dense(4, activation=None), name="Output")(td_dense16)

# Define the model
model = Model(inputs=input_layer, outputs=output, name=model_version)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss=custom_loss, metrics=["accuracy"])

# Summary
model.summary()

# Example data for training
# Inputs: shape (batch_size, N, 1)
# Outputs: shape (batch_size, N, 4)

# model.fit(X_train, y_train, batch_size=32, epochs=10)


In [None]:
# Define batch size, number of epochs, and patience
batch_size = 32
epochs = 15
patience = 3

weight_path=os.path.join("PP Weights", f"{model_version}.h5")


# Add callbacks for better training
callbacks = [
    EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True),  # Stop if no improvement for 5 epochs
    ModelCheckpoint(weight_path, save_best_only=True, monitor='val_loss')  # Save the best model
]

# Train the model
history = model.fit(
    X_train,                # Training data
    y_train,                # Training labels
    validation_data=(X_val, y_val),  # Validation data
    epochs=epochs,        # Number of epochs
    batch_size=batch_size,  # Batch size
    callbacks=callbacks,    # Add callbacks for early stopping and checkpointing
    verbose=1               # Verbose output
)

In [None]:
# Evaluate the model on the test set
test_loss = model.evaluate(X_test, y_test, verbose=1)  # Verbose output for evaluation

print("Test Loss:", test_loss)

In [None]:
# Example data: batch_size=4, N=5, nodes=3
y_true = np.array([
    [[0, 0.5, 0.7], [0, 0.2, 0.4], [0, 0.9, 10000], [0, 0.3, 10], [0, 0.1, 10]],  # Sample 1
    [[0, 0.6, 0.3], [1, 0.1, 0.2], [1, 0.3, 10], [0, 0.4, 10], [0, 0.7, 10]],  # Sample 2
    [[0, 0.4, 1.5], [1, 0.8, 0.7], [1, 0.6, 10], [1, 0.2, 10], [0, 0.9, 10]],  # Sample 3
    [[0, 0.5, 0.6], [0, 0.3, 0.4], [0, 0.7, 10], [1, 0.1, 10], [0, 0.8, 10]],  # Sample 4
])

y_pred = np.array([
    [[0.9, 0.6, 0.8], [0.9, 0.3, 0.5], [0.8, 0.85, 10000], [0.7, 0.4, 0.6], [0.2, 0.1, 0.3]],  # Sample 1
    [[0.7, 0.5, 0.4], [0.9, 0.2, 0.3], [0.9, 0.4, 0.6], [0.6, 0.7, 0.9], [0.8, 0.7, 0.8]],  # Sample 2
    [[0.8, 0.4, 0.5], [0.9, 0.6, 0.8], [0.9, 0.7, 0.5], [0.9, 0.3, 0.6], [0.2, 0.9, 0.7]],  # Sample 3
    [[0.9, 0.4, 0.3], [0.9, 0.6, 0.8], [0.6, 0.9, 0.7], [0.9, 0.3, 0.5], [0.8, 0.7, 0.6]],  # Sample 4
])

# Convert to tensors
y_true_tensor = tf.convert_to_tensor(y_true, dtype=tf.float32)
y_pred_tensor = tf.convert_to_tensor(y_pred, dtype=tf.float32)

loss_value = custom_loss(y_true_tensor, y_pred_tensor)
print("Loss Value:", loss_value.numpy())