In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy.signal import find_peaks
from tensorflow.keras.utils import to_categorical
from scipy.fft import rfft
from statsmodels.tsa.stattools import acf
import tensorflow as tf

In [2]:
# --- 1. Simulation Function ---
def simulate_black_hole_lightcurve(fs, fc, fm, qpo_amplitude, duration,
                                   noise_mean=0, noise_std=0.5,
                                   include_qpo=True, modulation_index=0.5,
                                   rng=None):
    """
    Simulate a black hole light curve with stochastic noise and an amplitude-modulated QPO signal.

    The noise term is the exponential of Gaussian samples (i.e. log-normally
    distributed flux). When enabled, an AM signal
    ``A*cos(2*pi*fc*t) * (1 + m*cos(2*pi*fm*t))`` is added on top, and the sum
    is standardized to zero mean and unit standard deviation.

    Parameters:
        fs : int
            Sampling frequency (Hz)
        fc : float
            Carrier frequency (Hz) for QPO
        fm : float
            Modulating frequency (Hz) for QPO
        qpo_amplitude : float
            Amplitude of the carrier signal (QPO); values <= 0 disable the QPO
        duration : float
            Duration of lightcurve (seconds)
        noise_mean : float
            Mean of the Gaussian noise (before exponentiation)
        noise_std : float
            Standard deviation of the Gaussian noise (before exponentiation)
        include_qpo : bool
            Whether to include the QPO signal
        modulation_index : float
            Modulation index for AM signal
        rng : np.random.Generator or None
            Optional random generator for reproducible draws. When None the
            global ``np.random`` state is used, as before.

    Returns:
        t : np.ndarray
            Time array
        flux : np.ndarray
            Normalized flux array (all zeros if the raw flux is constant)
    """
    # Derive the sample count as an integer first: np.arange with a float step
    # (1/fs) can gain or lose one element through rounding. The inner round()
    # absorbs products such as 0.3 * 10 == 3.0000000000000004.
    n_samples = max(int(np.ceil(round(duration * fs, 9))), 0)
    t = np.arange(n_samples) / fs

    # Log-normal noise: exponentiate Gaussian samples
    sampler = np.random if rng is None else rng
    noise = np.exp(sampler.normal(noise_mean, noise_std, size=n_samples))

    if include_qpo and qpo_amplitude > 0:
        # Carrier scaled by the QPO amplitude, multiplied by the AM envelope
        carrier = qpo_amplitude * np.cos(2 * np.pi * fc * t)
        envelope = 1 + modulation_index * np.cos(2 * np.pi * fm * t)
        qpo = carrier * envelope
    else:
        qpo = 0.0

    # Combine noise and QPO signal
    flux = noise + qpo

    # Nothing to normalize for an empty light curve
    if flux.size == 0:
        return t, flux

    # Normalize; a (numerically) constant curve has no scale, so only center it
    # instead of dividing by ~0 and producing NaN/inf.
    mu = np.mean(flux)
    sigma = np.std(flux)
    flux = flux - mu
    if sigma > np.finfo(float).eps * max(1.0, abs(mu)):
        flux = flux / sigma
    else:
        flux = np.zeros_like(flux)

    return t, flux

In [3]:
def generate_dataset_with_random_amplitudes(output_dir, num_samples=5000,
                                            fs=1, duration=512,
                                            modulation_index=0.5,
                                            amp_range=(0.1, 1.0)):
    """
    Build a balanced QPO / non-QPO light-curve dataset and save it to disk.

    For each of ``num_samples // 2`` iterations a carrier frequency, a
    modulating frequency and an amplitude are drawn uniformly at random, then
    one light curve with the QPO and one without it are simulated. The curves
    are stored alternately (QPO first) with one-hot labels in
    ``<output_dir>/data.npz`` under the keys ``X`` and ``y``.

    Parameters:
        output_dir : str
            Directory the archive is written to (created if missing)
        num_samples : int
            Total number of curves; an odd value yields ``num_samples - 1``
        fs : int
            Sampling frequency
        duration : int
            Length of each light curve in seconds
        modulation_index : float
            Modulation index for the AM QPO
        amp_range : tuple
            (min, max) bounds of the uniformly drawn QPO amplitude
    """
    os.makedirs(output_dir, exist_ok=True)
    n_points = int(duration * fs)
    n_pairs = num_samples // 2

    curves, classes = [], []
    for _pair in range(n_pairs):
        # One random parameter set shared by both curves of the pair
        carrier_hz = np.random.uniform(0.01, 1.0)
        modulator_hz = np.random.uniform(0.005, 0.1)
        amplitude = np.random.uniform(*amp_range)

        # QPO curve first (class 1), then its QPO-free counterpart (class 0)
        for has_qpo in (True, False):
            _, flux = simulate_black_hole_lightcurve(
                fs, carrier_hz, modulator_hz, amplitude, duration,
                include_qpo=has_qpo,
                modulation_index=modulation_index)
            curves.append(flux[:n_points].reshape(-1, 1))
            classes.append(int(has_qpo))

    features = np.array(curves)
    one_hot = to_categorical(np.array(classes), 2)

    archive_path = os.path.join(output_dir, 'data.npz')
    np.savez_compressed(archive_path, X=features, y=one_hot)


In [4]:
# Write a 10k-curve dataset whose QPO amplitudes are drawn from [0.6, 1.0]
output_folder = "qpo_experiments/dataset_random_amp"
generate_dataset_with_random_amplitudes(
    output_dir=output_folder,
    num_samples=10000,
    amp_range=(0.6, 1.0),
)


In [5]:
from sklearn.model_selection import train_test_split

def load_split_dataset(npz_path, batch_size=64, test_size=0.2, shuffle=True):
    """
    Load the light curves from an ``.npz`` archive and return unlabeled
    train/validation ``tf.data`` pipelines.

    Parameters:
        npz_path : str
            Path to an archive containing an ``X`` array (the generator in this
            notebook writes it with shape (N, seq_length, 1))
        batch_size : int
            Batch size for both pipelines
        test_size : float
            Fraction of samples held out for validation
        shuffle : bool
            Whether to shuffle the training pipeline

    Returns:
        train_ds, val_ds : tf.data.Dataset
            Batched, prefetched datasets of float32 series
    """
    # np.load on an .npz returns a lazily-read NpzFile that holds the file
    # open; read the array inside a context manager so the handle is released.
    with np.load(npz_path) as data:
        X = data['X'].astype(np.float32)

    # Fixed random_state keeps the split identical across runs
    X_train, X_val = train_test_split(X, test_size=test_size, random_state=42)

    train_ds = tf.data.Dataset.from_tensor_slices(X_train)
    val_ds = tf.data.Dataset.from_tensor_slices(X_val)

    if shuffle:
        # Buffer spans the whole training set so the shuffle stays uniform
        # regardless of dataset size (was a fixed 10000).
        train_ds = train_ds.shuffle(buffer_size=len(X_train))
    train_ds = train_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    val_ds = val_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    return train_ds, val_ds

def load_conditional_dataset(npz_path, batch_size=64, test_size=0.2):
    """
    Load light curves and their one-hot labels from an ``.npz`` archive and
    return (series, label) train/validation ``tf.data`` pipelines.

    Parameters:
        npz_path : str
            Path to an archive containing ``X`` (series) and ``y`` (labels;
            the generator in this notebook writes them with shape (N, 2))
        batch_size : int
            Batch size for both pipelines
        test_size : float
            Fraction of samples held out for validation

    Returns:
        train_ds, val_ds : tf.data.Dataset
            Batched, prefetched datasets yielding ``(series, label)`` tuples;
            only the training pipeline is shuffled
    """
    # Read both arrays inside a context manager so the NpzFile handle is closed
    with np.load(npz_path) as data:
        X = data['X'].astype(np.float32)
        y = data['y'].astype(np.float32)

    # Fixed random_state keeps the split identical across runs
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=test_size, random_state=42)

    # Shuffle buffer covers the whole training set; prefetch mirrors
    # load_split_dataset so both loaders build the same kind of pipeline.
    train_ds = (
        tf.data.Dataset.from_tensor_slices((X_train, y_train))
        .shuffle(len(X_train))
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )
    val_ds = (
        tf.data.Dataset.from_tensor_slices((X_val, y_val))
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

    return train_ds, val_ds



In [6]:
def build_conditional_generator(latent_dim=100, label_dim=2, seq_length=512):
    """
    Build the label-conditioned generator.

    The latent vector and the label vector are concatenated, projected through
    two dense layers to a ``(seq_length, 32)`` feature map, perturbed with
    Gaussian noise, passed through a sequence-returning GRU and mapped to one
    value per time step.

    Parameters:
        latent_dim : int
            Size of the noise input
        label_dim : int
            Size of the label input
        seq_length : int
            Number of time steps in the generated series

    Returns:
        tf.keras.Model taking ``[noise, label]`` and producing a
        ``(seq_length, 1)`` series per sample
    """
    layers = tf.keras.layers

    z_in = tf.keras.Input(shape=(latent_dim,))
    cond_in = tf.keras.Input(shape=(label_dim,))

    # Dense stem: joint (noise, label) vector -> per-time-step features
    h = layers.Concatenate()([z_in, cond_in])
    h = layers.Dense(128)(h)
    h = layers.LeakyReLU()(h)
    h = layers.Dense(seq_length * 32)(h)
    h = layers.Reshape((seq_length, 32))(h)

    # Recurrent head: noisy features -> one output value per step
    h = layers.GaussianNoise(0.05)(h)
    h = layers.GRU(64, return_sequences=True)(h)
    series_out = layers.TimeDistributed(layers.Dense(1))(h)

    return tf.keras.Model([z_in, cond_in], series_out)


def build_conditional_discriminator(seq_length=512, label_dim=2):
    """
    Build the label-conditioned discriminator.

    The label vector is repeated along the time axis and concatenated with the
    series as extra channels; two stacked GRUs summarize the sequence and a
    dense head emits a single sigmoid score.

    Parameters:
        seq_length : int
            Number of time steps in the input series
        label_dim : int
            Size of the label input

    Returns:
        tf.keras.Model taking ``[series, label]`` and producing one sigmoid
        output per sample
    """
    layers = tf.keras.layers

    seq_in = tf.keras.Input(shape=(seq_length, 1))
    cond_in = tf.keras.Input(shape=(label_dim,))

    # Tile the label to (seq_length, label_dim) so it can ride along each step
    cond_per_step = layers.RepeatVector(seq_length)(cond_in)
    h = layers.Concatenate()([seq_in, cond_per_step])

    # Sequence encoder followed by a small dense classifier
    h = layers.GRU(64, return_sequences=True)(h)
    h = layers.GRU(32)(h)
    h = layers.Dense(64)(h)
    h = layers.LeakyReLU()(h)
    score = layers.Dense(1, activation='sigmoid')(h)

    return tf.keras.Model([seq_in, cond_in], score)


class ConditionalTimeSeriesGAN(tf.keras.Model):
    """
    Keras model wrapper that trains a conditional generator/discriminator pair
    with a custom ``train_step``.

    Both sub-models are called as ``model([series_or_noise, labels])``. The
    loss and both optimizers are created here rather than passed to
    ``compile``.
    """

    def __init__(self, generator, discriminator, latent_dim):
        """
        Parameters:
            generator : model called as ``generator([noise, labels])``
            discriminator : model called as ``discriminator([series, labels])``
            latent_dim : int
                Width of the noise vectors sampled in ``train_step``
        """
        super().__init__()
        self.generator = generator
        self.discriminator = discriminator
        self.latent_dim = latent_dim
        # BinaryCrossentropy with default arguments, so the discriminator is
        # presumably expected to output probabilities (the builder in this
        # notebook ends in a sigmoid).
        self.loss_fn = tf.keras.losses.BinaryCrossentropy()
        # Separate Adam optimizers, each constructed with 1e-4
        self.gen_optimizer = tf.keras.optimizers.Adam(1e-4)
        self.disc_optimizer = tf.keras.optimizers.Adam(1e-4)

    def compile(self):
        """Compile with no arguments; loss and optimizers are set in ``__init__``."""
        super().compile()

    @tf.function
    def train_step(self, data):
        """
        Run one discriminator update followed by one generator update.

        Parameters:
            data : tuple ``(real_series, labels)`` for one batch

        Returns:
            dict with the batch ``gen_loss`` and ``disc_loss``
        """
        real_series, labels = data
        batch_size = tf.shape(real_series)[0]
        noise = tf.random.normal([batch_size, self.latent_dim])

        # Train Discriminator
        with tf.GradientTape() as disc_tape:
            # Fakes are conditioned on the same labels as the real batch
            fake_series = self.generator([noise, labels], training=True)
            real_output = self.discriminator(
                [real_series, labels], training=True)
            fake_output = self.discriminator(
                [fake_series, labels], training=True)

            # Targets: 1 for real series, 0 for generated ones
            real_labels = tf.ones((batch_size, 1))
            fake_labels = tf.zeros((batch_size, 1))
            disc_loss = self.loss_fn(
                real_labels, real_output) + self.loss_fn(fake_labels, fake_output)

        # Only discriminator variables are differentiated and updated here
        grads_disc = disc_tape.gradient(
            disc_loss, self.discriminator.trainable_variables)
        self.disc_optimizer.apply_gradients(
            zip(grads_disc, self.discriminator.trainable_variables))

        # Train Generator
        # Fresh noise is drawn for the generator step
        noise = tf.random.normal([batch_size, self.latent_dim])
        with tf.GradientTape() as gen_tape:
            generated_series = self.generator([noise, labels], training=True)
            fake_output = self.discriminator(
                [generated_series, labels], training=True)
            # Generator objective: have the (just updated) discriminator score fakes as 1
            gen_loss = self.loss_fn(tf.ones((batch_size, 1)), fake_output)

        # Only generator variables are differentiated and updated here
        grads_gen = gen_tape.gradient(
            gen_loss, self.generator.trainable_variables)
        self.gen_optimizer.apply_gradients(
            zip(grads_gen, self.generator.trainable_variables))

        return {"gen_loss": gen_loss, "disc_loss": disc_loss}

In [7]:
import matplotlib.pyplot as plt
from scipy.signal import welch
from statsmodels.tsa.stattools import acf
from scipy.stats import lognorm

def plot_psd(series, fs=1, label='PSD'):
    """
    Draw the Welch power spectral density of ``series`` on the current axes.

    Parameters:
        series : array-like
            1-D signal
        fs : float
            Sampling frequency passed to ``welch``
        label : str
            Legend entry for the curve
    """
    # Welch estimate with 256-sample segments
    freqs, power = welch(series, fs=fs, nperseg=256)

    # Log-scaled power axis, then decorate the current axes
    plt.semilogy(freqs, power, label=label)
    plt.title("Power Spectral Density")
    plt.xlabel("Frequency (Hz)")
    plt.ylabel("Power")
    plt.legend()

def plot_acf(series, lags=100, label='ACF'):
    """
    Draw the autocorrelation function of ``series`` on the current axes.

    Parameters:
        series : array-like
            1-D signal
        lags : int
            Number of lags requested from ``acf``
        label : str
            Legend entry for the curve
    """
    # FFT-based autocorrelation estimate
    correlation = acf(series, nlags=lags, fft=True)

    # Plot against the lag index, then decorate the current axes
    plt.plot(correlation, label=label)
    plt.title("ACF")
    plt.xlabel("Lag")
    plt.ylabel("Autocorrelation")
    plt.legend()

def plot_flux_histogram(series, label='Generated'):
    """
    Draw a density histogram of ``series`` with a log-normal fit overlaid on
    the current axes.

    A log-normal needs positive support, so the fit is done on the series
    shifted to start just above zero. The fitted density is then evaluated at
    the *same shifted coordinates* and drawn against the original flux values,
    so the curve lines up with the histogram. (Previously the pdf was evaluated
    at the unshifted x values, which displaced the curve by the shift.)

    Parameters:
        series : array-like
            1-D flux values
        label : str
            Name used in the legend entries
    """
    series = np.asarray(series)
    lo, hi = np.min(series), np.max(series)
    eps = 1e-6  # keeps the smallest shifted value strictly positive

    # Fit on the shifted, strictly positive copy of the data
    shape, loc, scale = lognorm.fit(series - lo + eps)

    # A pure translation has unit Jacobian: density of the original variable at
    # x equals the fitted density at the shifted coordinate.
    x = np.linspace(lo, hi, 100)
    pdf = lognorm.pdf(x - lo + eps, shape, loc, scale)

    plt.hist(series, bins=40, density=True, alpha=0.6, label=f"{label} Flux")
    plt.plot(x, pdf, '--', label=f"LogNorm Fit ({label})")
    plt.xlabel("Flux")
    plt.ylabel("Density")
    plt.title("Flux Histogram with Log-normal Fit")
    plt.legend()


In [8]:
from matplotlib.backends.backend_pdf import PdfPages

class GANMonitor(tf.keras.callbacks.Callback):
    """
    Callback that, after every epoch, writes a PDF comparing generated and real
    series (PSD, ACF and flux histogram), one page per sample.

    Parameters:
        generator : model called as ``generator([noise, labels], training=False)``
        val_dataset : iterable whose first element is a batch tuple; item 0 of
            that tuple is used as the real series
        latent_dim : int
            Width of the noise vectors fed to the generator
        num_samples : int
            Number of samples (PDF pages) per epoch
        label : sequence of float
            Conditioning label applied to every generated sample
        save_dir : str
            Directory the PDFs are written to (created if missing)

    NOTE(review): real samples are taken from the first validation batch
    without filtering by ``label``, so a page may compare a label-conditioned
    generated series with a real series of a different class.
    """

    def __init__(self, generator, val_dataset, latent_dim, num_samples=3, label=(0, 1), save_dir="gan_outputs"):
        super().__init__()
        self.generator = generator
        self.val_dataset = val_dataset
        self.latent_dim = latent_dim
        self.num_samples = num_samples
        # Immutable default; list(label) accepts lists, tuples and arrays alike
        self.label = tf.convert_to_tensor([list(label)] * num_samples, dtype=tf.float32)
        self.save_dir = save_dir
        os.makedirs(save_dir, exist_ok=True)

    @staticmethod
    def _as_rows(batch):
        """Flatten each sample of a batch to 1-D while keeping the batch axis."""
        batch = np.asarray(batch)
        return batch.reshape(len(batch), -1)

    def on_epoch_end(self, epoch, logs=None):
        """Generate samples, pair them with real ones and save the epoch's PDF."""
        noise = tf.random.normal([self.num_samples, self.latent_dim])
        # A bare .squeeze() would also drop the batch axis when it has length 1
        # (num_samples == 1 or a one-element validation batch); flatten per
        # sample instead.
        generated = self._as_rows(
            self.generator([noise, self.label], training=False).numpy())
        real_samples = self._as_rows(
            next(iter(self.val_dataset))[0].numpy())[:self.num_samples]

        # The validation batch may hold fewer than num_samples series
        n_pages = min(len(generated), len(real_samples))

        pdf_path = os.path.join(self.save_dir, f"epoch_{epoch+1}.pdf")
        with PdfPages(pdf_path) as pdf:
            for i in range(n_pages):
                fig, axes = plt.subplots(1, 3, figsize=(18, 4))
                try:
                    # The plot helpers draw on the current axes
                    plt.sca(axes[0])
                    plot_psd(generated[i], label='Generated')
                    plot_psd(real_samples[i], label='Real')

                    plt.sca(axes[1])
                    plot_acf(generated[i], label='Generated')
                    plot_acf(real_samples[i], label='Real')

                    plt.sca(axes[2])
                    plot_flux_histogram(generated[i], label='Generated')
                    plot_flux_histogram(real_samples[i], label='Real')

                    fig.suptitle(f"Sample {i + 1} – Epoch {epoch + 1}")
                    fig.tight_layout()

                    # Save this figure as one page in the PDF
                    pdf.savefig(fig)
                finally:
                    # Release the figure even if a plot helper raised
                    plt.close(fig)

        print(f"📄 Saved PDF for epoch {epoch + 1} at: {pdf_path}")


In [9]:
import os
# Directory shared by the training log and the saved models
os.makedirs("saved_models", exist_ok=True)

# Per-epoch metrics go to a CSV that is overwritten on every run
csv_logger = tf.keras.callbacks.CSVLogger(
    filename="saved_models/conditional_gan_training_log.csv",
    append=False,
)

In [10]:
# (series, one-hot label) batches for training and for the monitor callback
train_ds, val_ds = load_conditional_dataset(
    npz_path="qpo_experiments/dataset_random_amp/data.npz",
    batch_size=64,
)

# Generator / discriminator pair sharing one latent size
latent_dim = 100
generator = build_conditional_generator(latent_dim=latent_dim)
discriminator = build_conditional_discriminator()

gan = ConditionalTimeSeriesGAN(
    generator=generator,
    discriminator=discriminator,
    latent_dim=latent_dim,
)
gan.compile()

# Per-epoch diagnostics conditioned on the one-hot label [0, 1] (QPO only)
monitor = GANMonitor(
    generator=generator,
    val_dataset=val_ds,
    latent_dim=latent_dim,
    label=[0, 1],
)
gan.fit(train_ds, epochs=100, callbacks=[monitor, csv_logger])

Epoch 1/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 183ms/step - disc_loss: 1.3843 - gen_loss: 0.6989📄 Saved PDF for epoch 1 at: gan_outputs/epoch_1.pdf
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 208ms/step - disc_loss: 1.3835 - gen_loss: 0.7000
Epoch 2/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step - disc_loss: 1.3747 - gen_loss: 0.6981📄 Saved PDF for epoch 2 at: gan_outputs/epoch_2.pdf
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 198ms/step - disc_loss: 1.3739 - gen_loss: 0.6986
Epoch 3/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step - disc_loss: 1.3843 - gen_loss: 0.6814📄 Saved PDF for epoch 3 at: gan_outputs/epoch_3.pdf
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 193ms/step - disc_loss: 1.3835 - gen_loss: 0.6825
Epoch 4/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step - disc_loss: 1.3677 - g

<keras.src.callbacks.history.History at 0x78cf64b38250>

In [15]:
# Mount Google Drive at /content/drive so later cells can write archives there.
# NOTE(review): this cell's execution count (15) is higher than that of the
# cells listed after it, i.e. it was run out of order. On a fresh
# Restart-and-Run-All it executes before the save cells, which is the order
# the zip cell needs. The google.colab import presumably limits this cell to
# Colab runtimes.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
import os
import shutil

# Save generator and discriminator after training as .h5 files under saved_models/.
# The directory is created by the earlier cell that sets up the CSV logger, so
# the disabled makedirs below is only needed if this cell is run on its own:
# os.makedirs("saved_models", exist_ok=True)
generator.save("saved_models/qpo_gan_generator.h5")
discriminator.save("saved_models/qpo_gan_discriminator.h5")
# Disabled copy of the training log to Drive.
# NOTE(review): the CSVLogger in this notebook writes to
# saved_models/conditional_gan_training_log.csv, not ./conditional_gan_training_log.csv,
# so the source path below needs fixing before this line is re-enabled.
# shutil.copyfile('./conditional_gan_training_log.csv', 'drive/MyDrive/saved_models/gan_training_log.csv')



In [13]:
# from tensorflow.keras.models import load_model

# # Load generator and discriminator
# generator = load_model("saved_models/generator_bigru.h5", compile=False)
# discriminator = load_model("saved_models/discriminator_bigru.h5", compile=False)

# generator.summary()  # Optional check

In [16]:
# Archive the per-epoch PDFs and the saved models into Google Drive.
# Requires Drive to be mounted at /content/drive (see the drive.mount cell).
# Because absolute source paths are given, archive entries are stored with
# their full path (e.g. content/gan_outputs/epoch_54.pdf in the output below).
!zip -r /content/drive/MyDrive/saved_models/gan_outputs.zip /content/gan_outputs/
!zip -r /content/drive/MyDrive/saved_models/saved_models.zip /content/saved_models/


  adding: content/gan_outputs/ (stored 0%)
  adding: content/gan_outputs/epoch_54.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_21.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_15.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_32.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_56.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_25.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_67.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_97.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_46.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_95.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_96.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_34.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_59.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_75.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_62.pdf (deflated 13%)
  adding: content/gan_outputs/epoch_2.pdf (deflated 12%)
  adding: content/gan_outputs/