<a href="https://colab.research.google.com/github/supriyag123/PHD_Pub/blob/main/AGENTIC-MODULE2-VAE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# VAE Data Generator Module - Your code with resumability and custom layer fix
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import math
import plotly.graph_objects as go
import keras
from tensorflow import keras
from keras import layers
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, RepeatVector, TimeDistributed, Input
from keras.models import Model
from keras import saving
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import EarlyStopping
from keras.callbacks import Callback
import plotly
from keras import losses
import plotly.express as px
from statsmodels.tsa.api import VAR
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')

class VAEDataGenerator:
    """
    One-time VAE training and synthetic data generation with resumability
    """

    def __init__(self, output_dir='/content/drive/MyDrive/PHD/2025/TEMP_OUTPUT_METROPM/'):
        self.output_dir = output_dir
        self.encoder = None
        self.decoder = None
        self.vae = None

        # Create checkpoints directory
        self.checkpoint_dir = f"{output_dir}checkpoints/"
        os.makedirs(self.checkpoint_dir, exist_ok=True)

    def register_custom_layers(self):
        """Register the custom ``Sampling`` layer with Keras and return the class.

        The layer is registered under package ``"MyLayers"`` so that saved
        encoder files referencing it can be deserialised by ``load_model``.

        Returns:
            The freshly defined ``Sampling`` layer class.
        """
        # The global registry is emptied before registering.
        # NOTE(review): presumably this avoids an "already registered" error
        # when the cell is re-run on some Keras versions - confirm before
        # removing, since it also drops custom objects registered elsewhere.
        saving.get_custom_objects().clear()

        @saving.register_keras_serializable(package="MyLayers")
        class Sampling(layers.Layer):
            """Reparameterisation layer: z = z_mean + exp(0.5 * z_log_var) * eps."""

            def __init__(self, factor, **kwargs):
                # **kwargs carries the base-layer settings (name, dtype, ...)
                # that get_config() below now round-trips.
                super().__init__(**kwargs)
                # Stored and serialised, but not used in call().
                self.factor = factor

            def call(self, inputs):
                z_mean, z_log_var = inputs
                batch = tf.shape(z_mean)[0]
                dim = tf.shape(z_mean)[1]
                epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
                return z_mean + tf.exp(0.5 * z_log_var) * epsilon

            def get_config(self):
                # Start from the base config so name/dtype/trainable survive a
                # save/load cycle; configs holding only "factor" still load.
                config = super().get_config()
                config["factor"] = self.factor
                return config

        return Sampling

    def load_vae_models(self):
        """Load VAE models with proper custom layer registration"""
        # Register custom layers first
        Sampling = self.register_custom_layers()

        encoder_path = f'{self.output_dir}METROPM_vae-encoder-latent5-dim256.keras'
        decoder_path = f'{self.output_dir}METROPM_vae-decoder-latent5-dim256.keras'

        if os.path.exists(encoder_path) and os.path.exists(decoder_path):
            print("Loading VAE models with custom layers...")
            self.encoder = keras.models.load_model(encoder_path)
            self.decoder = keras.models.load_model(decoder_path)
            print("VAE models loaded successfully!")
            return True
        else:
            print("VAE models not found!")
            return False

    def train_vae_pipeline(self, train_data_path):
        """
        Train the LSTM VAE and save its encoder and decoder.

        Args:
            train_data_path: Path to a ``.npy`` array whose shape is read as
                (n_sequences, window_size, n_features).

        Returns:
            The ``History`` object returned by ``fit``.

        Side effects:
            Writes the encoder/decoder ``.keras`` files into ``output_dir`` and
            sets ``self.encoder``, ``self.decoder``, ``self.vae``,
            ``self.window_size`` and ``self.n_features``.
        """
        print("Starting VAE training pipeline...")

        # Load data
        train_data = np.load(train_data_path)
        window_size = train_data.shape[1]
        n_features = train_data.shape[2]

        # Only the first 80% (rounded up) of the sequences is used for fitting.
        count_train = int(math.ceil(0.8 * train_data.shape[0]))
        x_train = train_data[:count_train]

        # Reuse the shared helper so training and loading agree on the
        # custom layer definition (it also resets the custom-object registry).
        Sampling = self.register_custom_layers()

        latent_dim = 5
        intermediate_dim = 256
        half_dim = int(intermediate_dim / 2)

        # Encoder: two stacked LSTMs -> dense -> (z_mean, z_log_var, sampled z).
        # Layer names are kept unchanged so saved files stay comparable.
        encoder_inputs = layers.Input(shape=(window_size, n_features), name="encoder_input")
        x = layers.LSTM(intermediate_dim, activation='tanh', name="lstm1", return_sequences=True)(encoder_inputs)
        xx = layers.LSTM(half_dim, activation='tanh', name="lstm2", return_sequences=False)(x)
        x1 = layers.Dense(half_dim, name="dense")(xx)
        z_mean = layers.Dense(latent_dim, name="z_mean")(x1)
        z_log_var = layers.Dense(latent_dim, name="z_log_var")(x1)
        z = Sampling(1)([z_mean, z_log_var])
        encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
        encoder.summary()

        # Decoder: repeat z over the window, then dense -> two LSTMs -> per-step dense.
        inp_z = layers.Input(shape=(latent_dim,), name="decoder")
        d1 = layers.RepeatVector(window_size, name="repeatvect")(inp_z)
        d2 = layers.Dense(half_dim, name="Dense2")(d1)
        d3 = layers.LSTM(half_dim, activation='tanh', return_sequences=True, name="lstm1")(d2)
        d4 = layers.LSTM(intermediate_dim, activation='tanh', return_sequences=True, name="lstm2")(d3)
        decode_out = layers.TimeDistributed(layers.Dense(n_features), name="decodeout")(d4)
        decoder = keras.Model(inp_z, decode_out, name="decoder")
        decoder.summary()

        # NOTE: earlier revisions built an EarlyStopping and a
        # LearningRateScheduler callback here but never passed them to fit();
        # they are left out so training behaves exactly as before.

        # KL annealing parameters
        n_epochs = 150
        klstart = 20
        kl_annealtime = n_epochs - klstart
        weight = K.variable(0.0)  # KL weight, mutated by AnnealingCallback

        class VAE(keras.Model):
            """VAE wrapper with custom train/test steps and three loss trackers."""

            def __init__(self, encoder, decoder, **kwargs):
                super().__init__(**kwargs)
                self.encoder = encoder
                self.decoder = decoder
                self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
                self.reconstruction_loss_tracker = keras.metrics.Mean(
                    name="reconstruction_loss"
                )
                self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

            @property
            def metrics(self):
                return [
                    self.total_loss_tracker,
                    self.reconstruction_loss_tracker,
                    self.kl_loss_tracker,
                ]

            def _vae_losses(self, data):
                """Return (reconstruction_loss, kl_loss) for one batch."""
                # fit()/evaluate() may hand the batch over wrapped in a tuple.
                if isinstance(data, (tuple, list)):
                    data = data[0]
                z_mean, z_log_var, z = self.encoder(data)
                reconstruction = self.decoder(z)
                # MSE over features, summed over time steps, averaged over batch.
                reconstruction_loss = tf.reduce_mean(
                    tf.reduce_sum(
                        losses.mean_squared_error(data, reconstruction), axis=-1
                    )
                )
                kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
                kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
                return reconstruction_loss, kl_loss

            def _record_losses(self, total_loss, reconstruction_loss, kl_loss):
                """Update the trackers and return their current values."""
                self.total_loss_tracker.update_state(total_loss)
                self.reconstruction_loss_tracker.update_state(reconstruction_loss)
                self.kl_loss_tracker.update_state(kl_loss)
                return {
                    "loss": self.total_loss_tracker.result(),
                    "reconstruction_loss": self.reconstruction_loss_tracker.result(),
                    "kl_loss": self.kl_loss_tracker.result(),
                }

            def train_step(self, data):
                with tf.GradientTape() as tape:
                    reconstruction_loss, kl_loss = self._vae_losses(data)
                    # KL term is scaled by the annealed weight during training.
                    total_loss = reconstruction_loss + (weight * kl_loss)
                grads = tape.gradient(total_loss, self.trainable_weights)
                self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
                return self._record_losses(total_loss, reconstruction_loss, kl_loss)

            def test_step(self, data):
                reconstruction_loss, kl_loss = self._vae_losses(data)
                # Validation reports the un-annealed sum of both terms.
                total_loss = reconstruction_loss + kl_loss
                # The trackers must be updated here; otherwise the reported
                # validation metrics never reflect the validation batches.
                return self._record_losses(total_loss, reconstruction_loss, kl_loss)

        class AnnealingCallback(Callback):
            """Raises the KL weight by 1/kl_annealtime per epoch inside a window."""

            def __init__(self, weight):
                super().__init__()
                self.weight = weight

            def on_epoch_end(self, epoch, logs=None):
                # NOTE(review): with klstart = 20 this window only covers
                # epochs 21-23, so the weight stops near 3/kl_annealtime
                # rather than reaching 1 - confirm this is intended.
                if epoch > klstart and epoch < klstart*1.2:
                    new_weight = min(K.get_value(self.weight) + (1./ kl_annealtime), 1.)
                    K.set_value(self.weight, new_weight)
                print ("Current KL Weight is " + str(K.get_value(self.weight)))

        # Train the VAE
        vae = VAE(encoder, decoder)
        vae.compile(optimizer=keras.optimizers.Adam(clipnorm=1))
        history = vae.fit(
            x_train,
            epochs=n_epochs,
            batch_size=32,
            validation_split=0.1,
            callbacks=[AnnealingCallback(weight)],
        )

        # Save models
        encoder.save(f'{self.output_dir}METROPM_vae-encoder-latent5-dim256.keras')
        decoder.save(f'{self.output_dir}METROPM_vae-decoder-latent5-dim256.keras')

        # Store for data generation
        self.encoder = encoder
        self.decoder = decoder
        self.vae = vae
        self.window_size = window_size
        self.n_features = n_features

        print("VAE training complete and models saved!")
        return history

    def generate_synthetic_dataset_fast(self, train_data_path, cohort_size=350000, checkpoint_every=5000):
        """
        FASTER synthetic dataset generation with optimizations
        """
        print(f"Generating synthetic dataset FAST - {cohort_size} samples...")

        # Load models if needed with proper custom layer registration
        if self.encoder is None:
            if not self.load_vae_models():
                raise ValueError("VAE models not found! Train VAE first.")

        # Load and process data
        train_data = np.load(train_data_path)
        window_size = train_data.shape[1]
        n_features = train_data.shape[2]

        # Keep original generator_multiply = 100 (as requested)
        generator_multiply = 100
        print(f"Using generator_multiply = {generator_multiply}")

        # Process all base samples (no reduction)
        max_base_samples = min(cohort_size // generator_multiply, train_data.shape[0])
        if max_base_samples < train_data.shape[0]:
            print(f"Processing {max_base_samples} base samples to reach target {cohort_size}")
            train_data = train_data[:max_base_samples]

        # Get encodings
        print("Computing encodings...")
        X_train_encoded = self.encoder.predict(train_data, batch_size=64, verbose=1)  # Larger batch
        mu, logvar, z = X_train_encoded
        sigma = tf.exp(0.5 * logvar)

        # OPTIMIZATION: Batch processing for decoder
        print("Generating synthetic data in batches...")
        all_results = []
        batch_size = 50  # Process 50 samples at once

        for i in range(0, len(mu), batch_size):
            batch_end = min(i + batch_size, len(mu))
            batch_mu = mu[i:batch_end]
            batch_sigma = sigma[i:batch_end]

            # Generate all Z vectors for this batch
            batch_z = []
            for j in range(len(batch_mu)):
                z_samples = tf.random.normal(
                    shape=(generator_multiply, mu.shape[1]),
                    mean=batch_mu[j],
                    stddev=batch_sigma[j]
                )
                batch_z.append(z_samples)

            # Decode all at once
            all_z = tf.concat(batch_z, axis=0)
            decoded_batch = self.decoder.predict(all_z, batch_size=128, verbose=0)  # Large batch
            decoded_batch = decoded_batch.reshape((decoded_batch.shape[0], window_size * n_features))
            all_results.append(decoded_batch)

            if (i // batch_size + 1) % 10 == 0:
                print(f"Processed {i + batch_size}/{len(mu)} base samples...")

        results1 = np.concatenate(all_results, axis=0)
        np.save(f'{self.output_dir}generated_large_subsquence_data.npy', results1)
        print(f"Fast synthetic dataset complete: {results1.shape}")
        return results1

    def compute_var_windows_fast(self, data_path, start_idx=0, batch_size=5000):
        """
        Pick, per sequence, the VAR lag with the lowest one-step forecast error.

        For every sequence in ``[start_idx, start_idx + batch_size)`` a VAR
        model is fitted on all but the last time step for each candidate lag,
        the last step is forecast, and the squared error on the first feature
        (column ``V1``) is compared across lags.

        Args:
            data_path: Path to a ``.npy`` array of flattened windows; it is
                reshaped to (n, window_size, n_features).
            start_idx: Index of the first sequence to process.
            batch_size: Maximum number of sequences to process.

        Returns:
            1-D array with the selected lag for each processed sequence. The
            array is also saved to a ``...BATCH_{start}_{end}.npy`` file.

        Raises:
            ValueError: If there are sequences to process but the window is
                too short to provide any candidate lag.
        """
        print(f"Computing VAR windows FAST for batch starting at {start_idx}...")

        max_var_lag = 30
        print(f"Testing VAR lags from 2 to {max_var_lag}")

        x = np.load(data_path)
        # getattr: the attributes may not exist yet if no training/loading
        # step has set them; fall back to the historical defaults.
        window_size = getattr(self, 'window_size', None) or 50
        n_features = getattr(self, 'n_features', None) or 13

        x_3d = x.reshape((x.shape[0], window_size, n_features))
        n_future = 1

        end_idx = min(start_idx + batch_size, x_3d.shape[0])

        # Lags start at 2 and are capped by both max_var_lag and half the window.
        candidate_lags = range(2, min(max_var_lag + 1, window_size // 2))
        if start_idx < end_idx and not candidate_lags:
            raise ValueError(
                f"window_size={window_size} is too short to test any VAR lag >= 2"
            )

        # Column names follow the actual feature count instead of assuming 13.
        columns = [f'V{j + 1}' for j in range(n_features)]
        failed_fit_error = 99999  # sentinel error for lags whose fit/forecast failed

        best_window_for_long_seq = []
        for i in range(start_idx, end_idx):
            # The frame and its split do not depend on the lag: build them once.
            df = pd.DataFrame(x_3d[i, :, :], columns=columns)
            df_train, df_test = df[0:-n_future], df[-n_future:]

            errors = []
            for k in candidate_lags:
                try:
                    fitted = VAR(df_train).fit(k)
                    forecast = fitted.forecast(y=df_train.values[-k:], steps=n_future)
                    df_forecast = pd.DataFrame(
                        forecast, index=df.index[-n_future:], columns=df.columns
                    )
                    errors.append(
                        mean_squared_error(df_test['V1'], df_forecast['V1'].values)
                    )
                except Exception:
                    # Best effort: a lag that cannot be fitted simply loses.
                    errors.append(failed_fit_error)

            # Position in the list + 2 converts back to the lag value.
            best_window_for_long_seq.append(errors.index(min(errors)) + 2)

            if (i - start_idx) % 500 == 0:
                print(f'FAST processed {i - start_idx}/{end_idx - start_idx} sequences...')

        Window = np.array(best_window_for_long_seq)
        batch_file = f'{self.output_dir}generated-data-true-window-BATCH_{start_idx}_{end_idx}.npy'
        np.save(batch_file, Window)
        print(f"FAST VAR windows computed for batch {start_idx}-{end_idx}")
        return Window

    def run_fast_generation(self, train_data_path, target_samples=350000, force_retrain=False):
        """
        Optimized pipeline with your requested changes
        """
        print("="*60)
        print("STARTING OPTIMIZED VAE DATA GENERATION")
        print("="*60)

        # Step 1: Load existing VAE or train
        if force_retrain:
            print("FORCE RETRAIN: Training new VAE models...")
            self.train_vae_pipeline(train_data_path)
        elif not self.load_vae_models():
            print("No existing models found. Training VAE...")
            self.train_vae_pipeline(train_data_path)
        else:
            print("VAE models loaded successfully!")
            # Get data properties
            train_data = np.load(train_data_path)
            self.window_size = train_data.shape[1]
            self.n_features = train_data.shape[2]

        # Step 2: Generate synthetic dataset (full size, generator_multiply=100)
        print(f"Generating {target_samples} synthetic samples...")
        synthetic_data = self.generate_synthetic_dataset_fast(train_data_path, cohort_size=target_samples)

        # Step 3: Compute VAR windows (testing lags 2->30)
        print("Computing VAR windows (testing lags 2-30)...")
        batch_size = 5000
        total_samples = synthetic_data.shape[0]
        all_windows = []

        for start_idx in range(0, total_samples, batch_size):
            batch_windows = self.compute_var_windows_fast(
                f'{self.output_dir}generated_large_subsquence_data.npy',
                start_idx,
                batch_size
            )
            all_windows.append(batch_windows)

        # Combine and save
        final_windows = np.concatenate(all_windows, axis=0)

        # Save with timestamp if retraining
        if force_retrain:
            import datetime
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            np.save(f'{self.output_dir}generated-data-true-window-RETRAINED_{timestamp}.npy', final_windows)
            np.save(f'{self.output_dir}generated-data-RETRAINED_{timestamp}.npy', synthetic_data)
            print(f"Retrained models saved with timestamp: {timestamp}")
        else:
            np.save(f'{self.output_dir}generated-data-true-window-OPTIMIZED.npy', final_windows)
            np.save(f'{self.output_dir}generated-data-OPTIMIZED.npy', synthetic_data)

        print("="*60)
        print("OPTIMIZED VAE DATA GENERATION COMPLETE!")
        print(f"Generated {synthetic_data.shape[0]} synthetic samples")
        print(f"Computed {final_windows.shape[0]} VAR windows")
        print(f"VAR lag range: 2-30")
        print(f"Generator multiply: 100 (kept original)")
        if force_retrain:
            print("🔄 RETRAINED from scratch!")
        print("="*60)

        return {
            'synthetic_data': synthetic_data,
            'var_windows': final_windows
        }

    def evaluate_vae_performance(self, train_data_path, num_samples=5):
        """
        Evaluate VAE reconstruction performance with visualizations
        """
        print("="*60)
        print("EVALUATING VAE RECONSTRUCTION PERFORMANCE")
        print("="*60)

        # Load models if needed
        if self.encoder is None:
            if not self.load_vae_models():
                raise ValueError("VAE models not found! Train VAE first.")

        # Load test data
        train_data = np.load(train_data_path)

        # Take a few samples for evaluation
        test_samples = train_data[:num_samples]
        print(f"Testing reconstruction on {num_samples} samples...")

        # Encode and decode
        encoded = self.encoder.predict(test_samples)
        mu, logvar, z = encoded
        reconstructed = self.decoder.predict(z)

        # Calculate reconstruction metrics
        from sklearn.metrics import mean_squared_error, mean_absolute_error

        mse_scores = []
        mae_scores = []

        for i in range(num_samples):
            original = test_samples[i]
            recon = reconstructed[i]

            mse = mean_squared_error(original.flatten(), recon.flatten())
            mae = mean_absolute_error(original.flatten(), recon.flatten())

            mse_scores.append(mse)
            mae_scores.append(mae)

            print(f"Sample {i+1}: MSE = {mse:.6f}, MAE = {mae:.6f}")

        avg_mse = np.mean(mse_scores)
        avg_mae = np.mean(mae_scores)

        print(f"\nOverall Performance:")
        print(f"Average MSE: {avg_mse:.6f}")
        print(f"Average MAE: {avg_mae:.6f}")

        # Visualize reconstructions
        self.plot_reconstructions(test_samples, reconstructed, num_samples)

        return {
            'mse_scores': mse_scores,
            'mae_scores': mae_scores,
            'avg_mse': avg_mse,
            'avg_mae': avg_mae,
            'original_samples': test_samples,
            'reconstructed_samples': reconstructed
        }

    def plot_reconstructions(self, original, reconstructed, num_samples=5):
        """
        Plot original vs reconstructed subsequences, then per-feature quality.

        Args:
            original: Array indexed as [sample, time step, feature].
            reconstructed: Array indexed the same way as ``original``.
            num_samples: Number of leading samples to plot; clamped to the
                number of samples actually present in both arrays.
        """
        import matplotlib.pyplot as plt

        # Never index past the data that was passed in.
        num_samples = min(num_samples, original.shape[0], reconstructed.shape[0])

        # Determine number of features to plot
        n_features_to_plot = min(4, original.shape[2])  # Plot max 4 features

        # squeeze=False keeps `axes` 2-D for every grid shape, including a
        # single row and/or a single column, so axes[row, col] always works.
        fig, axes = plt.subplots(num_samples, n_features_to_plot,
                                figsize=(16, 3*num_samples), squeeze=False)

        for sample_idx in range(num_samples):
            for feature_idx in range(n_features_to_plot):
                ax = axes[sample_idx, feature_idx]

                # Plot original and reconstructed
                original_seq = original[sample_idx, :, feature_idx]
                recon_seq = reconstructed[sample_idx, :, feature_idx]

                ax.plot(original_seq, 'b-', label='Original', linewidth=2)
                ax.plot(recon_seq, 'r--', label='Reconstructed', linewidth=2)

                ax.set_title(f'Sample {sample_idx+1}, Feature {feature_idx+1}')
                ax.set_xlabel('Time Steps')
                ax.set_ylabel('Value')
                ax.legend()
                ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.suptitle('VAE Reconstruction Performance', y=1.02, fontsize=16)
        plt.show()

        # Feature-wise reconstruction quality
        self.plot_feature_reconstruction_quality(original, reconstructed)

    def plot_feature_reconstruction_quality(self, original, reconstructed):
        """
        Show average reconstruction MSE per feature and a latent scatter plot.

        Args:
            original: Array indexed as [sample, time step, feature].
            reconstructed: Array indexed the same way as ``original``.
        """
        import matplotlib.pyplot as plt

        sample_count = original.shape[0]
        n_features = original.shape[2]

        # For each feature: MSE per sample over time, then mean across samples.
        mse_per_feature = [
            np.mean([
                mean_squared_error(original[s, :, f], reconstructed[s, :, f])
                for s in range(sample_count)
            ])
            for f in range(n_features)
        ]

        plt.figure(figsize=(12, 6))

        # Left panel: bar chart of the per-feature error.
        plt.subplot(1, 2, 1)
        plt.bar(range(1, n_features+1), mse_per_feature)
        plt.xlabel('Feature Index')
        plt.ylabel('Average MSE')
        plt.title('Reconstruction Quality per Feature')
        plt.grid(True, alpha=0.3)

        # Right panel: first two latent coordinates, only with several samples.
        if sample_count > 1:
            mu, logvar, z = self.encoder.predict(original)

            plt.subplot(1, 2, 2)
            plt.scatter(z[:, 0], z[:, 1], c=range(len(z)), cmap='viridis')
            plt.xlabel('Latent Dimension 1')
            plt.ylabel('Latent Dimension 2')
            plt.title('Latent Space Representation')
            plt.colorbar(label='Sample Index')

        plt.tight_layout()
        plt.show()

        print("\nFeature Reconstruction Quality (MSE):")
        for number, mse in enumerate(mse_per_feature, start=1):
            print(f"Feature {number}: {mse:.6f}")

    def quick_vae_check(self, train_data_path):
        """
        Quick VAE performance check - just print metrics
        """
        print("Running quick VAE performance check...")

        if self.encoder is None:
            if not self.load_vae_models():
                print("❌ VAE models not found!")
                return

        # Load a small sample
        train_data = np.load(train_data_path)
        test_sample = train_data[:10]  # Just 10 samples

        # Test reconstruction
        encoded = self.encoder.predict(test_sample, verbose=0)
        mu, logvar, z = encoded
        reconstructed = self.decoder.predict(z, verbose=0)

        # Calculate metrics
        mse = mean_squared_error(test_sample.flatten(), reconstructed.flatten())
        mae = mean_absolute_error(test_sample.flatten(), reconstructed.flatten())

        print(f"✅ VAE Performance Check:")
        print(f"   MSE: {mse:.6f}")
        print(f"   MAE: {mae:.6f}")
        print(f"   Input shape: {test_sample.shape}")
        print(f"   Output shape: {reconstructed.shape}")

        if mse < 0.1:
            print("✅ Good reconstruction quality!")
        elif mse < 0.5:
            print("⚠️  Moderate reconstruction quality")
        else:
    def force_retrain_pipeline(self, train_data_path, target_samples=350000):
        """
        Force complete retraining - deletes existing models and starts fresh
        """
        print("="*60)
        print("FORCE RETRAINING - STARTING FRESH")
        print("="*60)

        # Delete existing models
        encoder_path = f'{self.output_dir}METROPM_vae-encoder-latent5-dim256.keras'
        decoder_path = f'{self.output_dir}METROPM_vae-decoder-latent5-dim256.keras'

        if os.path.exists(encoder_path):
            os.remove(encoder_path)
            print("🗑️ Deleted existing encoder")

        if os.path.exists(decoder_path):
            os.remove(decoder_path)
            print("🗑️ Deleted existing decoder")

        # Reset internal models
        self.encoder = None
        self.decoder = None
        self.vae = None

        print("Starting fresh VAE training...")

        # Run pipeline with force retrain
        return self.run_fast_generation(train_data_path, target_samples, force_retrain=True)

# Script entry point: run the optimized pipeline, then a quick quality check.
if __name__ == "__main__":
    # Single definition of the training-data location used by every option.
    train_path = r'/content/drive/MyDrive/PHD/2025/TEMP_OUTPUT_METROPM/multivariate_long_sequences-TRAIN-AUTO.npy'
    generator = VAEDataGenerator()

    # OPTION 1: Normal run (uses existing models if available)
    print("Running optimized generation...")
    results = generator.run_fast_generation(train_path, target_samples=350000)

    # OPTION 2: Force retrain (uncomment to use)
    # print("Force retraining from scratch...")
    # results = generator.run_fast_generation(
    #     train_path, target_samples=350000, force_retrain=True
    # )

    # OPTION 3: Complete fresh start, deleting saved models (uncomment to use)
    # print("Complete fresh start...")
    # results = generator.force_retrain_pipeline(train_path, target_samples=350000)

    # OPTIONAL: Check VAE performance after completion
    separator = "=" * 60
    print("\n" + separator)
    print("CHECKING VAE PERFORMANCE")
    print(separator)

    generator.quick_vae_check(train_path)

Epoch 1/1000
4430/4430 ━━━━━━━━━━━━━━━━━━━━ 15s 2ms/step - loss: 0.8964 - mean_squared_error: 0.8964 - val_loss: 0.7946 - val_mean_squared_error: 0.7946
Epoch 2/1000
4430/4430 ━━━━━━━━━━━━━━━━━━━━ 7s 2ms/step - loss: 0.8044 - mean_squared_error: 0.8044 - val_loss: 0.7901 - val_mean_squared_error: 0.7901
Epoch 3/1000
4430/4430 ━━━━━━━━━━━━━━━━━━━━ 7s 2ms/step - loss: 0.7842 - mean_squared_error: 0.7842 - val_loss: 0.7714 - val_mean_squared_error: 0.7714
Epoch 4/1000
4430/4430 ━━━━━━━━━━━━━━━━━━━━ 7s 2ms/step - loss: 0.7728 - mean_squared_error: 0.7728 - val_loss: 0.7566 - val_mean_squared_error: 0.7566
Epoch 5/1000
4430/4430 ━━━━━━━━━━━━━━━━━━━━ 8s 2ms/step - loss: 0.7643 - mean_squared_error: 0.7643 - val_loss: 0.7566 - val_mean_squared_error: 0.7566
Epoch 6/1000
4430/4430 ━━━━━━━━━━━━━━━━━━━━

In [None]:
# Mount Google Drive at /content/drive, the root of the
# /content/drive/MyDrive/... paths used by the generator code.
# NOTE(review): this cell is listed after the generator cell; presumably it
# has to be executed first in a fresh Colab session - confirm.
from google.colab import drive
drive.mount('/content/drive')