<a href="https://colab.research.google.com/github/supriyag123/PHD_Pub/blob/main/AGENTIC-MODULE4-Sensor-Pretraining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
def main():
    """Run the pre-training pipeline on a pre-processed dataset.

    Parses the command line, loads the dataset and splits off the holdout
    windows, validates the training part, saves the holdout array, trains and
    saves one model per selected sensor, and finally pickles a summary of the
    whole run into the models directory.

    NOTE: a second ``main`` is defined later in this file and rebinds the
    name, so the entry-point guard at the bottom of the file calls that later
    definition, not this one.
    """

    parser = argparse.ArgumentParser(description="Train sensor anomaly detection models on pre-processed dataset")
    parser.add_argument('--data_path', type=str, required=True,
                       help='Path to pre-processed dataset [num_samples, window_length, num_sensors]')
    parser.add_argument('--models_dir', type=str, default='./trained_models',
                       help='Directory to save models')
    parser.add_argument('--holdout_dir', type=str, default='./holdout_data',
                       help='Directory to save holdout data for streaming simulation')
    parser.add_argument('--window_length', type=int, default=50,
                       help='Expected sequence window length')
    parser.add_argument('--model_type', type=str, default='lstm_autoencoder',
                       choices=['lstm_autoencoder', 'vae'], help='Model type')
    parser.add_argument('--epochs', type=int, default=100, help='Training epochs')
    parser.add_argument('--batch_size', type=int, default=32, help='Batch size')
    parser.add_argument('--sensors', type=str, default=None,
                       help='Comma-separated sensor IDs to train (default: all)')

    args = parser.parse_args()

    print("üöÄ SENSOR MODEL PRE-TRAINING SYSTEM")
    print("=" * 50)
    print(f"üìÇ Data path: {args.data_path}")
    print(f"üíæ Models directory: {args.models_dir}")
    print(f"üì¶ Holdout directory: {args.holdout_dir}")
    print(f"üìè Window length: {args.window_length}")
    print(f"üß† Model type: {args.model_type}")
    print(f"üîÑ Epochs: {args.epochs}")
    print()

    # Load and split dataset
    training_data, holdout_data = load_your_dataset(args.data_path)

    # Validate dataset (only the training part is validated/repaired)
    training_data = validate_dataset(training_data, args.window_length)

    # Save holdout data for streaming simulation
    os.makedirs(args.holdout_dir, exist_ok=True)
    holdout_path = os.path.join(args.holdout_dir, 'holdout_data.npy')
    np.save(holdout_path, holdout_data)
    print(f"üíæ Saved holdout data to: {holdout_path}")
    print(f"   Holdout shape: {holdout_data.shape}")

    # Get dataset dimensions
    num_samples, window_length, num_sensors = training_data.shape

    # Filter sensors if specified
    sensor_list = list(range(num_sensors))
    if args.sensors:
        # Skip empty tokens so a trailing comma ("0,1,") does not crash int().
        requested_sensors = [int(token) for token in
                             (part.strip() for part in args.sensors.split(','))
                             if token]
        sensor_list = []
        ignored_sensors = []
        for sid in requested_sensors:
            # Negative IDs must be rejected too: they would silently index
            # from the end of the sensor axis.
            if 0 <= sid < num_sensors:
                if sid not in sensor_list:
                    sensor_list.append(sid)
            else:
                ignored_sensors.append(sid)
        if ignored_sensors:
            print(f"   Ignoring out-of-range sensor IDs: {ignored_sensors} "
                  f"(valid range: 0-{num_sensors - 1})")
        print(f"üéØ Training only sensors: {sensor_list}")

    # Initialize trainer
    trainer = SensorModelTrainer(
        window_length=window_length,
        model_type=args.model_type,
        epochs=args.epochs,
        batch_size=args.batch_size
    )

    # Train models for each sensor
    print(f"\nüèãÔ∏è TRAINING {len(sensor_list)} SENSOR MODELS")
    print("=" * 50)

    training_results = {}
    successful_training = 0

    for sensor_id in sensor_list:
        try:
            print(f"\nüéØ SENSOR {sensor_id}")
            print("-" * 30)

            # Extract data for this sensor: [num_samples, window_length]
            sensor_sequences = training_data[:, :, sensor_id]

            # Train model
            model, training_info = trainer.train_sensor_model(sensor_sequences, sensor_id)

            # Save model
            model_path, metadata_path = trainer.save_model(model, training_info, args.models_dir)

            training_results[sensor_id] = {
                'success': True,
                'model_path': model_path,
                'metadata_path': metadata_path,
                'training_info': training_info
            }

            successful_training += 1

        except Exception as e:
            # Best-effort per sensor: record the failure and keep going so one
            # bad sensor does not abort the whole run.
            print(f"  ‚ùå Training failed: {str(e)}")
            training_results[sensor_id] = {
                'success': False,
                'error': str(e)
            }

    # Final summary
    print(f"\nüìä TRAINING SUMMARY")
    print("=" * 50)
    print(f"‚úÖ Successful: {successful_training}/{len(sensor_list)} sensors")
    print(f"üíæ Models saved to: {args.models_dir}")
    print(f"üì¶ Holdout data saved to: {holdout_path}")

    if successful_training > 0:
        print(f"\nüèÜ TRAINED SENSORS:")
        for sensor_id, result in training_results.items():
            if result['success']:
                info = result['training_info']
                print(f"  Sensor {sensor_id}: {info['epochs_trained']} epochs, "
                      f"test loss: {info['test_loss']:.6f}, "
                      f"baseline error: {info['baseline_stats']['mean']:.6f}")

    failed_sensors = [sid for sid, result in training_results.items() if not result['success']]
    if failed_sensors:
        print(f"\n‚ùå FAILED SENSORS: {failed_sensors}")

    # Save comprehensive training summary.
    # The directory is otherwise only created by save_model(), so it may not
    # exist yet when every sensor failed (or none was selected).
    os.makedirs(args.models_dir, exist_ok=True)
    summary_path = os.path.join(args.models_dir, 'training_summary.pkl')
    training_summary = {
        'training_results': training_results,
        'config': vars(args),
        'dataset_info': {
            'original_shape': training_data.shape,
            'holdout_shape': holdout_data.shape,
            'num_sensors': num_sensors,
            'window_length': window_length,
            'total_training_samples': num_samples
        },
        'timestamp': datetime.now(),
        'successful_sensors': successful_training,
        'failed_sensors': failed_sensors,
        'holdout_data_path': holdout_path
    }

    with open(summary_path, 'wb') as f:
        pickle.dump(training_summary, f)

    print(f"\nüíæ Training summary saved to: {summary_path}")
    print(f"‚úÖ PRE-TRAINING COMPLETED!")

    # Instructions for next steps
    print(f"\nüìã NEXT STEPS:")
    print(f"  1. Use trained models: {args.models_dir}")
    print(f"  2. Use holdout data for streaming: {holdout_path}")
    print(f"  3. Run production system with:")
    print(f"     python production_agent_system.py --models_dir {args.models_dir}")

# NOTE: no entry-point call here. The `if __name__ == "__main__"` guard lives
# at the bottom of the file; invoking main() at this point would run before
# the imports and helper definitions below exist.

"""
Sensor Model Pre-Training System
================================

This script handles the initial training of sensor models using your real dataset.
Models are trained once and saved to disk for use in the production agent system.

Usage:
    python sensor_pretraining.py --data_path /path/to/dataset --models_dir ./trained_models
"""

import numpy as np
import pandas as pd
import pickle
import os
import argparse
from datetime import datetime
from typing import Dict, List, Tuple
import warnings
warnings.filterwarnings('ignore')

# Deep learning
try:
    import tensorflow as tf
    from tensorflow.keras.models import Sequential, Model
    from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed, Input
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
    KERAS_AVAILABLE = True
except ImportError:
    print("‚ö†Ô∏è TensorFlow not available. Install with: pip install tensorflow")
    KERAS_AVAILABLE = False

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

class SensorModelTrainer:
    """
    Trains, evaluates and saves one reconstruction model per sensor.

    Two architectures are supported, selected by ``model_type``:
    ``'lstm_autoencoder'`` and ``'vae'``. Each model consumes windows shaped
    ``[window_length, 1]`` and is trained to reconstruct its own input.

    Keras types in annotations are written as strings so that the class can be
    defined even when TensorFlow failed to import; the explicit ImportError in
    ``__init__`` is then the single place where the missing dependency surfaces.
    """

    def __init__(self,
                 window_length: int = 50,
                 model_type: str = 'lstm_autoencoder',
                 latent_dim: int = 32,
                 epochs: int = 100,
                 batch_size: int = 32,
                 validation_split: float = 0.2):
        """
        Store the training configuration.

        Args:
            window_length: Number of time steps in every input window.
            model_type: 'lstm_autoencoder' or 'vae'.
            latent_dim: Number of units in the LSTM layers.
            epochs: Maximum number of training epochs.
            batch_size: Mini-batch size for fit/predict.
            validation_split: Stored on the instance only; train_sensor_model
                uses its own fixed 70/15/15 split.

        Raises:
            ImportError: If TensorFlow could not be imported.
        """

        self.window_length = window_length
        self.model_type = model_type
        self.latent_dim = latent_dim
        self.epochs = epochs
        self.batch_size = batch_size
        self.validation_split = validation_split

        if not KERAS_AVAILABLE:
            raise ImportError("TensorFlow required for model training")

    def build_lstm_autoencoder(self) -> "Model":
        """Build and compile the LSTM autoencoder (MSE loss, MAE metric)."""

        # Encoder
        inputs = Input(shape=(self.window_length, 1), name='encoder_input')
        encoded = LSTM(self.latent_dim, activation='relu', return_sequences=False, name='encoder_lstm')(inputs)

        # Decoder: repeat the encoding once per time step, then decode.
        decoded = RepeatVector(self.window_length, name='repeat_vector')(encoded)
        decoded = LSTM(self.latent_dim, activation='relu', return_sequences=True, name='decoder_lstm')(decoded)
        outputs = TimeDistributed(Dense(1, activation='linear'), name='decoder_output')(decoded)

        # Create model
        model = Model(inputs, outputs, name='sensor_lstm_autoencoder')
        model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss='mse',
            metrics=['mae']
        )

        return model

    def build_vae(self) -> "Model":
        """Build and compile the variational autoencoder.

        The loss (reconstruction MSE + 0.1 * KL term) is attached with
        ``add_loss``, so the model is compiled without a ``loss`` argument and
        is fitted without targets.
        """
        # NOTE(review): add_loss on symbolic tensors depends on the installed
        # Keras version - confirm against the TensorFlow release in use.

        # Encoder
        inputs = Input(shape=(self.window_length, 1))
        x = LSTM(self.latent_dim, return_sequences=False)(inputs)

        # Latent space
        z_mean = Dense(self.latent_dim // 2, name='z_mean')(x)
        z_log_var = Dense(self.latent_dim // 2, name='z_log_var')(x)

        # Sampling function (reparameterisation: mean + std * noise)
        def sampling(args):
            z_mean, z_log_var = args
            batch = tf.shape(z_mean)[0]
            dim = tf.shape(z_mean)[1]
            epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
            return z_mean + tf.exp(0.5 * z_log_var) * epsilon

        z = tf.keras.layers.Lambda(sampling, name='sampling')([z_mean, z_log_var])

        # Decoder
        decoder_input = RepeatVector(self.window_length)(z)
        decoded = LSTM(self.latent_dim, return_sequences=True)(decoder_input)
        outputs = TimeDistributed(Dense(1))(decoded)

        # VAE model
        model = Model(inputs, outputs, name='sensor_vae')

        # VAE loss
        reconstruction_loss = tf.reduce_mean(tf.square(inputs - outputs))
        kl_loss = -0.5 * tf.reduce_mean(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        vae_loss = reconstruction_loss + 0.1 * kl_loss
        model.add_loss(vae_loss)

        model.compile(optimizer=Adam(learning_rate=0.001))

        return model

    def prepare_sequences(self, sensor_data: np.ndarray, overlap_ratio: float = 0.5) -> np.ndarray:
        """
        Convert time series data into sequences for training.

        Args:
            sensor_data: 1D time series data for one sensor
            overlap_ratio: Overlap between consecutive sequences (0.0 = no overlap, 0.9 = high overlap)

        Returns:
            Array of sequences [n_sequences, window_length]

        Raises:
            ValueError: If the series is shorter than one window.
        """

        if len(sensor_data) < self.window_length:
            raise ValueError(f"Data length ({len(sensor_data)}) < window_length ({self.window_length})")

        # Step between window starts; never below 1 so the loop always advances.
        step_size = max(1, int(self.window_length * (1 - overlap_ratio)))

        last_start = len(sensor_data) - self.window_length
        return np.array([
            sensor_data[start:start + self.window_length]
            for start in range(0, last_start + 1, step_size)
        ])

    def train_sensor_model(self, sensor_data: np.ndarray, sensor_id: int) -> Tuple["Model", Dict]:
        """
        Train a model for a single sensor using pre-prepared sequences.

        Args:
            sensor_data: Pre-scaled sequences [num_samples, window_length] for this sensor
            sensor_id: Sensor identifier

        Returns:
            Tuple of (trained model, training-info dict). The dict holds the
            split sizes, final/test losses, per-window validation errors
            (last 100) and their summary statistics.

        Raises:
            ValueError: If fewer than 100 sequences are supplied or the model
                type is unknown.
        """

        print(f"Training model for sensor {sensor_id}...")
        print(f"  Data shape: {sensor_data.shape}")
        print(f"  Total sequences: {len(sensor_data)}")

        if len(sensor_data) < 100:
            raise ValueError(f"Insufficient sequences ({len(sensor_data)}) for training. Need at least 100.")

        # Data is already in sequence format [num_samples, window_length]
        # Reshape for model input [num_samples, window_length, 1]
        X = sensor_data.reshape(len(sensor_data), self.window_length, 1)

        # Split data into train/validation/test
        # 70% train, 15% validation, remainder test
        n_samples = len(X)
        n_train = int(0.7 * n_samples)
        n_val = int(0.15 * n_samples)

        # Shuffle indices with a fixed seed so the split is reproducible.
        indices = np.random.RandomState(42).permutation(n_samples)

        train_idx = indices[:n_train]
        val_idx = indices[n_train:n_train + n_val]
        test_idx = indices[n_train + n_val:]

        X_train = X[train_idx]
        X_val = X[val_idx]
        X_test = X[test_idx]

        print(f"  Train sequences: {len(X_train)}")
        print(f"  Validation sequences: {len(X_val)}")
        print(f"  Test sequences: {len(X_test)}")

        # Build model
        if self.model_type == 'lstm_autoencoder':
            model = self.build_lstm_autoencoder()
        elif self.model_type == 'vae':
            model = self.build_vae()
        else:
            raise ValueError(f"Unknown model type: {self.model_type}")

        print(f"  Model: {self.model_type} with {model.count_params():,} parameters")

        # Callbacks
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
        ]

        # Train model
        start_time = datetime.now()

        if self.model_type == 'vae':
            # VAE training (no target needed due to custom loss)
            history = model.fit(
                X_train, epochs=self.epochs, batch_size=self.batch_size,
                validation_data=(X_val,), callbacks=callbacks, verbose=1
            )
        else:
            # Autoencoder training
            history = model.fit(
                X_train, X_train, epochs=self.epochs, batch_size=self.batch_size,
                validation_data=(X_val, X_val), callbacks=callbacks, verbose=1
            )

        training_time = datetime.now() - start_time
        print(f"  Training completed in {training_time}")

        # Evaluate on test set
        print("  Evaluating on test set...")
        if self.model_type == 'vae':
            evaluation = model.evaluate(X_test, verbose=0)
        else:
            evaluation = model.evaluate(X_test, X_test, verbose=0)

        # evaluate() returns a bare scalar when the model has no metrics and a
        # [loss, metric, ...] list otherwise (the autoencoder is compiled with
        # 'mae'). Normalise to a float before formatting, otherwise the
        # f-string below raises TypeError on the list.
        test_loss = float(np.atleast_1d(evaluation)[0])

        print(f"  Test loss: {test_loss:.6f}")

        # Compute baseline errors for anomaly detection using validation set:
        # one MSE per validation window, from a single batched predict call.
        print("  Computing baseline error statistics...")
        reconstructions = model.predict(X_val, batch_size=self.batch_size, verbose=0)
        baseline_errors = np.mean(np.square(X_val - reconstructions), axis=(1, 2)).tolist()

        baseline_stats = {
            'mean': float(np.mean(baseline_errors)),
            'std': float(np.std(baseline_errors)) + 1e-8,  # epsilon keeps std strictly positive
            'q95': float(np.percentile(baseline_errors, 95)),
            'q99': float(np.percentile(baseline_errors, 99)),
            'min': float(np.min(baseline_errors)),
            'max': float(np.max(baseline_errors))
        }

        training_info = {
            'sensor_id': sensor_id,
            'model_type': self.model_type,
            'window_length': self.window_length,
            'training_sequences': len(X_train),
            'validation_sequences': len(X_val),
            'test_sequences': len(X_test),
            'training_time': str(training_time),
            'final_loss': float(history.history['loss'][-1]),
            'final_val_loss': float(history.history['val_loss'][-1]),
            'test_loss': test_loss,
            'epochs_trained': len(history.history['loss']),
            'baseline_errors': baseline_errors[-100:],  # Store last 100 for drift detection
            'baseline_stats': baseline_stats,
            'trained_at': datetime.now()
        }

        print(f"  ‚úÖ Training successful!")
        print(f"     Final train loss: {training_info['final_loss']:.6f}")
        print(f"     Final val loss: {training_info['final_val_loss']:.6f}")
        print(f"     Test loss: {training_info['test_loss']:.6f}")
        print(f"     Baseline error: {baseline_stats['mean']:.6f} ¬± {baseline_stats['std']:.6f}")

        return model, training_info

    def save_model(self, model: "Model", training_info: Dict, models_dir: str):
        """Save the trained model (.h5) and its metadata (.pkl).

        Args:
            model: Trained Keras model.
            training_info: Dict returned by train_sensor_model; its
                'sensor_id' entry determines the file names.
            models_dir: Target directory, created if missing.

        Returns:
            Tuple of (model_path, metadata_path).
        """

        sensor_id = training_info['sensor_id']
        os.makedirs(models_dir, exist_ok=True)

        # Save model
        model_path = os.path.join(models_dir, f"sensor_{sensor_id}_model.h5")
        model.save(model_path)

        # Save metadata
        metadata_path = os.path.join(models_dir, f"sensor_{sensor_id}_metadata.pkl")
        with open(metadata_path, 'wb') as f:
            pickle.dump(training_info, f)

        print(f"  üíæ Saved model and metadata for sensor {sensor_id}")

        return model_path, metadata_path


def load_your_dataset(data_path: str, holdout_size: int = 1000) -> Tuple[np.ndarray, np.ndarray]:
    """
    Load your pre-processed dataset and split off a holdout tail.

    Supported formats (matched case-insensitively on the file extension):
    .npy, .npz, .h5/.hdf5, .pkl/.pickle. For container formats the array is
    read from the key 'data' if present, otherwise from the first key.

    Args:
        data_path: Path to your dataset
        holdout_size: Number of trailing samples reserved as holdout
            (default 1000)

    Returns:
        Tuple of (training_data, holdout_data)
        - training_data: [num_samples - holdout_size, window_length, num_sensors] for model training
        - holdout_data: [holdout_size, window_length, num_sensors] for streaming simulation

    Raises:
        ValueError: On an unsupported extension, non-ndarray or non-3D data,
            a non-positive holdout_size, or a dataset with no more than
            holdout_size samples.
    """

    if holdout_size < 1:
        # data[-0:] would select the whole array, so zero must be rejected too.
        raise ValueError(f"holdout_size must be a positive integer, got {holdout_size}")

    print(f"üìÇ Loading pre-processed dataset from: {data_path}")

    extension = os.path.splitext(data_path)[1].lower()

    if extension == '.npy':
        # Numpy array format
        print("  Format: Numpy array (.npy)")
        data = np.load(data_path)

    elif extension == '.npz':
        # Compressed numpy format
        print("  Format: Compressed numpy (.npz)")
        # NpzFile keeps the archive open; the context manager closes it once
        # the array has been read.
        with np.load(data_path) as data_file:
            # Assume main data is stored with key 'data' or use first key
            key = 'data' if 'data' in data_file.files else data_file.files[0]
            data = data_file[key]

    elif extension in ('.h5', '.hdf5'):
        # HDF5 format
        print("  Format: HDF5 file")
        import h5py  # optional dependency, only needed for this format

        with h5py.File(data_path, 'r') as f:
            # Assume main data is stored with key 'data' or use first key
            if 'data' in f:
                data = f['data'][:]
            else:
                data = f[list(f.keys())[0]][:]

    elif extension in ('.pkl', '.pickle'):
        # Pickle format
        print("  Format: Pickle file")
        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only load pickle datasets that come from a trusted source.
        with open(data_path, 'rb') as f:
            data = pickle.load(f)
        # If pickle contains dict, extract the array
        if isinstance(data, dict):
            if 'data' in data:
                data = data['data']
            else:
                data = list(data.values())[0]

    else:
        raise ValueError(f"Unsupported data format: {data_path}. Supported: .npy, .npz, .h5, .hdf5, .pkl")

    # Validate data format
    if not isinstance(data, np.ndarray):
        raise ValueError(f"Data must be numpy array, got {type(data)}")

    if data.ndim != 3:
        raise ValueError(f"Data must be 3D [num_samples, window_length, num_sensors], got {data.ndim}D: {data.shape}")

    num_samples, window_length, num_sensors = data.shape
    print(f"  üìä Dataset shape: {data.shape}")
    print(f"     Total samples: {num_samples:,}")
    print(f"     Window length: {window_length}")
    print(f"     Number of sensors: {num_sensors}")

    # Check if data appears scaled. The range is taken over finite values
    # only: a single NaN would otherwise turn min/max into NaN and make the
    # check below meaningless.
    finite_mask = np.isfinite(data)
    invalid_count = int(data.size - np.count_nonzero(finite_mask))

    if invalid_count < data.size:
        finite_values = data[finite_mask] if invalid_count else data
        data_min, data_max = np.min(finite_values), np.max(finite_values)
        print(f"  üìà Data range: [{data_min:.3f}, {data_max:.3f}]")

        if -10 <= data_min and data_max <= 10:
            print("  ‚úÖ Data appears to be pre-scaled")
        else:
            print("  ‚ö†Ô∏è Data range seems large - verify it's properly scaled")
    elif data.size > 0:
        print("  WARNING: dataset contains no finite values")

    # Check for invalid values
    if invalid_count > 0:
        print(f"  ‚ö†Ô∏è Found {invalid_count} invalid values (NaN/Inf) - will be handled during training")

    # Split into training and holdout
    if num_samples <= holdout_size:
        raise ValueError(f"Dataset too small ({num_samples} samples). Need more than {holdout_size} samples.")

    # Copies detach both parts from the loaded array.
    holdout_data = data[-holdout_size:].copy()   # Last holdout_size samples
    training_data = data[:-holdout_size].copy()  # Everything before them

    print(f"  üìä Data split:")
    print(f"     Training: {training_data.shape[0]:,} samples")
    print(f"     Holdout: {holdout_data.shape[0]:,} samples")

    return training_data, holdout_data


def validate_dataset(dataset: np.ndarray, window_length: int) -> np.ndarray:
    """
    Validate the pre-processed dataset and repair non-finite values.

    NaN/Inf entries are replaced, per sensor, by the mean of that sensor's
    finite values. The repair is done on a copy, so the array passed in is
    never modified; when nothing needs repairing the input array itself is
    returned. Per-sensor statistics are printed for inspection.

    Args:
        dataset: Pre-processed dataset [num_samples, window_length, num_sensors]
        window_length: Expected window length

    Returns:
        Validated dataset

    Raises:
        ValueError: If the window length differs from the expected one or
            more than 10% of all values are non-finite.
    """

    print("üîç Validating dataset...")

    _, actual_window_length, num_sensors = dataset.shape

    # Check window length
    if actual_window_length != window_length:
        raise ValueError(f"Window length mismatch: expected {window_length}, got {actual_window_length}")

    # Check for invalid values
    invalid_mask = ~np.isfinite(dataset)
    invalid_count = np.sum(invalid_mask)

    if invalid_count > 0:
        invalid_ratio = invalid_count / dataset.size
        print(f"  ‚ö†Ô∏è Found {invalid_count:,} invalid values ({invalid_ratio:.2%} of total)")

        if invalid_ratio > 0.1:
            raise ValueError(f"Too many invalid values ({invalid_ratio:.1%}). Check data quality.")

        # Replace invalid values with sensor means
        print("  üîß Replacing invalid values with sensor means...")
        # Work on a copy so the caller's array is left untouched.
        dataset = dataset.copy()
        for sensor_id in range(num_sensors):
            sensor_mask = invalid_mask[:, :, sensor_id]
            if not sensor_mask.any():
                continue

            sensor_view = dataset[:, :, sensor_id]  # view: writes go into the copy
            valid_data = sensor_view[~sensor_mask]
            if valid_data.size == 0:
                # No finite value to derive a mean from; say so instead of
                # silently leaving NaN/Inf behind.
                print(f"    WARNING: Sensor {sensor_id} has no finite values - left unrepaired")
                continue

            sensor_view[sensor_mask] = np.mean(valid_data)

    # Statistical validation per sensor
    print(f"  üìä Per-sensor statistics:")
    for sensor_id in range(num_sensors):
        sensor_data = dataset[:, :, sensor_id]

        mean_val = np.mean(sensor_data)
        std_val = np.std(sensor_data)
        min_val = np.min(sensor_data)
        max_val = np.max(sensor_data)

        print(f"    Sensor {sensor_id}: Œº={mean_val:.3f}, œÉ={std_val:.3f}, "
              f"range=[{min_val:.3f}, {max_val:.3f}]")

        # Check for constant values
        if std_val < 1e-6:
            print(f"    ‚ö†Ô∏è Sensor {sensor_id}: Nearly constant values (œÉ={std_val:.2e})")

    print(f"‚úÖ Dataset validation completed")
    return dataset


def main():
    """Main training pipeline.

    Parses the command line, loads the dataset (which is returned as a
    training/holdout pair), validates the training part, saves the holdout
    array, trains and saves one model per selected sensor, and pickles a
    summary of the run into the models directory.
    """

    parser = argparse.ArgumentParser(description="Train sensor anomaly detection models")
    parser.add_argument('--data_path', type=str, required=True, help='Path to dataset')
    parser.add_argument('--models_dir', type=str, default='./trained_models', help='Directory to save models')
    parser.add_argument('--holdout_dir', type=str, default='./holdout_data',
                       help='Directory to save holdout data for streaming simulation')
    parser.add_argument('--window_length', type=int, default=50, help='Sequence window length')
    parser.add_argument('--model_type', type=str, default='lstm_autoencoder',
                       choices=['lstm_autoencoder', 'vae'], help='Model type')
    parser.add_argument('--epochs', type=int, default=100, help='Training epochs')
    parser.add_argument('--batch_size', type=int, default=32, help='Batch size')
    parser.add_argument('--sensors', type=str, default=None,
                       help='Comma-separated sensor IDs to train (default: all)')

    args = parser.parse_args()

    print("üöÄ SENSOR MODEL PRE-TRAINING SYSTEM")
    print("=" * 50)
    print(f"üìÇ Data path: {args.data_path}")
    print(f"üíæ Models directory: {args.models_dir}")
    print(f"üìè Window length: {args.window_length}")
    print(f"üß† Model type: {args.model_type}")
    print(f"üîÑ Epochs: {args.epochs}")
    print()

    # Load dataset. load_your_dataset returns (training_data, holdout_data),
    # both shaped [num_samples, window_length, num_sensors].
    training_data, holdout_data = load_your_dataset(args.data_path)

    # Validate dataset
    training_data = validate_dataset(training_data, args.window_length)

    num_sensors = training_data.shape[2]
    # Explicit size check: truth-testing a multi-element ndarray raises.
    if num_sensors == 0:
        print("‚ùå No valid sensors found for training")
        return

    # Keep the holdout windows instead of discarding them.
    os.makedirs(args.holdout_dir, exist_ok=True)
    holdout_path = os.path.join(args.holdout_dir, 'holdout_data.npy')
    np.save(holdout_path, holdout_data)
    print(f"üíæ Saved holdout data to: {holdout_path}")

    # One [num_samples, window_length] slice per sensor, keyed by sensor index.
    validated_data = {sid: training_data[:, :, sid] for sid in range(num_sensors)}

    # Filter sensors if specified
    if args.sensors:
        # Skip empty tokens so a trailing comma does not crash int().
        requested_sensors = {int(token) for token in
                             (part.strip() for part in args.sensors.split(','))
                             if token}
        validated_data = {sid: data for sid, data in validated_data.items()
                         if sid in requested_sensors}
        print(f"üéØ Training only sensors: {list(validated_data.keys())}")

    # Initialize trainer
    trainer = SensorModelTrainer(
        window_length=args.window_length,
        model_type=args.model_type,
        epochs=args.epochs,
        batch_size=args.batch_size
    )

    # Train models
    print(f"\nüèãÔ∏è TRAINING {len(validated_data)} SENSOR MODELS")
    print("=" * 50)

    training_results = {}
    successful_training = 0

    for sensor_id, data in validated_data.items():
        try:
            print(f"\nüéØ SENSOR {sensor_id}")
            print("-" * 30)

            # Train model
            model, training_info = trainer.train_sensor_model(data, sensor_id)

            # Save model
            model_path, metadata_path = trainer.save_model(model, training_info, args.models_dir)

            training_results[sensor_id] = {
                'success': True,
                'model_path': model_path,
                'metadata_path': metadata_path,
                'training_info': training_info
            }

            successful_training += 1

        except Exception as e:
            # Best-effort per sensor: record the failure and continue.
            print(f"  ‚ùå Training failed: {str(e)}")
            training_results[sensor_id] = {
                'success': False,
                'error': str(e)
            }

    # Final summary
    print(f"\nüìä TRAINING SUMMARY")
    print("=" * 50)
    print(f"‚úÖ Successful: {successful_training}/{len(validated_data)} sensors")
    print(f"üíæ Models saved to: {args.models_dir}")

    if successful_training > 0:
        print(f"\nüèÜ TRAINED SENSORS:")
        for sensor_id, result in training_results.items():
            if result['success']:
                info = result['training_info']
                print(f"  Sensor {sensor_id}: {info['epochs_trained']} epochs, "
                      f"final loss: {info['final_loss']:.6f}")

    failed_sensors = [sid for sid, result in training_results.items() if not result['success']]
    if failed_sensors:
        print(f"\n‚ùå FAILED SENSORS: {failed_sensors}")

    # Save training summary. The directory is otherwise only created by
    # save_model(), so it may be missing when every sensor failed.
    os.makedirs(args.models_dir, exist_ok=True)
    summary_path = os.path.join(args.models_dir, 'training_summary.pkl')
    with open(summary_path, 'wb') as f:
        pickle.dump({
            'training_results': training_results,
            'config': vars(args),
            'timestamp': datetime.now(),
            'holdout_data_path': holdout_path
        }, f)

    print(f"\nüíæ Training summary saved to: {summary_path}")
    print(f"‚úÖ PRE-TRAINING COMPLETED!")


# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported. `main` resolves to the most recent
# definition of that name in this file.
if __name__ == "__main__":
    main()

In [None]:
# Colab-only cell: mount Google Drive at /content/drive.
# `google.colab` is importable only inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')