<a href="https://colab.research.google.com/github/supriyag123/PHD_Pub/blob/main/SensorAgent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ================================================
# COLAB TESTING NOTEBOOK - IoT Sensor Agent System
# ================================================

# Cell 1: Install Dependencies
# The leading `!` is an IPython/Colab shell escape, so this line is only valid
# when the file is executed as a notebook cell, not as a plain Python script.
!pip install tensorflow scikit-learn scipy statsmodels matplotlib seaborn -q

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import pickle
import os
import warnings
# Silences every warning for the rest of the session (keeps notebook output
# short, but also hides deprecation and numerical warnings).
warnings.filterwarnings('ignore')

# Printed unconditionally: this does not check whether the pip command above
# actually succeeded.
print("✅ Dependencies installed successfully!")

# ================================================
# Cell 2: Core Sensor Agent Classes
# ================================================

# NOTE: The full ProductionSensorAgent and ShadowRetrainingMaster classes are
# meant to be pasted here; the class below is a simplified version for testing.

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed, Input
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from scipy import stats
from scipy.spatial.distance import jensenshannon
from collections import deque

class SimplifiedSensorAgent:
    """Simplified per-sensor anomaly and drift monitor for Colab testing.

    Every sensor gets its own StandardScaler and LSTM autoencoder. The anomaly
    score of a window is the mean squared reconstruction error in scaled
    space, compared against a threshold calibrated once at training time.

    Args:
        num_sensors: Number of sensor channels (columns) in every window.
        window_length: Number of time steps (rows) in every window.
        models_dir: Directory created on construction. Nothing in this class
            reads from or writes to it afterwards.

    Attributes:
        models, scalers: Per-sensor model and fitted scaler. A sensor appears
            here only after a successful ``train_sensor`` call.
        error_memory, data_memory: Last 100 scores / raw windows per sensor.
        recent_errors: Last 30 scores per sensor, used by the drift check.
        rolling_stats: Per-sensor ``{'mean', 'std'}`` of the calibration
            errors. Written by ``train_sensor``; not updated while processing.
        total_processed, anomalies_detected, drift_detected_count:
            Cumulative per-sensor counters since construction.
    """

    # Minimum number of sliding windows required before a sensor is trained.
    MIN_TRAINING_SEQUENCES = 20
    # Number of trailing training windows used to calibrate the error stats.
    CALIBRATION_SEQUENCES = 20
    # A window is anomalous when its score exceeds mean + ANOMALY_SIGMA * std.
    ANOMALY_SIGMA = 2.0
    # Drift is only evaluated once this many recent scores are available ...
    DRIFT_MIN_SAMPLES = 20
    # ... and is flagged when their mean exceeds DRIFT_RATIO * calibration mean.
    DRIFT_RATIO = 1.5
    # Cumulative anomaly rates above which a retrain is requested / escalated.
    RETRAIN_ANOMALY_RATE = 0.15
    HIGH_PRIORITY_ANOMALY_RATE = 0.25

    def __init__(self, num_sensors=4, window_length=20, models_dir="./colab_models"):
        self.num_sensors = num_sensors
        self.window_length = window_length
        self.models_dir = models_dir

        os.makedirs(models_dir, exist_ok=True)

        # Per-sensor storage
        self.models = {}
        self.scalers = {}
        self.error_memory = {}
        self.data_memory = {}
        self.rolling_stats = {}
        self.recent_errors = {}

        # Counters
        self.total_processed = {}
        self.anomalies_detected = {}
        self.drift_detected_count = {}

        # Initialize all sensors
        for sensor_id in range(num_sensors):
            self._init_sensor(sensor_id)

    def _init_sensor(self, sensor_id):
        """Create empty histories, default stats and zeroed counters for one sensor."""
        self.error_memory[sensor_id] = deque(maxlen=100)
        self.data_memory[sensor_id] = deque(maxlen=100)
        self.recent_errors[sensor_id] = deque(maxlen=30)

        # Defaults used until the sensor is trained: threshold = 0 + 2 * 1.
        self.rolling_stats[sensor_id] = {'mean': 0.0, 'std': 1.0}
        self.total_processed[sensor_id] = 0
        self.anomalies_detected[sensor_id] = 0
        self.drift_detected_count[sensor_id] = 0

    def _build_model(self):
        """Build and compile a small univariate LSTM autoencoder.

        Input and output shape are both ``(window_length, 1)``; the model is
        compiled with Adam (learning rate 0.001) and MSE loss.
        """
        inputs = Input(shape=(self.window_length, 1))
        encoded = LSTM(16, activation='relu', return_sequences=False)(inputs)
        decoded = RepeatVector(self.window_length)(encoded)
        decoded = LSTM(16, activation='relu', return_sequences=True)(decoded)
        outputs = TimeDistributed(Dense(1))(decoded)

        model = Model(inputs, outputs)
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
        return model

    def train_sensor(self, sensor_id, training_data):
        """Fit the scaler and autoencoder for one sensor and calibrate its threshold.

        Args:
            sensor_id: Sensor to train; must be one created in ``__init__``.
            training_data: 1-D sequence of readings (list or array).

        Returns:
            True on success, False when fewer than ``MIN_TRAINING_SEQUENCES``
            sliding windows can be cut from ``training_data``.

        Raises:
            KeyError: If ``sensor_id`` was never initialised (checked before
                the expensive model fit rather than after it).
        """
        print(f"Training sensor {sensor_id}...")

        # Accept lists as well as arrays; work on a flat 1-D view.
        series = np.asarray(training_data).reshape(-1)
        num_windows = len(series) - self.window_length + 1
        if num_windows < self.MIN_TRAINING_SEQUENCES:
            print(f"Insufficient data for sensor {sensor_id}")
            return False

        if sensor_id not in self.rolling_stats:
            raise KeyError(sensor_id)

        # All overlapping windows at once: row i holds series[i : i + window_length].
        window_index = np.arange(num_windows)[:, None] + np.arange(self.window_length)
        windows = series[window_index]

        # The scaler is fitted on the flattened windows, then applied in one pass.
        scaler = StandardScaler()
        flat_windows = windows.reshape(-1, 1)
        scaler.fit(flat_windows)
        X_train = scaler.transform(flat_windows).reshape(num_windows, self.window_length, 1)

        model = self._build_model()
        model.fit(X_train, X_train, epochs=20, batch_size=8, verbose=0)

        # Per-window reconstruction MSE on the trailing windows, from a single
        # batched predict call instead of one call per window.
        calibration = X_train[-self.CALIBRATION_SEQUENCES:]
        reconstructed = model.predict(calibration, verbose=0)
        errors = np.mean((calibration - reconstructed) ** 2, axis=(1, 2))

        # Publish scaler and model together, only after everything succeeded,
        # so a failed retrain never pairs a new scaler with an old model.
        self.scalers[sensor_id] = scaler
        self.models[sensor_id] = model
        self.rolling_stats[sensor_id]['mean'] = np.mean(errors)
        self.rolling_stats[sensor_id]['std'] = np.std(errors) + 1e-8

        print(f"✅ Sensor {sensor_id} trained successfully")
        return True

    def _score_window(self, sensor_id, sensor_data):
        """Return the scaled-space reconstruction MSE, or 0.0 for an untrained sensor."""
        if sensor_id not in self.models or sensor_id not in self.scalers:
            return 0.0
        data_scaled = self.scalers[sensor_id].transform(sensor_data.reshape(-1, 1)).flatten()
        X = data_scaled.reshape(1, self.window_length, 1)
        reconstruction = self.models[sensor_id].predict(X, verbose=0)
        return mean_squared_error(data_scaled, reconstruction.flatten())

    def process_subsequence(self, subsequence):
        """Score one live window and update per-sensor state.

        Args:
            subsequence: Array-like of shape ``(window_length, num_sensors)``.

        Returns:
            Dict with keys ``timestamp``, ``sensor_results`` (per-sensor score
            and flags), ``global_summary`` and ``retrain_flags``.

        Raises:
            ValueError: If the window does not have the expected shape.

        Note:
            ``anomaly_rate`` is cumulative since construction, so a single
            anomalous window early on can already exceed the retrain threshold.
        """
        window = np.asarray(subsequence)
        if window.shape != (self.window_length, self.num_sensors):
            raise ValueError(f"Expected shape ({self.window_length}, {self.num_sensors}), got {window.shape}")

        timestamp = datetime.now()
        sensor_results = {}
        retrain_requests = []
        global_anomalies = 0

        for sensor_id in range(self.num_sensors):
            # Copy the column: the history must not alias the caller's buffer,
            # otherwise later in-place edits would rewrite stored windows.
            sensor_data = window[:, sensor_id].copy()
            anomaly_score = self._score_window(sensor_id, sensor_data)

            # Update memory
            self.data_memory[sensor_id].append(sensor_data)
            self.error_memory[sensor_id].append(anomaly_score)
            self.recent_errors[sensor_id].append(anomaly_score)

            # Anomaly detection against the calibration statistics
            stats_for_sensor = self.rolling_stats[sensor_id]
            threshold = stats_for_sensor['mean'] + self.ANOMALY_SIGMA * stats_for_sensor['std']
            is_anomaly = bool(anomaly_score > threshold)

            # Drift detection (simplified): recent mean error vs calibration mean
            drift_flag = False
            if len(self.recent_errors[sensor_id]) >= self.DRIFT_MIN_SAMPLES:
                recent_mean = np.mean(list(self.recent_errors[sensor_id]))
                drift_flag = bool(recent_mean > self.DRIFT_RATIO * stats_for_sensor['mean'])

            # Update counters
            self.total_processed[sensor_id] += 1
            if is_anomaly:
                self.anomalies_detected[sensor_id] += 1
                global_anomalies += 1
            if drift_flag:
                self.drift_detected_count[sensor_id] += 1

            # Check for retraining needs
            anomaly_rate = self.anomalies_detected[sensor_id] / max(1, self.total_processed[sensor_id])
            high_rate = anomaly_rate > self.RETRAIN_ANOMALY_RATE
            needs_retrain = bool(high_rate or drift_flag)

            if needs_retrain:
                retrain_requests.append({
                    "sensor_id": sensor_id,
                    "reason": "high_anomaly_rate" if high_rate else "drift_detected",
                    "priority": "high" if anomaly_rate > self.HIGH_PRIORITY_ANOMALY_RATE else "medium"
                })

            # All flags are plain Python bools so the result serialises cleanly.
            sensor_results[sensor_id] = {
                "sensor_id": sensor_id,
                "anomaly_score": float(anomaly_score),
                "is_anomaly": is_anomaly,
                "drift_flag": drift_flag,
                "anomaly_rate": anomaly_rate,
                "needs_retrain_flag": needs_retrain
            }

        return {
            "timestamp": timestamp,
            "sensor_results": sensor_results,
            "global_summary": {
                "total_sensors": self.num_sensors,
                "anomalous_sensors": global_anomalies,
                # Same max(1, ...) guard as the per-sensor rates.
                "global_anomaly_rate": global_anomalies / max(1, self.num_sensors)
            },
            "retrain_flags": {
                "has_retrain_requests": len(retrain_requests) > 0,
                "retrain_requests": retrain_requests
            }
        }

    def get_status(self):
        """Return cumulative totals plus a per-sensor processed/anomaly breakdown."""
        return {
            "total_sensors": self.num_sensors,
            "trained_models": len(self.models),
            "total_processed": sum(self.total_processed.values()),
            "total_anomalies": sum(self.anomalies_detected.values()),
            "sensors_status": {
                sid: {
                    "processed": self.total_processed[sid],
                    "anomalies": self.anomalies_detected[sid],
                    "anomaly_rate": self.anomalies_detected[sid] / max(1, self.total_processed[sid])
                }
                for sid in range(self.num_sensors)
            }
        }

# Notebook feedback only: reaching this line means the class definition above
# executed without raising.
print("✅ Simplified Sensor Agent class defined!")

# ================================================
# Cell 3: Generate Test Data
# ================================================

def generate_test_data(num_sensors=4, num_samples=1000, anomaly_prob=0.05, seed=None):
    """Generate synthetic IoT sensor signals with injected point anomalies.

    Sensors 0, 1 and 2 get temperature-, vibration- and pressure-like
    patterns; every further sensor gets the flow-like pattern. Anomalies are
    added by perturbing randomly chosen samples with N(0, 5) noise.

    Args:
        num_sensors: Number of signals to generate.
        num_samples: Length of each signal.
        anomaly_prob: Fraction of samples (0..1) that receive an anomaly.
        seed: Optional seed for reproducible output. With ``None`` (default)
            NumPy's global random state is used, as before.

    Returns:
        Dict mapping sensor id (int) to a 1-D float array of ``num_samples``.

    Raises:
        ValueError: If ``anomaly_prob`` is outside [0, 1].
    """
    if not 0 <= anomaly_prob <= 1:
        raise ValueError(f"anomaly_prob must be within [0, 1], got {anomaly_prob}")

    print(f"Generating test data: {num_sensors} sensors, {num_samples} samples")

    # The module-level np.random exposes the same normal/choice API as a
    # RandomState, so the unseeded path keeps drawing from the global state.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Loop invariants: the time axis and anomaly count are shared by all sensors.
    t = np.linspace(0, 20*np.pi, num_samples)
    num_anomalies = int(anomaly_prob * num_samples)

    # Create different patterns for each sensor
    sensor_data = {}

    for sensor_id in range(num_sensors):
        # Different base patterns per sensor
        if sensor_id == 0:
            # Temperature-like pattern
            signal = 20 + 5*np.sin(t/10) + 2*np.sin(t) + rng.normal(0, 0.5, num_samples)
        elif sensor_id == 1:
            # Vibration-like pattern
            signal = np.sin(3*t) + 0.5*np.sin(10*t) + rng.normal(0, 0.3, num_samples)
        elif sensor_id == 2:
            # Pressure-like pattern
            signal = 100 + 10*np.cos(t/5) + rng.normal(0, 1, num_samples)
        else:
            # Flow-like pattern
            signal = 50 + 15*np.sin(t/8) + 3*np.cos(2*t) + rng.normal(0, 0.8, num_samples)

        # Add anomalies at distinct positions (sampling without replacement)
        anomaly_indices = rng.choice(num_samples, num_anomalies, replace=False)
        signal[anomaly_indices] += rng.normal(0, 5, len(anomaly_indices))

        sensor_data[sensor_id] = signal

    return sensor_data

# Build the two datasets in order (training first, then test); the test set
# is shorter and carries a higher share of injected anomalies.
print("🔄 Generating test datasets...")
train_data, test_data = (
    generate_test_data(num_sensors=4, num_samples=sample_count, anomaly_prob=anomaly_share)
    for sample_count, anomaly_share in ((800, 0.03), (400, 0.08))
)

print("✅ Test data generated!")

# ================================================
# Cell 4: Visualize Data
# ================================================

def plot_sensor_data(sensor_data, title="Sensor Data"):
    """Draw the first 200 samples of up to four sensors on a 2x2 grid.

    Args:
        sensor_data: Mapping (or sequence) indexed by sensor id 0..3, each
            entry a 1-D sequence of readings.
        title: Figure-level title.
    """
    figure, grid = plt.subplots(2, 2, figsize=(15, 10))
    panels = grid.flatten()

    # Only the first four sensors fit on the grid; with fewer sensors the
    # remaining panels are left empty.
    shown = min(4, len(sensor_data))
    for index, panel in enumerate(panels[:shown]):
        panel.plot(sensor_data[index][:200])  # Plot first 200 points
        panel.set_title(f'Sensor {index}')
        panel.set_xlabel('Time Steps')
        panel.set_ylabel('Value')
        panel.grid(True, alpha=0.3)

    plt.suptitle(title, fontsize=16)
    plt.tight_layout()
    plt.show()

# Visualize training data
# Visual sanity check before training: plot_sensor_data draws only the first
# 200 samples of each sensor, which is what the title passed below states.
print("📊 Visualizing training data...")
plot_sensor_data(train_data, "Training Data - First 200 Points")

# ================================================
# Cell 5: Train Models
# ================================================

print("🚀 Starting model training...")

# One agent for all four sensors, using 20-step windows
agent = SimplifiedSensorAgent(num_sensors=4, window_length=20)

# Train the sensors in id order and remember whether each one succeeded
training_success = {
    sensor_id: agent.train_sensor(sensor_id, train_data[sensor_id])
    for sensor_id in range(4)
}

print("\n📋 Training Results:")
for sensor_id, success in training_success.items():
    if success:
        status = "✅ Success"
    else:
        status = "❌ Failed"
    print(f"  Sensor {sensor_id}: {status}")

# ================================================
# Cell 6: Live Processing Simulation
# ================================================

print("\n🔴 LIVE PROCESSING SIMULATION")
print("=" * 50)

# Simulate live data stream.
# Window size and sensor count come from the agent itself, so this cell cannot
# drift out of sync with the shape the agent validates against.
window_length = agent.window_length
num_sensors = agent.num_sensors
results_log = []
anomaly_timeline = []

# Stack the per-sensor series once into a [num_samples, num_sensors] matrix;
# each live window is then a plain row slice instead of a nested Python loop.
stream = np.column_stack([test_data[sensor_id] for sensor_id in range(num_sensors)])

# Process test data as live stream, one window per step (stride 1)
for step in range(len(stream) - window_length + 1):
    # Subsequence of shape [window_length, num_sensors]
    subsequence = stream[step:step + window_length]

    # Process through agent
    results = agent.process_subsequence(subsequence)

    # Log results
    results_log.append(results)

    # Track anomalies
    anomaly_count = results["global_summary"]["anomalous_sensors"]
    anomaly_timeline.append(anomaly_count)

    # Print significant events
    if anomaly_count > 0:
        print(f"Step {step:3d}: 🚨 {anomaly_count} anomalous sensors detected")
        for sensor_id, result in results["sensor_results"].items():
            if result["is_anomaly"]:
                print(f"         Sensor {sensor_id}: Score {result['anomaly_score']:.3f}")

    if results["retrain_flags"]["has_retrain_requests"]:
        requests = results["retrain_flags"]["retrain_requests"]
        print(f"Step {step:3d}: 🔄 {len(requests)} sensors need retraining")
        for req in requests:
            print(f"         Sensor {req['sensor_id']}: {req['reason']} ({req['priority']} priority)")

    # Progress indicator every 50 steps; step > 0 guarantees that at least one
    # window has been processed, so the rate below never divides by zero.
    if step > 0 and step % 50 == 0:
        status = agent.get_status()
        print(f"\n--- STEP {step} STATUS ---")
        print(f"Total processed: {status['total_processed']}")
        print(f"Total anomalies: {status['total_anomalies']}")
        print(f"Global anomaly rate: {status['total_anomalies']/status['total_processed']:.3f}")
        print("----------------------\n")

print(f"\n✅ Processed {len(results_log)} time steps")

# ================================================
# Cell 7: Results Analysis & Visualization
# ================================================

print("\n📊 RESULTS ANALYSIS")
print("=" * 40)

# Time axis and global series pulled out of the per-step result dictionaries
steps = list(range(len(results_log)))
global_anomaly_rates = [entry["global_summary"]["global_anomaly_rate"] for entry in results_log]
retrain_requests = [len(entry["retrain_flags"]["retrain_requests"]) for entry in results_log]

# Per-sensor score and 0/1 anomaly-flag series
sensor_scores = {sid: [] for sid in range(4)}
sensor_anomalies = {sid: [] for sid in range(4)}

for result in results_log:
    for sensor_id, score_series in sensor_scores.items():
        sensor_result = result["sensor_results"][sensor_id]
        score_series.append(sensor_result["anomaly_score"])
        sensor_anomalies[sensor_id].append(int(bool(sensor_result["is_anomaly"])))


def _finish_panel(panel, title, xlabel, ylabel, with_legend=False):
    """Apply title, axis labels, optional legend and the shared light grid."""
    panel.set_title(title)
    panel.set_xlabel(xlabel)
    panel.set_ylabel(ylabel)
    if with_legend:
        panel.legend()
    panel.grid(True, alpha=0.3)


# 3x2 dashboard
fig, axes = plt.subplots(3, 2, figsize=(15, 12))

# Row 0, left: global anomaly rate over time
axes[0, 0].plot(steps, global_anomaly_rates, 'r-', alpha=0.7)
_finish_panel(axes[0, 0], 'Global Anomaly Rate Over Time', 'Time Step', 'Anomaly Rate')

# Row 0, right: retrain requests over time
axes[0, 1].plot(steps, retrain_requests, 'orange', alpha=0.7)
_finish_panel(axes[0, 1], 'Retraining Requests Over Time', 'Time Step', 'Number of Requests')

# Row 1: one line per sensor, scores on the left and anomaly flags on the right
colors = ['blue', 'green', 'red', 'purple']
for sensor_id, line_color in zip(range(4), colors):
    axes[1, 0].plot(steps, sensor_scores[sensor_id], color=line_color,
                    alpha=0.6, label=f'Sensor {sensor_id}')
_finish_panel(axes[1, 0], 'Anomaly Scores by Sensor', 'Time Step', 'Anomaly Score',
              with_legend=True)

for sensor_id, line_color in zip(range(4), colors):
    axes[1, 1].plot(steps, sensor_anomalies[sensor_id], color=line_color,
                    alpha=0.7, label=f'Sensor {sensor_id}')
_finish_panel(axes[1, 1], 'Anomaly Detections by Sensor', 'Time Step', 'Anomaly Flag',
              with_legend=True)

# Row 2, left: histogram over the scores of all sensors pooled together
all_scores = []
for sensor_scores_list in sensor_scores.values():
    all_scores.extend(sensor_scores_list)
axes[2, 0].hist(all_scores, bins=50, alpha=0.7, color='skyblue')
_finish_panel(axes[2, 0], 'Distribution of Anomaly Scores', 'Anomaly Score', 'Frequency')

# Row 2, right: cumulative anomaly rate per sensor at the end of the run
final_status = agent.get_status()
sensor_ids = list(range(4))
per_sensor_status = final_status["sensors_status"]
anomaly_rates = [per_sensor_status[sid]["anomaly_rate"] for sid in sensor_ids]

axes[2, 1].bar(sensor_ids, anomaly_rates, color=colors, alpha=0.7)
_finish_panel(axes[2, 1], 'Final Anomaly Rates by Sensor', 'Sensor ID', 'Anomaly Rate')

plt.tight_layout()
plt.show()

# ================================================
# Cell 8: Final Summary
# ================================================

print("\n🎯 FINAL SUMMARY")
print("=" * 50)

final_status = agent.get_status()

# System-wide totals
print("📊 SYSTEM PERFORMANCE:")
for summary_line in (
    f"  • Total sensors: {final_status['total_sensors']}",
    f"  • Models trained: {final_status['trained_models']}",
    f"  • Total samples processed: {final_status['total_processed']}",
    f"  • Total anomalies detected: {final_status['total_anomalies']}",
    f"  • Global anomaly rate: {final_status['total_anomalies']/final_status['total_processed']:.1%}",
):
    print(summary_line)

# One line per sensor
print("\n📋 PER-SENSOR BREAKDOWN:")
for sensor_id in range(4):
    status = final_status["sensors_status"][sensor_id]
    processed_part = f"{status['processed']} processed, "
    anomaly_part = f"{status['anomalies']} anomalies ({status['anomaly_rate']:.1%})"
    print(f"  Sensor {sensor_id}: " + processed_part + anomaly_part)

# Count retraining requests across the whole run
requests_per_step = [len(r["retrain_flags"]["retrain_requests"]) for r in results_log]
total_retrain_requests = sum(requests_per_step)
steps_with_requests = len([r for r in results_log if r["retrain_flags"]["has_retrain_requests"]])

print("\n🔄 RETRAINING ANALYSIS:")
print(f"  • Total retrain requests: {total_retrain_requests}")
print(f"  • Steps with retrain requests: {steps_with_requests}")
print(f"  • Average requests per step: {total_retrain_requests/len(results_log):.2f}")

print("\n✅ COLAB TEST COMPLETED SUCCESSFULLY!")
print("🎉 Your sensor agent system is working correctly!")

# ================================================
# Cell 9: Test Different Scenarios (Optional)
# ================================================

def test_extreme_anomaly_scenario():
    """Feed a noisy sine wave with three large spikes/dips through the global agent.

    The window size and sensor count are taken from ``agent`` so the generated
    windows always match the shape the agent validates against.

    Side effects:
        Calls ``agent.process_subsequence`` on the module-level ``agent``, so
        its counters and histories keep accumulating across this test.

    Returns:
        True when more than 5 windows contained at least one anomalous sensor.

    NOTE(review): every sensor receives the same unit-amplitude sine, while the
    models in this notebook are trained on signals with different offsets
    (e.g. around 20, 100 and 50). Windows may therefore score as anomalous
    regardless of the injected spikes; confirm this is the intended check.
    """
    print("\n🧪 TESTING EXTREME ANOMALY SCENARIO")
    print("-" * 40)

    window_length = agent.window_length
    num_sensors = agent.num_sensors

    # Create data with obvious anomalies
    extreme_data = {}
    for sensor_id in range(num_sensors):
        normal_signal = np.sin(np.linspace(0, 4*np.pi, 100)) + np.random.normal(0, 0.1, 100)
        # Add massive anomalies
        normal_signal[25:27] += 10  # Big spike
        normal_signal[50:52] -= 8   # Big dip
        normal_signal[75:77] += 12  # Another spike
        extreme_data[sensor_id] = normal_signal

    # Stack once into [num_samples, num_sensors]; windows are then row slices.
    stream = np.column_stack([extreme_data[sensor_id] for sensor_id in range(num_sensors)])

    # Process extreme data
    extreme_anomalies = 0

    for step in range(len(stream) - window_length + 1):
        results = agent.process_subsequence(stream[step:step + window_length])

        anomalous_sensors = results["global_summary"]["anomalous_sensors"]
        if anomalous_sensors > 0:
            extreme_anomalies += 1
            print(f"Step {step}: Detected {anomalous_sensors} anomalies")

    print(f"✅ Extreme anomaly test: {extreme_anomalies} anomalous steps detected")
    return extreme_anomalies > 5  # Should detect several anomalies

# Execute the extreme-anomaly scenario and report its verdict
extreme_test_passed = test_extreme_anomaly_scenario()
if extreme_test_passed:
    print("🧪 Extreme anomaly test: " + 'PASSED')
else:
    print("🧪 Extreme anomaly test: " + 'FAILED')

print("\n🏁 ALL TESTS COMPLETED!")


🔴 LIVE PROCESSING SIMULATION
Step   0: 🚨 4 anomalous sensors detected
         Sensor 0: Score 0.082
         Sensor 1: Score 3.012
         Sensor 2: Score 0.189
         Sensor 3: Score 0.162
Step   0: 🔄 4 sensors need retraining
         Sensor 0: high_anomaly_rate (high priority)
         Sensor 1: high_anomaly_rate (high priority)
         Sensor 2: high_anomaly_rate (high priority)
         Sensor 3: high_anomaly_rate (high priority)
Step   1: 🚨 4 anomalous sensors detected
         Sensor 0: Score 0.099
         Sensor 1: Score 1.950
         Sensor 2: Score 0.191
         Sensor 3: Score 0.163
Step   1: 🔄 4 sensors need retraining
         Sensor 0: high_anomaly_rate (high priority)
         Sensor 1: high_anomaly_rate (high priority)
         Sensor 2: high_anomaly_rate (high priority)
         Sensor 3: high_anomaly_rate (high priority)
Step   2: 🚨 4 anomalous sensors detected
         Sensor 0: Score 0.142
         Sensor 1: Score 1.897
         Sensor 2: Score 0.166
      

In [None]:
# Colab-only cell: mounts Google Drive at /content/drive.
# NOTE(review): nothing visible in this notebook reads from or writes to that
# path — confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/drive')