<a href="https://colab.research.google.com/github/supriyag123/PHD_Pub/blob/main/AGENTIC-MODULE3-MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# agents/adaptive_window_agent.py
import numpy as np
import pandas as pd
import pickle
import json
import os
from collections import deque, defaultdict
from typing import Dict, List, Tuple, Optional, Any
import datetime as dt
import logging
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from statsmodels.tsa.vector_ar.var_model import VAR
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense
from keras.callbacks import EarlyStopping
import tensorflow as tf
from scipy import stats
import threading
import queue
import warnings
warnings.filterwarnings('ignore')

logger = logging.getLogger(__name__)

class AdaptiveWindowAgent:
    """
    Agent A: Adaptive Window Management with Enhanced MLP

    Capabilities:
    1. Invoke new data and score using trained MLP
    2. Calculate actual performance using VAR forecast
    3. Track accuracy and performance statistics
    4. Monitor for drift in prediction performance
    5. Communicate with sensor agents to verify drift
    6. Retrain MLP when drift is confirmed
    """

    def __init__(self, agent_id: str = "adaptive_window_agent",
                 model_path: str = None,
                 checkpoint_path: str = None):
        """
        Set up all agent state, then immediately try to load the trained MLP.

        Args:
            agent_id: Label printed in the start-up message.
            model_path: Location of the saved Keras model. Any falsy value
                (None, "") selects the built-in Google Drive default.
            checkpoint_path: Location of the weights checkpoint. Any falsy
                value selects the built-in Google Drive default.
        """
        default_model_path = "/content/drive/MyDrive/PHD/2025/DGRNet-MLP-Versions/METROPM_MLP_model_Daily.keras"
        default_checkpoint_path = "/content/drive/MyDrive/PHD/2025/TEMP_OUTPUT_METROPM/ckp2.weights.h5"

        self.agent_id = agent_id
        self.model_path = model_path if model_path else default_model_path
        self.checkpoint_path = checkpoint_path if checkpoint_path else default_checkpoint_path

        # --- MLP and target scaler -----------------------------------------
        self.model = None
        self.is_model_loaded = False
        self.transformer = StandardScaler()
        self.transformer_fitted = False  # flipped once the scaler has been fitted/loaded
        self._ground_truth_buffer = []

        # --- rolling performance records (bounded) -------------------------
        self.prediction_history = deque(maxlen=1000)
        self.accuracy_history = deque(maxlen=200)
        self.r2_history = deque(maxlen=200)
        self.mse_history = deque(maxlen=200)

        # --- drift detection knobs and state -------------------------------
        self.drift_detection_window = 50
        self.drift_threshold_r2 = 0.1
        self.drift_threshold_mse = 0.2
        self.consecutive_poor_predictions = 0
        self.drift_confirmed = False

        # --- inter-agent communication -------------------------------------
        self.message_queue = queue.Queue()
        self.sensor_agents = {}

        # --- aggregate counters --------------------------------------------
        self.performance_stats = dict(
            total_predictions=0,
            avg_r2=0.0,
            avg_mse=0.0,
            avg_mae=0.0,
            last_retrain_time=None,
            drift_events=0,
            retraining_events=0,
        )

        # --- samples kept for a later retrain ------------------------------
        self.retraining_data = {
            buffer_name: deque(maxlen=10000)
            for buffer_name in ('x_buffer', 'y_buffer')
        }

        # Loading happens last because load_model() writes to the attributes
        # initialised above.
        self.load_model()
        print(f"AdaptiveWindowAgent {self.agent_id} initialized")
        print(f"Model loaded: {self.is_model_loaded}")

    def load_model(self):
        """Load trained MLP model and recreate transformer using original training data"""
        try:
            if os.path.exists(self.model_path):
                self.model = keras.models.load_model(self.model_path)
                print(f"Loaded MLP model from {self.model_path}")
                self.is_model_loaded = True

                # Try to load saved transformer first
                transformer_path = self.model_path.replace('.keras', '_transformer.pkl')
                if os.path.exists(transformer_path):
                    with open(transformer_path, 'rb') as f:
                        self.transformer = pickle.load(f)
                    self.transformer_fitted = True
                    print("Loaded saved transformer")
                else:
                    # Option 2: Recreate transformer from original training data
                    print("No saved transformer found, recreating from original training data...")

                    try:
                        # Load your original y training data
                        y_original = np.load('/content/drive/MyDrive/PHD/2025/TEMP_OUTPUT_METROPM/generated-data-true-window2.npy')

                        # Fit transformer on original training data (same as your training code)
                        self.transformer = StandardScaler()
                        self.transformer.fit(y_original.reshape(-1, 1))
                        self.transformer_fitted = True

                        # Save it for future use
                        with open(transformer_path, 'wb') as f:
                            pickle.dump(self.transformer, f)

                        print(f"Fitted transformer on {len(y_original)} original training samples and saved")

                    except Exception as e:
                        print(f"Could not load original training data: {e}")
                        self.transformer = StandardScaler()
                        self.transformer_fitted = False

            else:
                print(f"Model file not found at {self.model_path}")
                self.is_model_loaded = False

        except Exception as e:
            print(f"Error loading model: {e}")
            self.is_model_loaded = False

    def _fit_transformer_if_needed(self, y_values: List[float]):
        """Fit transformer on representative data if not already fitted"""
        if not self.transformer_fitted and len(y_values) >= 10:
            try:
                # Fit transformer on available y values
                y_array = np.array(y_values).reshape(-1, 1)
                self.transformer.fit(y_array)
                self.transformer_fitted = True
                print(f"Fitted transformer on {len(y_values)} representative samples")

                # Optionally save the fitted transformer for future use
                transformer_path = self.model_path.replace('.keras', '_transformer.pkl')
                with open(transformer_path, 'wb') as f:
                    pickle.dump(self.transformer, f)
                print(f"Saved fitted transformer to {transformer_path}")

            except Exception as e:
                logger.error(f"Failed to fit transformer: {e}")

    def calculate_var_ground_truth(self, sequence_3d: np.ndarray, n_future: int = 1) -> int:
        """
        Calculate ground truth window size using your EXACT VAR analysis logic
        """
        # Your exact VAR analysis logic
        rmse_list = []
        K = sequence_3d.shape[0]  # K is the number of timesteps (50 in your case)

        for k in range(2, round(K)):
            cur_seq = sequence_3d
            df = pd.DataFrame(cur_seq, columns=['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12'])
            df_train, df_test = df[0:-n_future], df[-n_future:]
            model = VAR(df_train)
            try:
                model_fitted1 = model.fit(k)
                forecast_input1 = df_train.values[-k:]
                fc1 = model_fitted1.forecast(y=forecast_input1, steps=n_future)
                df_forecast1 = pd.DataFrame(fc1, index=df.index[-n_future:], columns=df.columns)
                mse = mean_squared_error(df_test['V1'], df_forecast1['V1'].values)
                rmse_list.append(mse)
            except:
                rmse_list.append(99999)

        if rmse_list:
            min_index = rmse_list.index(min(rmse_list))
            min_sw = min_index + 2
            return min_sw
        else:
            return 20

    def predict_window_size(self, feature_vector: np.ndarray, sequence_3d: Optional[np.ndarray] = None,
                           actual_window: Optional[int] = None) -> Dict[str, Any]:
        """
        Enhanced prediction method that can:
        1. Use your MLP for prediction
        2. Calculate VAR-based ground truth if sequence data provided
        3. Track performance against VAR ground truth
        """
        if not self.is_model_loaded:
            return {
                'predicted_window': 20,
                'confidence': 0.0,
                'error': "Model not loaded"
            }

        try:
            # Ensure feature vector is 2D
            if feature_vector.ndim == 1:
                feature_vector = feature_vector.reshape(1, -1)

            # 1. INVOKE NEW DATA AND SCORE USING MLP
            prediction_raw = self.model.predict(feature_vector, verbose=0)

            # 2. CALCULATE VAR-BASED GROUND TRUTH if sequence provided
            var_ground_truth = None
            if sequence_3d is not None:
                var_ground_truth = self.calculate_var_ground_truth(sequence_3d)
            elif actual_window is not None:
                var_ground_truth = actual_window

            # 3. HANDLE TRANSFORMER SCALING
            if var_ground_truth is not None:
                # Collect ground truth values for transformer fitting
                self._ground_truth_buffer.append(var_ground_truth)

                # Fit transformer if not already fitted
                if not self.transformer_fitted:
                    self._fit_transformer_if_needed(self._ground_truth_buffer)

            # Transform prediction back to original scale only if transformer is fitted
            if self.transformer_fitted:
                predicted_window = self.transformer.inverse_transform(prediction_raw)[0, 0]
            else:
                # Use raw prediction if transformer not fitted yet
                predicted_window = prediction_raw[0, 0]
                logger.warning("Transformer not fitted yet, using raw prediction")

            predicted_window = max(5, min(50, int(predicted_window)))

            # Create prediction record
            prediction_record = {
                'timestamp': dt.datetime.now(),
                'predicted_window': predicted_window,
                'feature_vector': feature_vector.flatten(),
                'raw_prediction': prediction_raw[0, 0],
                'var_ground_truth': var_ground_truth,
                'transformer_fitted': self.transformer_fitted
            }

            # 4. CALCULATE ACTUAL PERFORMANCE AGAINST VAR GROUND TRUTH
            if var_ground_truth is not None:
                absolute_error = abs(predicted_window - var_ground_truth)
                relative_error = absolute_error / max(var_ground_truth, 1)
                accuracy = max(0, 1 - relative_error)

                self.accuracy_history.append(accuracy)

                # Calculate metrics for recent predictions
                if len(self.prediction_history) >= 10:
                    recent_predictions = [p['predicted_window'] for p in list(self.prediction_history)[-10:]
                                        if p['var_ground_truth'] is not None]
                    recent_ground_truths = [p['var_ground_truth'] for p in list(self.prediction_history)[-10:]
                                          if p['var_ground_truth'] is not None]

                    if len(recent_predictions) >= 5:
                        r2 = r2_score(recent_ground_truths, recent_predictions)
                        mse = mean_squared_error(recent_ground_truths, recent_predictions)
                        mae = mean_absolute_error(recent_ground_truths, recent_predictions)

                        self.r2_history.append(r2)
                        self.mse_history.append(mse)

                        self.performance_stats.update({
                            'total_predictions': self.performance_stats['total_predictions'] + 1,
                            'avg_r2': np.mean(self.r2_history),
                            'avg_mse': np.mean(self.mse_history),
                            'avg_mae': mae
                        })

                        prediction_record.update({
                            'absolute_error': absolute_error,
                            'relative_error': relative_error,
                            'accuracy': accuracy,
                            'recent_r2': r2,
                            'recent_mse': mse,
                            'recent_mae': mae
                        })

                        # 5. CHECK FOR DRIFT
                        drift_detected = self._check_for_drift()
                        prediction_record['drift_detected'] = drift_detected

                        if drift_detected:
                            prediction_record['drift_action'] = self._handle_drift_detection(feature_vector, var_ground_truth)

            # Store prediction
            self.prediction_history.append(prediction_record)

            # Add to retraining buffer
            self.retraining_data['x_buffer'].append(feature_vector.flatten())
            if var_ground_truth is not None:
                # Store raw ground truth, transform only when needed during retraining
                self.retraining_data['y_buffer'].append(var_ground_truth)

            return {
                'predicted_window': predicted_window,
                'var_ground_truth': var_ground_truth,
                'confidence': self._calculate_confidence(prediction_record),
                'performance_stats': self.get_recent_performance(),
                'drift_detected': prediction_record.get('drift_detected', False),
                'prediction_id': len(self.prediction_history),
                'transformer_status': 'fitted' if self.transformer_fitted else 'not_fitted'
            }

        except Exception as e:
            logger.error(f"Prediction error: {e}")
            return {
                'predicted_window': 20,
                'confidence': 0.0,
                'error': str(e)
            }

    def _check_for_drift(self) -> bool:
        """Monitor and identify drift in prediction performance"""
        if len(self.r2_history) < self.drift_detection_window:
            return False

        try:
            # Get recent and historical performance
            recent_r2 = np.mean(list(self.r2_history)[-20:])
            historical_r2 = np.mean(list(self.r2_history)[-self.drift_detection_window:-20])

            recent_mse = np.mean(list(self.mse_history)[-20:])
            historical_mse = np.mean(list(self.mse_history)[-self.drift_detection_window:-20])

            # Check for significant performance degradation
            r2_drop = historical_r2 - recent_r2
            mse_increase = recent_mse / max(historical_mse, 0.001) - 1

            # Drift conditions
            r2_drift = r2_drop > self.drift_threshold_r2
            mse_drift = mse_increase > self.drift_threshold_mse

            # Track consecutive poor predictions
            recent_accuracy = np.mean(list(self.accuracy_history)[-10:]) if len(self.accuracy_history) >= 10 else 1.0
            if recent_accuracy < 0.7:
                self.consecutive_poor_predictions += 1
            else:
                self.consecutive_poor_predictions = 0

            consecutive_drift = self.consecutive_poor_predictions > 10

            # Drift detected if multiple conditions met
            drift_detected = (r2_drift and mse_drift) or consecutive_drift

            if drift_detected:
                logger.warning(f"Drift detected: R2 drop={r2_drop:.3f}, MSE increase={mse_increase:.3f}, "
                             f"Consecutive poor predictions={self.consecutive_poor_predictions}")
                self.performance_stats['drift_events'] += 1

            return drift_detected

        except Exception as e:
            logger.error(f"Drift detection error: {e}")
            return False

    def _handle_drift_detection(self, current_features: np.ndarray, actual_window: int) -> str:
        """Check with sensor agents if really drifting"""
        if self.drift_confirmed:
            return "Already handling drift"

        # Query sensor agents for their drift status
        sensor_drift_confirmations = self._query_sensor_agents_for_drift()

        # If majority of sensors also detect drift, confirm and retrain
        if sensor_drift_confirmations >= len(self.sensor_agents) * 0.6:
            self.drift_confirmed = True
            logger.info("Drift confirmed by sensor agents. Initiating retraining...")

            # Retrain MLP
            retrain_success = self._retrain_model()

            if retrain_success:
                self.drift_confirmed = False
                self.consecutive_poor_predictions = 0
                self.performance_stats['retraining_events'] += 1
                self.performance_stats['last_retrain_time'] = dt.datetime.now()
                return "Retraining completed successfully"
            else:
                return "Retraining failed"
        else:
            return f"Drift suspected but not confirmed by sensors ({sensor_drift_confirmations}/{len(self.sensor_agents)})"

    def _query_sensor_agents_for_drift(self) -> int:
        """Query sensor agents to confirm drift"""
        confirmations = 0

        for sensor_id, sensor_agent in self.sensor_agents.items():
            try:
                # This would be actual message passing in full implementation
                sensor_drift = self._simulate_sensor_drift_check(sensor_id)
                if sensor_drift:
                    confirmations += 1
            except Exception as e:
                logger.error(f"Error querying sensor {sensor_id}: {e}")

        return confirmations

    def _simulate_sensor_drift_check(self, sensor_id: str) -> bool:
        """Simulate sensor drift check (replace with actual message passing)"""
        return np.random.random() > 0.7

    def _retrain_model(self) -> bool:
        """Retrain MLP and reinstate new model"""
        try:
            logger.info("Starting MLP retraining...")

            if len(self.retraining_data['x_buffer']) < 100:
                logger.warning("Insufficient data for retraining")
                return False

            # Prepare retraining data
            X_retrain = np.array(list(self.retraining_data['x_buffer']))
            y_raw = np.array(list(self.retraining_data['y_buffer']))

            # Transform y data for training if transformer is fitted
            if self.transformer_fitted:
                y_retrain = self.transformer.transform(y_raw.reshape(-1, 1)).flatten()
            else:
                y_retrain = y_raw

            # Create new model with same architecture
            new_model = Sequential()
            new_model.add(Dense(64, activation='relu', input_shape=(X_retrain.shape[1],)))
            new_model.add(Dense(32, activation='relu'))
            new_model.add(Dense(16, activation='relu'))
            new_model.add(Dense(8, activation='relu'))
            new_model.add(Dense(1))

            optimizer = keras.optimizers.Adam(learning_rate=0.0003, clipnorm=1)
            new_model.compile(loss='mean_squared_error', optimizer=optimizer,
                            metrics=['mean_squared_error'])

            es = keras.callbacks.EarlyStopping(
                patience=10, verbose=0, min_delta=0.0001,
                monitor='loss', mode='min', restore_best_weights=True
            )

            # Train the new model
            history = new_model.fit(
                X_retrain, y_retrain,
                epochs=50,
                batch_size=32,
                validation_split=0.2,
                callbacks=[es],
                verbose=0
            )

            # Evaluate new model performance
            val_loss = min(history.history['val_loss'])

            # Only replace model if new one is better
            current_recent_mse = np.mean(list(self.mse_history)[-10:]) if self.mse_history else float('inf')
            if val_loss < current_recent_mse * 1.1:
                # Replace the model
                self.model = new_model

                # Save the retrained model
                retrain_path = self.model_path.replace('.keras', '_retrained.keras')
                self.model.save(retrain_path)

                # Clear history to start fresh
                self.r2_history.clear()
                self.mse_history.clear()
                self.accuracy_history.clear()

                logger.info(f"Model successfully retrained. New validation loss: {val_loss:.4f}")
                return True
            else:
                logger.warning(f"New model performance worse ({val_loss:.4f} vs {current_recent_mse:.4f})")
                return False

        except Exception as e:
            logger.error(f"Retraining failed: {e}")
            return False

    def _calculate_confidence(self, prediction_record: Dict) -> float:
        """Calculate confidence based on recent performance"""
        if len(self.accuracy_history) < 10:
            return 0.5

        recent_accuracy = np.mean(list(self.accuracy_history)[-10:])
        recent_r2 = self.performance_stats.get('avg_r2', 0.0)

        confidence = (recent_accuracy + max(0, recent_r2)) / 2
        return min(1.0, max(0.1, confidence))

    def get_recent_performance(self) -> Dict[str, Any]:
        """Get recent performance statistics"""
        if not self.prediction_history:
            return {}

        return {
            'recent_accuracy': np.mean(self.accuracy_history) if self.accuracy_history else 0.0,
            'recent_r2': np.mean(self.r2_history) if self.r2_history else 0.0,
            'recent_mse': np.mean(self.mse_history) if self.mse_history else 0.0,
            'total_predictions': len(self.prediction_history),
            'drift_events': self.performance_stats['drift_events'],
            'last_retrain': self.performance_stats['last_retrain_time'],
            'consecutive_poor': self.consecutive_poor_predictions,
            'transformer_fitted': self.transformer_fitted
        }

    def connect_sensor_agents(self, sensor_agents: Dict):
        """Connect to sensor agents for drift confirmation"""
        self.sensor_agents = sensor_agents
        logger.info(f"Connected to {len(sensor_agents)} sensor agents")

    def get_performance_plot_data(self) -> Dict[str, List]:
        """Get data for performance visualization"""
        if not self.prediction_history:
            return {}

        recent_records = [p for p in self.prediction_history if p.get('var_ground_truth') is not None]

        return {
            'timestamps': [r['timestamp'] for r in recent_records],
            'predicted': [r['predicted_window'] for r in recent_records],
            'actual': [r['var_ground_truth'] for r in recent_records],
            'accuracy': [r.get('accuracy', 0) for r in recent_records],
            'r2_scores': list(self.r2_history),
            'mse_scores': list(self.mse_history)
        }

    def save_performance_state(self, filepath: str):
        """Save current performance state"""
        state = {
            'performance_stats': self.performance_stats.copy(),
            'prediction_history': list(self.prediction_history)[-100:],
            'accuracy_history': list(self.accuracy_history),
            'r2_history': list(self.r2_history),
            'mse_history': list(self.mse_history),
            'transformer_fitted': self.transformer_fitted
        }

        try:
            # Convert datetime objects to strings for JSON serialization
            for record in state['prediction_history']:
                if 'timestamp' in record:
                    # Check if timestamp is already a string
                    if hasattr(record['timestamp'], 'isoformat'):
                        record['timestamp'] = record['timestamp'].isoformat()
                # Convert numpy arrays to lists for JSON serialization
                if 'feature_vector' in record and hasattr(record['feature_vector'], 'tolist'):
                    record['feature_vector'] = record['feature_vector'].tolist()

            # Handle last_retrain_time
            if state['performance_stats']['last_retrain_time']:
                if hasattr(state['performance_stats']['last_retrain_time'], 'isoformat'):
                    state['performance_stats']['last_retrain_time'] = state['performance_stats']['last_retrain_time'].isoformat()

            # Save to file
            with open(filepath, 'w') as f:
                json.dump(state, f, default=str, indent=2)

            logger.info(f"Performance state saved to {filepath}")

        except Exception as e:
            logger.error(f"Failed to save performance state: {e}")
            # Try simpler save without complex objects
            simple_state = {
                'total_predictions': len(self.prediction_history),
                'avg_accuracy': np.mean(self.accuracy_history) if self.accuracy_history else 0.0,
                'avg_r2': np.mean(self.r2_history) if self.r2_history else 0.0,
                'avg_mse': np.mean(self.mse_history) if self.mse_history else 0.0,
                'drift_events': self.performance_stats['drift_events'],
                'retraining_events': self.performance_stats['retraining_events']
            }

            with open(filepath.replace('.json', '_simple.json'), 'w') as f:
                json.dump(simple_state, f, indent=2)
            print(f"Saved simplified performance state to {filepath.replace('.json', '_simple.json')}")



# Test with YOUR actual data - Real-time VAR calculation only
#
# Replays the last 100 stored training sequences through the agent: each
# sequence is flattened into the MLP's feature vector, the agent computes the
# VAR ground truth for the same sequence, and the two are compared. A summary
# is printed, the performance state is saved and, when enough labelled
# predictions exist, three diagnostic plots are shown.
#
# The set-up and the summary/plot sections are each run exactly once, so the
# model and dataset are loaded a single time and the results are saved and
# plotted a single time.
if __name__ == "__main__":
    # Initialize the agent with your actual model
    agent = AdaptiveWindowAgent(
        model_path="/content/drive/MyDrive/PHD/2025/DGRNet-MLP-Versions/METROPM_MLP_model_Daily.keras"
    )

    print("Loading your actual dataset...")

    # Load your actual saved dataset
    Long_train = np.load('/content/drive/MyDrive/PHD/2025/TEMP_OUTPUT_METROPM/multivariate_long_sequences-TRAIN-Daily-DIRECT-VAR.npy')
    print(f"Loaded Long_train shape: {Long_train.shape}")

    # Take last 100 entries as test module
    test_sequences = Long_train[-100:]
    print(f"Testing with last 100 sequences: {test_sequences.shape}")

    # Check model input requirements (informational only)
    if agent.model is not None:
        model_input_shape = agent.model.input_shape
        print(f"Model expects input shape: {model_input_shape}")
        required_features = model_input_shape[1]
    else:
        required_features = 600  # fallback used when no model could be loaded

    print(f"Model requires {required_features} features")
    print("\nStarting real-time VAR testing...")
    print("=" * 70)

    for i, sequence_3d in enumerate(test_sequences):
        # Flatten the (timesteps, variables) sequence into one feature vector
        features = sequence_3d.flatten()

        print(f"Sample {i+1}: Using flattened sequence of size {len(features)}")

        # Test the agent with real-time VAR calculation
        result = agent.predict_window_size(features, sequence_3d=sequence_3d)

        # Handle potential missing keys safely
        mlp_pred = result.get('predicted_window', 0)
        var_gt = result.get('var_ground_truth', None)
        error_msg = result.get('error', None)

        if error_msg:
            print(f"Sample {i+1}: ERROR - {error_msg}")
            continue

        # Print results every 10 iterations
        if i % 10 == 0:
            performance = agent.get_recent_performance()
            has_gt = var_gt is not None
            error = abs(mlp_pred - var_gt) if has_gt else 0
            accuracy_pct = ((1 - error/max(var_gt, 1)) * 100) if has_gt else 0

            print(f"Sample {i+1:3d}: MLP={mlp_pred:2d}, VAR_GT={var_gt if has_gt else 'N/A'}, "
                  f"Error={error:2d}, Accuracy={accuracy_pct:5.1f}%, "
                  f"R2={performance.get('recent_r2', 0):6.3f}")

        # Check for drift detection
        if result.get('drift_detected', False):
            print(f"*** DRIFT DETECTED at sample {i+1} ***")
            drift_action = result.get('drift_action')
            if drift_action:
                print(f"Drift action: {drift_action}")

    print("\n" + "=" * 70)
    print("FINAL PERFORMANCE SUMMARY")
    print("=" * 70)

    final_performance = agent.get_recent_performance()

    print(f"Total predictions: {final_performance.get('total_predictions', 0)}")
    print(f"Average accuracy: {final_performance.get('recent_accuracy', 0):.4f}")
    print(f"Average R2 score: {final_performance.get('recent_r2', 0):.4f}")
    print(f"Average MSE: {final_performance.get('recent_mse', 0):.4f}")
    print(f"Drift events: {final_performance.get('drift_events', 0)}")
    print(f"Retraining events: {agent.performance_stats['retraining_events']}")
    print(f"Transformer fitted: {final_performance.get('transformer_fitted', False)}")

    # Save test results
    agent.save_performance_state("real_data_test_results.json")
    print(f"\nTest results saved to: real_data_test_results.json")

    # Plot final results if we have valid predictions
    plot_data = agent.get_performance_plot_data()
    if plot_data and len(plot_data.get('predicted', [])) > 5:
        import matplotlib.pyplot as plt

        plt.figure(figsize=(15, 5))

        # Subplot 1: MLP vs VAR scatter
        plt.subplot(1, 3, 1)
        plt.scatter(plot_data['actual'], plot_data['predicted'], alpha=0.7, color='blue')
        min_val = min(min(plot_data['actual']), min(plot_data['predicted']))
        max_val = max(max(plot_data['actual']), max(plot_data['predicted']))
        plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='Perfect prediction')
        plt.xlabel('VAR Ground Truth Window Size')
        plt.ylabel('MLP Predicted Window Size')
        plt.title('MLP vs VAR Window Predictions')
        plt.legend()
        plt.grid(True)

        # Subplot 2: Accuracy trend
        plt.subplot(1, 3, 2)
        plt.plot(plot_data['accuracy'], color='green')
        plt.xlabel('Sample Number')
        plt.ylabel('Prediction Accuracy')
        plt.title('Accuracy Trend Over Time')
        plt.grid(True)

        # Subplot 3: R2 trend
        plt.subplot(1, 3, 3)
        if plot_data['r2_scores']:
            plt.plot(plot_data['r2_scores'], color='orange')
            plt.xlabel('Window Number')
            plt.ylabel('R2 Score')
            plt.title('R2 Score Evolution')
            plt.grid(True)

        plt.tight_layout()
        plt.show()

        # Calculate and print final statistics over all labelled predictions
        final_r2 = r2_score(plot_data['actual'], plot_data['predicted'])
        final_mse = mean_squared_error(plot_data['actual'], plot_data['predicted'])
        final_mae = mean_absolute_error(plot_data['actual'], plot_data['predicted'])

        print(f"\nFinal Test Statistics:")
        print(f"Overall R2 Score: {final_r2:.4f}")
        print(f"Overall MSE: {final_mse:.4f}")
        print(f"Overall MAE: {final_mae:.4f}")
        print(f"Mean prediction accuracy: {np.mean(plot_data['accuracy']):.4f}")
    else:
        print("No valid predictions made for plotting")

In [None]:
# Mount Google Drive at /content/drive so the /content/drive/MyDrive/... paths
# used for the model, checkpoint and datasets can be resolved.
# NOTE(review): this cell comes after the code that reads from Drive; presumably
# it has to be executed first in a fresh Colab session - confirm the cell order.
from google.colab import drive
drive.mount('/content/drive')