# Agent Validation Notebook

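This notebook validates the functionality of all agents in the AI Load Predictor system. Run the cells in order from a fresh kernel: later sections reuse variables (such as `processed_data` and `trained_models`) that earlier sections create.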

## Agents to Test:
1. Data Processing Agent
2. EDA Agent
3. Modeling Agent
4. Model Verification Agent
5. Model Deployment Agent
6. Forecasting Agent
7. Visualization Agent
8. Coordinating Agent

In [1]:
# Import required libraries
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Make the parent of the working directory importable so that
# `from agents... import ...` in the cells below can resolve.
# NOTE(review): this assumes the notebook is launched from a folder that sits
# directly under the project root — confirm for other launch locations.
working_dir = os.getcwd()
project_root = os.path.dirname(working_dir)
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

print(f"Project root: {project_root}")
print(f"Current working directory: {os.getcwd()}")

Project root: /Users/gonzalf1/git/AILoadPredictor2
Current working directory: /Users/gonzalf1/git/AILoadPredictor2/notebooks


## 1. Data Processing Agent Validation

In [3]:
# Test Data Processing Agent
def make_sample_load_data(periods=100, seed=42):
    """Build a small synthetic hourly dataset for when the sample CSV is missing.

    Args:
        periods: Number of consecutive hourly rows to generate.
        seed: Seed for the random generator, so repeated runs yield identical data.

    Returns:
        DataFrame with 'timestamp', 'load' and 'temperature' columns.
    """
    rng = np.random.default_rng(seed)
    return pd.DataFrame({
        # Lowercase 'h' is the hourly alias; uppercase 'H' is deprecated in
        # recent pandas releases.
        'timestamp': pd.date_range('2023-01-01', periods=periods, freq='h'),
        'load': rng.normal(100, 20, periods),
        'temperature': rng.normal(20, 5, periods)
    })


try:
    from agents.data_processing_agent import DataProcessingAgent

    print("✅ DataProcessingAgent imported successfully")

    # Initialize the agent
    data_agent = DataProcessingAgent()
    print("✅ DataProcessingAgent initialized successfully")

    # Prefer the sample CSV shipped with the project; fall back to synthetic data.
    sample_data_path = '../data/sample_hourly_load.csv'
    if os.path.exists(sample_data_path):
        # The file is handed to the agent as a binary file object.
        with open(sample_data_path, 'rb') as f:
            test_data = data_agent.ingest_from_file(f)
        print(f"✅ Data loaded from file: {test_data.shape}")
        print(f"Columns: {list(test_data.columns)}")
    else:
        # Seeded, so a re-run of this cell produces the same fallback data.
        test_data = make_sample_load_data()
        print("✅ Sample data created for testing")

    # Test data processing.
    # NOTE(review): freq='H' is passed through unchanged because how the agent
    # interprets this value is not visible from this notebook.
    processed_data = data_agent.process(
        data=test_data,
        timestamp_col='timestamp',
        target_col='load',
        freq='H'
    )
    print(f"✅ Data processed successfully: {processed_data.shape}")
    print(f"Processed columns: {list(processed_data.columns)}")

except Exception as e:
    # Report the failure but keep the notebook running so later cells still execute.
    print(f"❌ DataProcessingAgent failed: {str(e)}")
    import traceback
    traceback.print_exc()

✅ DataProcessingAgent imported successfully
✅ DataProcessingAgent initialized successfully
✅ Data loaded from file: (8760, 5)
Columns: ['timestamp', 'load', 'temperature', 'humidity', 'is_holiday']
✅ Data processed successfully: (8760, 14)
Processed columns: ['timestamp', 'load', 'hour', 'day', 'day_of_week', 'month', 'year', 'hour_sin', 'hour_cos', 'day_of_week_sin', 'day_of_week_cos', 'month_sin', 'month_cos', 'is_weekend']


## 2. EDA Agent Validation

In [5]:
# Test EDA Agent
try:
    from agents.eda_agent import EDAAgent
    print("✅ EDAAgent imported successfully")

    # Initialize the agent
    eda_agent = EDAAgent()
    print("✅ EDAAgent initialized successfully")

    # EDA runs on `processed_data`, which the data processing cell assigns.
    if 'processed_data' not in locals():
        print("❌ No processed data available for EDA")
    else:
        eda_results = eda_agent.analyze(
            data=processed_data,
            target_col='load',
            timestamp_col='timestamp'
        )
        print("✅ EDA performed successfully")
        print(f"EDA results keys: {list(eda_results.keys())}")

        # Report which of the expected artefacts are present in the result.
        if 'time_series_plot' in eda_results:
            print("✅ Time series plot generated")
        if 'descriptive_stats' in eda_results:
            print("✅ Descriptive statistics generated")
            print(eda_results['descriptive_stats'].head())

except Exception as e:
    # Report the failure but keep the notebook running.
    print(f"❌ EDAAgent failed: {str(e)}")
    import traceback
    traceback.print_exc()

✅ EDAAgent imported successfully
✅ EDAAgent initialized successfully
✅ EDA performed successfully
EDA results keys: ['descriptive_stats', 'time_series_plot', 'seasonality_plot', 'autocorrelation_plot', 'insights']
✅ Time series plot generated
✅ Descriptive statistics generated
  Statistic        Value
0     count  8760.000000
1      mean  9573.999290
2       std  2683.926395
3       min  2369.582584
4       25%  7584.125778


## 3. Modeling Agent Validation

In [None]:
# Test Modeling Agent
def format_metric(value, spec, suffix=''):
    """Render a metric for display, tolerating missing or non-numeric values.

    Args:
        value: Metric value; may be None or a non-numeric placeholder.
        spec: Format specification applied to numeric values, e.g. '.2f'.
        suffix: Text appended after a successfully formatted number, e.g. '%'.

    Returns:
        The formatted number followed by ``suffix``, or 'N/A' when ``value``
        cannot be converted to a float.
    """
    try:
        return f"{float(value):{spec}}{suffix}"
    except (TypeError, ValueError):
        # Applying a float format spec to a placeholder string would raise,
        # so missing metrics are rendered as plain text instead.
        return 'N/A'


try:
    from agents.modeling_agent import ModelingAgent

    print("✅ ModelingAgent imported successfully")

    # Initialize the agent
    modeling_agent = ModelingAgent()
    print("✅ ModelingAgent initialized successfully")

    # Training runs on `processed_data`, which the data processing cell assigns.
    if 'processed_data' in locals():
        # Test model training with a simple set of models
        models_to_train = ['RandomForest', 'LinearRegression']
        config = {
            'train_size': 0.8,
            'random_state': 42
        }

        trained_models = modeling_agent.train(
            data=processed_data,
            target_col='load',
            timestamp_col='timestamp',
            models=models_to_train,
            config=config
        )
        print("✅ Models trained successfully")
        print(f"Trained models: {list(trained_models['models'].keys())}")
        print(f"Best model: {trained_models['best_model']}")

        # Display metrics
        if 'metrics' in trained_models:
            print("✅ Model metrics generated")
            for model_name, metrics in trained_models['metrics'].items():
                # A missing 'mape' or 'r2' entry prints as N/A instead of
                # aborting the whole cell with a formatting error.
                mape_text = format_metric(metrics.get('mape'), '.2f', suffix='%')
                r2_text = format_metric(metrics.get('r2'), '.3f')
                print(f"{model_name}: MAPE={mape_text}, R2={r2_text}")
    else:
        print("❌ No processed data available for modeling")

except Exception as e:
    # Report the failure but keep the notebook running.
    print(f"❌ ModelingAgent failed: {str(e)}")
    import traceback
    traceback.print_exc()

## 4. Model Verification Agent Validation

In [None]:
# Test Model Verification Agent
try:
    from agents.model_verification_agent import ModelVerificationAgent
    print("✅ ModelVerificationAgent imported successfully")

    # Initialize the agent
    verification_agent = ModelVerificationAgent()
    print("✅ ModelVerificationAgent initialized successfully")

    # Verification needs `trained_models`, which the modeling cell assigns.
    if 'trained_models' not in locals():
        print("❌ No trained models available for verification")
    else:
        # Verify every trained model against the processed data set.
        verification_results = verification_agent.verify_models(
            models=trained_models['models'],
            data=processed_data,
            target_col='load',
            timestamp_col='timestamp'
        )
        print("✅ Model verification completed successfully")
        print(f"Verification results keys: {list(verification_results.keys())}")

        # Check verification metrics
        if 'verification_metrics' in verification_results:
            print("✅ Verification metrics generated")

except Exception as e:
    # Report the failure but keep the notebook running.
    print(f"❌ ModelVerificationAgent failed: {str(e)}")
    import traceback
    traceback.print_exc()

## 5. Forecasting Agent Validation

In [None]:
# Test Forecasting Agent
try:
    from agents.forecasting_agent import ForecastingAgent
    print("✅ ForecastingAgent imported successfully")

    # Initialize the agent
    forecasting_agent = ForecastingAgent()
    print("✅ ForecastingAgent initialized successfully")

    # Forecasting needs `trained_models`, which the modeling cell assigns.
    if 'trained_models' not in locals():
        print("❌ No trained models available for forecasting")
    else:
        # Forecast with the model the training result names as best.
        best_model_name = trained_models['best_model']
        best_model_info = trained_models['models'][best_model_name]

        forecasts = forecasting_agent.generate_forecasts(
            data=processed_data,
            model_info=best_model_info,
            horizon=24,  # 24-hour forecast
            confidence_interval=0.95
        )
        print("✅ Forecasts generated successfully")
        print(f"Forecast results keys: {list(forecasts.keys())}")

        # The forecast table is optional in the result; describe it when present.
        if 'forecast_data' in forecasts:
            forecast_df = forecasts['forecast_data']
            print(f"✅ Forecast data shape: {forecast_df.shape}")
            print(f"Forecast columns: {list(forecast_df.columns)}")

except Exception as e:
    # Report the failure but keep the notebook running.
    print(f"❌ ForecastingAgent failed: {str(e)}")
    import traceback
    traceback.print_exc()

## 6. Visualization Agent Validation

In [None]:
# Test Visualization Agent
try:
    from agents.visualization_agent import VisualizationAgent

    print("✅ VisualizationAgent imported successfully")

    # Initialize the agent
    viz_agent = VisualizationAgent()
    print("✅ VisualizationAgent initialized successfully")

    # Plots are drawn from `processed_data`, which the data processing cell assigns.
    if 'processed_data' in locals():
        # Create time series plot
        time_series_plot = viz_agent.create_time_series_plot(
            data=processed_data,
            timestamp_col='timestamp',
            target_col='load'
        )
        print("✅ Time series plot created successfully")

        # The forecasting cell treats 'forecast_data' as an optional key, so
        # check for it here too. Otherwise a missing key would be reported
        # as a VisualizationAgent failure.
        if 'forecasts' in locals() and 'forecast_data' in forecasts:
            forecast_plot = viz_agent.create_forecast_plot(
                historical_data=processed_data,
                forecast_data=forecasts['forecast_data'],
                timestamp_col='timestamp',
                target_col='load'
            )
            print("✅ Forecast plot created successfully")
        else:
            print("⚠️  Forecast plot skipped: no forecast data available")
    else:
        print("❌ No data available for visualization")

except Exception as e:
    # Report the failure but keep the notebook running.
    print(f"❌ VisualizationAgent failed: {str(e)}")
    import traceback
    traceback.print_exc()

## 7. Model Deployment Agent Validation

In [None]:
# Test Model Deployment Agent
try:
    from agents.model_deployment_agent import ModelDeploymentAgent
    
    print("✅ ModelDeploymentAgent imported successfully")
    
    # Initialize the agent
    deployment_agent = ModelDeploymentAgent()
    print("✅ ModelDeploymentAgent initialized successfully")
    
    # Test model deployment (usually involves saving/loading models)
    if 'trained_models' in locals():
        best_model_name = trained_models['best_model']
        best_model_info = trained_models['models'][best_model_name]
        
        # Test model preparation for deployment
        deployment_package = deployment_agent.prepare_model_for_deployment(
            model_info=best_model_info,
            model_name=best_model_name,
            metadata={
                'model_type': best_model_name,
                'training_date': pd.Timestamp.now().isoformat(),
                'performance_metrics': trained_models['metrics'][best_model_name]
            }
        )
        print("✅ Model prepared for deployment successfully")
        print(f"Deployment package keys: {list(deployment_package.keys())}")
    else:
        print("❌ No trained models available for deployment")
        
except Exception as e:
    print(f"❌ ModelDeploymentAgent failed: {str(e)}")
    import traceback
    traceback.print_exc()

## 8. Coordinating Agent Validation

In [None]:
# Test Coordinating Agent
try:
    from agents.coordinating_agent import CoordinatingAgent
    
    print("✅ CoordinatingAgent imported successfully")
    
    # Initialize the agent
    coordinator = CoordinatingAgent()
    print("✅ CoordinatingAgent initialized successfully")
    
    # Test the full workflow coordination
    print("Testing full workflow coordination...")
    
    # Check if all sub-agents are initialized
    sub_agents = [
        'data_processing_agent',
        'eda_agent', 
        'modeling_agent',
        'model_verification_agent',
        'model_deployment_agent',
        'forecasting_agent',
        'visualization_agent'
    ]
    
    for agent_name in sub_agents:
        if hasattr(coordinator, agent_name):
            print(f"✅ {agent_name} initialized in coordinator")
        else:
            print(f"❌ {agent_name} missing in coordinator")
    
    # Check workflow state
    print(f"✅ Workflow state initialized with keys: {list(coordinator.state.keys())}")
    
    # Test a simple coordination method
    if hasattr(coordinator, 'process_data') and 'test_data' in locals():
        coordinated_result = coordinator.process_data(
            data=test_data,
            timestamp_col='timestamp',
            target_col='load',
            freq='H'
        )
        print("✅ Coordinated data processing successful")
        
except Exception as e:
    print(f"❌ CoordinatingAgent failed: {str(e)}")
    import traceback
    traceback.print_exc()

## 9. Integration Test - Full Pipeline

In [None]:
# Full integration test using the coordinating agent
print("=" * 50)
print("FULL INTEGRATION TEST")
print("=" * 50)

# Sizes used throughout this cell, named once instead of repeated as literals.
N_HOURS = 168           # 1 week of hourly data
FORECAST_HORIZON = 24   # number of periods to forecast

try:
    # Create sample data for the full pipeline test.
    # Lowercase 'h' is the hourly alias; uppercase 'H' is deprecated in recent pandas.
    dates = pd.date_range('2023-01-01', periods=N_HOURS, freq='h')
    np.random.seed(42)
    hours = np.arange(N_HOURS)

    # Create realistic load pattern: constant base + daily cycle + weekly cycle + noise
    base_load = 100
    daily_pattern = 20 * np.sin(2 * np.pi * hours / 24)
    weekly_pattern = 10 * np.sin(2 * np.pi * hours / (24 * 7))
    noise = np.random.normal(0, 5, N_HOURS)
    load = base_load + daily_pattern + weekly_pattern + noise

    pipeline_data = pd.DataFrame({
        'timestamp': dates,
        'load': load,
        'temperature': 20 + 5 * np.sin(2 * np.pi * hours / 24) + np.random.normal(0, 2, N_HOURS)
    })

    print(f"✅ Integration test data created: {pipeline_data.shape}")

    # Reuse the coordinator from the previous cell when it exists. Otherwise
    # import the class here so this cell also works when run on its own.
    if 'coordinator' not in locals():
        from agents.coordinating_agent import CoordinatingAgent
        coordinator = CoordinatingAgent()

    # Run full pipeline
    print("\n1. Processing data...")
    # NOTE(review): freq='H' is passed through unchanged because how the
    # coordinator interprets this value is not visible from this notebook.
    processed = coordinator.process_data(
        data=pipeline_data,
        timestamp_col='timestamp',
        target_col='load',
        freq='H'
    )
    coordinator.state['processed_data'] = processed
    print(f"   ✅ Data processed: {processed.shape}")

    print("\n2. Performing EDA...")
    # Pipeline results use their own names so they do not overwrite the
    # `eda_results` / `forecasts` variables assigned by the per-agent cells.
    pipeline_eda_results = coordinator.perform_eda(
        data=processed,
        target_col='load',
        timestamp_col='timestamp'
    )
    coordinator.state['eda_results'] = pipeline_eda_results
    print(f"   ✅ EDA completed with {len(pipeline_eda_results)} result types")

    print("\n3. Training models...")
    models = coordinator.train_models(
        data=processed,
        target_col='load',
        timestamp_col='timestamp',
        models=['RandomForest', 'LinearRegression'],
        config={'train_size': 0.8, 'random_state': 42}
    )
    coordinator.state['trained_models'] = models
    coordinator.state['selected_model'] = models['best_model']
    print(f"   ✅ Models trained. Best model: {models['best_model']}")

    print("\n4. Generating forecasts...")
    pipeline_forecasts = coordinator.generate_forecasts(
        data=processed,
        model_name=models['best_model'],
        horizon=FORECAST_HORIZON
    )
    coordinator.state['forecasts'] = pipeline_forecasts
    print(f"   ✅ Forecasts generated for {FORECAST_HORIZON} periods")

    print("\n" + "=" * 50)
    print("INTEGRATION TEST COMPLETED SUCCESSFULLY!")
    print("=" * 50)

    # Summary
    print("\nPipeline Summary:")
    print(f"- Input data: {pipeline_data.shape[0]} records")
    print(f"- Processed data: {processed.shape[0]} records, {processed.shape[1]} features")
    print(f"- Models trained: {len(models['models'])}")
    print(f"- Best model: {models['best_model']}")
    print(f"- Forecast horizon: {FORECAST_HORIZON} periods")

except Exception as e:
    # Report the failure but keep the notebook running.
    print(f"❌ Integration test failed: {str(e)}")
    import traceback
    traceback.print_exc()

## 10. Validation Summary

In [None]:
# Validation Summary
def validation_status(namespace, required_names):
    """Return '✅' when every required variable name exists in ``namespace``, else '❌'.

    Args:
        namespace: Mapping of variable names to values (e.g. the notebook namespace).
        required_names: Iterable of variable names that must all be present.
    """
    return '✅' if all(name in namespace for name in required_names) else '❌'


print("=" * 60)
print("AGENT VALIDATION SUMMARY")
print("=" * 60)

# For each agent: the instance variable AND the result variable its cell
# assigns only after the agent's main call returned. Checking the instance
# alone would mark an agent as validated even when that call raised.
# The coordinator cell has no result it always assigns, so only the instance
# is checked there.
# NOTE(review): variables left over from an earlier run of the same kernel
# still count; restart the kernel and run all cells for a trustworthy summary.
AGENT_EVIDENCE = {
    'DataProcessingAgent': ('data_agent', 'processed_data'),
    'EDAAgent': ('eda_agent', 'eda_results'),
    'ModelingAgent': ('modeling_agent', 'trained_models'),
    'ModelVerificationAgent': ('verification_agent', 'verification_results'),
    'ForecastingAgent': ('forecasting_agent', 'forecasts'),
    'VisualizationAgent': ('viz_agent', 'time_series_plot'),
    'ModelDeploymentAgent': ('deployment_agent', 'deployment_package'),
    'CoordinatingAgent': ('coordinator',)
}

# Capture the namespace outside the comprehension: inside one, locals() may
# refer to the comprehension's own scope rather than the notebook's.
notebook_namespace = locals()
validation_results = {
    agent: validation_status(notebook_namespace, required)
    for agent, required in AGENT_EVIDENCE.items()
}

for agent, status in validation_results.items():
    print(f"{agent:<25} {status}")

successful_agents = sum(1 for status in validation_results.values() if status == '✅')
total_agents = len(validation_results)

print(f"\nValidation Results: {successful_agents}/{total_agents} agents validated successfully")

if successful_agents == total_agents:
    print("🎉 ALL AGENTS VALIDATED SUCCESSFULLY!")
else:
    failed_agents = [agent for agent, status in validation_results.items() if status == '❌']
    print(f"⚠️  Failed agents: {', '.join(failed_agents)}")

print("\nNext steps:")
print("- Review any failed agent validations above")
print("- Check import paths and dependencies")
print("- Ensure all required methods are implemented in each agent")
print("- Test the agents with real data in the main application")