# Agent Training Notebook

This notebook demonstrates how to train and evaluate agents in the Agentic AI framework.

**Topics Covered:**
- Agent initialization
- Environment setup
- Training loops
- Performance visualization
- Model checkpointing

## 1. Setup and Imports

In [None]:
import sys
sys.path.insert(0, '../src')

import numpy as np
import matplotlib.pyplot as plt
from agents import LearningAgent, AutonomousAgent
from environment import Simulator
from utils import MetricsTracker, Visualizer, setup_logger
from config import load_config

# Setup logging
# Build this notebook's logger through the project's `setup_logger` helper,
# passing the name 'agent_training' and the level 'INFO'.
# NOTE(review): `logger` is not referenced again in the visible cells.
logger = setup_logger('agent_training', level='INFO')
# Reaching this line means every import above resolved without raising.
print("✓ Imports successful")

## 2. Configuration

In [None]:
# Run-level settings: episode count, per-episode step budget, and how often
# (in episodes) progress is reported. 'save_frequency' is declared here as
# well, alongside the other cadence values.
CONFIG = dict(
    num_episodes=100,
    max_steps=200,
    eval_frequency=10,
    save_frequency=50,
)

# Hyperparameters that are handed to the agent constructor as one dict.
agent_config = dict(
    learning_rate=0.001,
    discount_factor=0.95,
    epsilon_start=1.0,
    epsilon_end=0.01,
    epsilon_decay=0.995,
    batch_size=32,
    memory_size=10000,
)

# Settings handed to the simulator. The step budget is read from CONFIG so
# the two values cannot drift apart.
env_config = dict(
    num_agents=1,
    state_dim=8,
    action_dim=4,
    max_steps=CONFIG['max_steps'],
    reward_type='dense',
)

# Echo the headline values so the run is self-describing in the cell output.
print("\n".join((
    "Configuration loaded:",
    f"  Episodes: {CONFIG['num_episodes']}",
    f"  Max steps per episode: {CONFIG['max_steps']}",
    f"  Learning rate: {agent_config['learning_rate']}",
)))

## 3. Initialize Environment and Agent

In [None]:
# Create environment
# The simulator receives the whole env_config dict as its only argument.
env = Simulator(env_config)
print(f"✓ Environment created: {env}")

# Create agent
# The agent is built from the hyperparameter dict plus a name, and
# initialize() is called explicitly before the agent is used for training.
agent = LearningAgent(agent_config, name="TrainingAgent")
agent.initialize()
print(f"✓ Agent initialized: {agent}")

# Initialize metrics tracker
# NOTE(review): window_size=100 matches CONFIG['num_episodes']; presumably
# it bounds the tracker's moving statistics — confirm against MetricsTracker.
metrics = MetricsTracker(window_size=100)
print("✓ Metrics tracker ready")

## 4. Training Loop

In [None]:
# Train the agent for CONFIG['num_episodes'] episodes. Each episode resets
# the environment, then alternates act -> step -> learn until the episode
# ends, and finally records the episode's reward, length and epsilon.
print(f"Starting training for {CONFIG['num_episodes']} episodes...\n")

# NOTE(review): CONFIG['save_frequency'] is defined but not used in this
# loop — periodic checkpointing during training is not implemented here.

for episode in range(CONFIG['num_episodes']):
    observation = env.reset()
    done = False
    episode_reward = 0
    steps = 0

    # End the episode when the environment reports `done` OR the configured
    # per-episode step budget is spent. The explicit budget check keeps the
    # cell from hanging if the environment never signals termination.
    while not done and steps < CONFIG['max_steps']:
        # Agent selects action
        action = agent.act(observation)

        # Execute action in environment
        next_observation, reward, done, info = env.step(action)

        # Agent learns from the single transition it just experienced
        experience = {
            'state': observation,
            'action': action,
            'reward': reward,
            'next_state': next_observation,
            'done': done
        }
        agent.learn(experience)

        episode_reward += reward
        steps += 1
        observation = next_observation

    # Record metrics
    metrics.record('episode_reward', episode_reward, episode=episode)
    metrics.record('episode_length', steps, episode=episode)
    metrics.record('epsilon', agent.epsilon, episode=episode)

    # Print progress every CONFIG['eval_frequency'] episodes
    if (episode + 1) % CONFIG['eval_frequency'] == 0:
        avg_reward = metrics.get_moving_average('episode_reward', 10)
        print(f"Episode {episode + 1}/{CONFIG['num_episodes']} | "
              f"Avg Reward: {avg_reward:.2f} | "
              f"ε: {agent.epsilon:.3f} | "
              f"Memory: {len(agent.memory)}")

print("\n✓ Training complete!")

## 5. Performance Analysis

In [None]:
# Summarise the run: a few fields from the agent's own stats dict, followed
# by aggregate figures pulled from the metrics tracker. Each value is looked
# up immediately before the line that prints it.
stats = agent.get_stats()
print("Agent Statistics:")

episodes_done = stats['episodes_completed']
print(f"  Episodes completed: {episodes_done}")

final_epsilon = stats['epsilon']
print(f"  Final epsilon: {final_epsilon:.3f}")

memory_used = stats['memory_size']
print(f"  Memory size: {memory_used}")

mean_reward = metrics.get_mean('episode_reward')
print(f"  Average reward: {mean_reward:.2f}")

best_reward = metrics.get_max('episode_reward')
print(f"  Best reward: {best_reward:.2f}")

mean_length = metrics.get_mean('episode_length')
print(f"  Average episode length: {mean_length:.2f}")

# Full textual report for the two per-episode series, preceded by a blank line.
report_text = metrics.generate_report(['episode_reward', 'episode_length'])
print("\n" + report_text)

## 6. Visualizations

In [None]:
# Plot three side-by-side panels (reward curve, episode length, epsilon)
# from the recorded metrics, save the figure to disk, and display it.
import os

# Create visualizer
# NOTE(review): `viz` is not used by any of the plotting calls below.
viz = Visualizer()

# Get data
rewards = metrics.get_all('episode_reward')
episode_lengths = metrics.get_all('episode_length')
epsilons = metrics.get_all('epsilon')

# Number of episodes averaged for the smoothed reward curve.
window = 20
figure_path = '../data/training/training_results.png'

# Plot learning curve
plt.figure(figsize=(15, 5))

plt.subplot(1, 3, 1)
plt.plot(rewards, alpha=0.3, label='Episode Reward')
if len(rewards) >= window:
    # mode='valid' yields len(rewards) - window + 1 points, so the first
    # averaged value lines up with episode index window - 1.
    moving_avg = np.convolve(rewards, np.ones(window) / window, mode='valid')
    plt.plot(range(window - 1, len(rewards)), moving_avg, 'r-', linewidth=2,
             label=f'Moving Avg ({window})')
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.title('Learning Curve')
plt.legend()
plt.grid(True, alpha=0.3)

plt.subplot(1, 3, 2)
plt.plot(episode_lengths)
plt.xlabel('Episode')
plt.ylabel('Steps')
plt.title('Episode Length')
plt.grid(True, alpha=0.3)

plt.subplot(1, 3, 3)
plt.plot(epsilons)
plt.xlabel('Episode')
plt.ylabel('Epsilon')
plt.title('Exploration Rate Decay')
plt.grid(True, alpha=0.3)

plt.tight_layout()
# savefig does not create missing directories; make sure the target exists
# so a fresh checkout does not fail with FileNotFoundError.
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
plt.savefig(figure_path, dpi=300, bbox_inches='tight')
plt.show()

print("✓ Visualizations saved to ../data/training/")

## 7. Save Model Checkpoint

In [None]:
# Save agent checkpoint
import os

checkpoint_path = '../data/checkpoints/agent_checkpoint.pkl'
# How agent.save treats a missing directory is not visible from this
# notebook, so create the parent directory defensively before saving.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
agent.save(checkpoint_path)
print(f"✓ Model checkpoint saved to {checkpoint_path}")

## 8. Test Trained Agent

In [None]:
# Test the trained agent
# Runs num_test_episodes episodes with agent.epsilon forced to 0.0, collects
# the total reward of each episode, and prints summary statistics.
print("Testing trained agent...")

test_rewards = []
num_test_episodes = 10

# Disable exploration for testing
original_epsilon = agent.epsilon
agent.epsilon = 0.0

try:
    for test_ep in range(num_test_episodes):
        obs = env.reset()
        done = False
        test_reward = 0

        while not done:
            action = agent.act(obs)
            obs, reward, done, _ = env.step(action)
            test_reward += reward

        test_rewards.append(test_reward)
finally:
    # Restore epsilon even if an episode raises or the cell is interrupted,
    # so the agent is never left with exploration switched off.
    agent.epsilon = original_epsilon

print(f"\nTest Results ({num_test_episodes} episodes):")
print(f"  Average reward: {np.mean(test_rewards):.2f}")
print(f"  Std deviation: {np.std(test_rewards):.2f}")
print(f"  Min reward: {np.min(test_rewards):.2f}")
print(f"  Max reward: {np.max(test_rewards):.2f}")

## Summary

This notebook demonstrated:
- ✓ Agent initialization and configuration
- ✓ Environment setup
- ✓ Training loop implementation
- ✓ Performance metrics tracking
- ✓ Visualization of training progress
- ✓ Model checkpointing
- ✓ Testing trained agent

Next steps:
- Experiment with different hyperparameters
- Try different agent types
- Implement custom reward functions
- Add more complex environments