# Blackholio Agent Training Visualization

This notebook provides comprehensive visualization and analysis of training runs for Blackholio agents.

## Features
- Load and visualize training logs
- Compare multiple training runs
- Analyze reward progression and learning curves
- Identify training issues and optimization opportunities
- Generate training reports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
import glob
from typing import Dict, List, Optional

# Plot styling: matplotlib's seaborn-v0_8 style sheet plus seaborn's "husl" palette
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Default figure size and font size, set in a single rcParams update
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})

print("Training visualization notebook loaded successfully!")

## 1. Load Training Data

Load training logs from TensorBoard event files or CSV log files. If neither source is available, sample data is generated for demonstration.

In [None]:
class TrainingDataLoader:
    """Load training metrics for an experiment from a log directory.

    Both loaders return ``None`` when nothing could be loaded, so callers can
    chain fallbacks with ``is None`` checks.
    """

    def __init__(self, log_dir: str):
        """Store ``log_dir`` as the directory that contains experiment logs."""
        self.log_dir = Path(log_dir)

    def load_tensorboard_data(self, experiment_name: str) -> Optional[pd.DataFrame]:
        """Load scalar metrics from TensorBoard logs.

        Args:
            experiment_name: Path of the experiment's event data, relative to
                ``log_dir``.

        Returns:
            A DataFrame with one column per scalar tag, indexed by step, or
            ``None`` when the path does not exist, TensorBoard is not
            installed, or no scalar data was logged.
        """
        event_file = self.log_dir / experiment_name
        # Check the path up front: a missing experiment must produce None so
        # that callers fall through to their next data source.
        if not event_file.exists():
            print(f"TensorBoard logs not found: {event_file}")
            return None

        try:
            # Imported lazily so the notebook still works without TensorBoard.
            from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

            event_acc = EventAccumulator(str(event_file))
            event_acc.Reload()

            # Extract scalar data: one Series per tag, indexed by step
            data = {}
            for tag in event_acc.Tags()['scalars']:
                scalar_events = event_acc.Scalars(tag)
                steps = [e.step for e in scalar_events]
                values = [e.value for e in scalar_events]
                data[tag] = pd.Series(values, index=steps)

        except ImportError:
            print("TensorBoard not available. Please install with: pip install tensorboard")
            return None

        if not data:
            # An empty frame would look like a successful load to callers.
            print(f"No scalar data found in TensorBoard logs: {event_file}")
            return None

        return pd.DataFrame(data)

    def load_csv_data(self, experiment_name: str) -> Optional[pd.DataFrame]:
        """Load metrics from ``<log_dir>/<experiment_name>.csv``.

        Returns:
            The parsed CSV as a DataFrame, or ``None`` when the file does not
            exist.
        """
        csv_file = self.log_dir / f"{experiment_name}.csv"
        if not csv_file.exists():
            print(f"CSV file not found: {csv_file}")
            return None
        return pd.read_csv(csv_file)

# Create the loader for the configured log directory
log_dir = "logs"  # Change this to your logs directory
loader = TrainingDataLoader(log_dir)

# Report which experiment sub-directories exist, or explain how to fix the path
if not Path(log_dir).exists():
    print(f"Log directory not found: {log_dir}")
    print("Please update the log_dir variable to point to your training logs.")
else:
    experiments = [entry.name for entry in Path(log_dir).iterdir() if entry.is_dir()]
    print(f"Available experiments: {experiments}")

In [None]:
# Load specific experiment data
experiment_name = "blackholio_agent_20250525"  # Change this to your experiment name

# Try each source in turn. A source that yields nothing (None or an empty
# DataFrame) is treated as missing so that the next fallback is attempted.

# Option 1: TensorBoard data
training_data = loader.load_tensorboard_data(experiment_name)

# Option 2: CSV data (if TensorBoard data is unavailable)
if training_data is None or training_data.empty:
    training_data = loader.load_csv_data(experiment_name)

# Option 3: Generate sample data for demonstration
if training_data is None or training_data.empty:
    print("No training data found. Generating sample data for demonstration.")

    # Generate realistic sample training data (fixed seed for reproducibility)
    np.random.seed(42)
    n_steps = 1000
    steps = np.arange(n_steps)

    # Simulate learning curves; `steps + 1` keeps the logarithm defined at step 0
    episode_reward = np.cumsum(np.random.normal(0.1, 1.0, n_steps)) + 100 * np.log(steps + 1)
    policy_loss = 2.0 * np.exp(-steps / 200) + 0.1 + np.random.normal(0, 0.05, n_steps)
    value_loss = 1.5 * np.exp(-steps / 150) + 0.05 + np.random.normal(0, 0.03, n_steps)
    entropy = 1.0 * np.exp(-steps / 300) + 0.01 + np.random.normal(0, 0.02, n_steps)
    episode_length = 500 + 200 * np.log(steps + 1) + np.random.normal(0, 50, n_steps)

    training_data = pd.DataFrame({
        'episode_reward': episode_reward,
        'policy_loss': policy_loss,
        'value_loss': value_loss,
        'entropy': entropy,
        'episode_length': episode_length,
        'learning_rate': 3e-4 * np.exp(-steps / 500),
        'fps': 20 + np.random.normal(0, 2, n_steps)
    }, index=steps)

print(f"Training data shape: {training_data.shape}")
print(f"Available metrics: {list(training_data.columns)}")
training_data.head()

## 2. Training Progress Visualization

Visualize key training metrics over time.

In [None]:
def plot_training_metrics(data: pd.DataFrame, metrics: List[str], window_size: int = 50):
    """Plot each requested metric in its own subplot, raw and smoothed.

    Args:
        data: Training metrics, one column per metric; the index is used as
            the x-axis.
        metrics: Column names to plot, one subplot per name. Names missing
            from ``data`` get a placeholder message instead of a curve.
        window_size: Width of the centered rolling-mean window used for the
            smoothed curve.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    n_metrics = len(metrics)
    if n_metrics == 0:
        # plt.subplots cannot build a grid with zero rows.
        print("No metrics requested; nothing to plot.")
        return

    # squeeze=False always returns a 2-D array of axes, so a single metric
    # needs no special-casing.
    _fig, axes = plt.subplots(n_metrics, 1, figsize=(14, 4 * n_metrics), squeeze=False)

    for ax, metric in zip(axes[:, 0], metrics):
        if metric not in data.columns:
            ax.text(0.5, 0.5, f'Metric "{metric}" not found',
                    transform=ax.transAxes, ha='center', va='center')
            continue

        pretty_name = metric.replace('_', ' ').title()

        # Raw data
        ax.plot(data.index, data[metric], alpha=0.3, color='lightblue', label='Raw')

        # Smoothed data (centered window, so both ends of the curve are NaN)
        smoothed = data[metric].rolling(window=window_size, center=True).mean()
        ax.plot(data.index, smoothed, linewidth=2, label=f'Smoothed (window={window_size})')

        ax.set_title(f'{pretty_name} Over Training')
        ax.set_xlabel('Training Steps')
        ax.set_ylabel(pretty_name)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

# Plot the reward curve together with the loss and entropy curves
key_metrics = [
    'episode_reward',
    'policy_loss',
    'value_loss',
    'entropy',
]
plot_training_metrics(data=training_data, metrics=key_metrics)

## 3. Generate Training Report

Create a comprehensive training report.

In [None]:
def _format_cv(series: pd.Series) -> str:
    """Format the coefficient of variation (std / mean), or "n/a" if undefined."""
    if series.empty:
        return "n/a"
    mean = series.mean()
    if mean == 0:
        return "n/a"
    return f"{series.std() / mean:.3f}"


def generate_training_report(data: pd.DataFrame, experiment_name: str = "Training"):
    """Print a text summary of a training run.

    Sections are printed only when their columns are present in ``data``:
    reward statistics (``episode_reward``), loss stability (``policy_loss``
    and ``value_loss`` together) and throughput (``fps``).

    Args:
        data: Training metrics, one row per logged step and one column per
            metric.
        experiment_name: Label shown in the report header.

    Returns:
        None. The report is written to standard output.
    """
    print(f"Training Report: {experiment_name}")
    print("=" * 60)

    # Basic stats
    print("\nBasic Statistics:")
    print(f"  Total training steps: {len(data)}")
    print(f"  Data points collected: {data.count().sum()}")
    print(f"  Missing data points: {data.isnull().sum().sum()}")

    # Reward metrics
    if 'episode_reward' in data.columns:
        rewards = data['episode_reward'].dropna()
        print("\nReward Performance:")
        if rewards.empty:
            print("  No reward data available")
        else:
            print(f"  Mean reward: {rewards.mean():.2f}")
            print(f"  Std reward: {rewards.std():.2f}")
            print(f"  Max reward: {rewards.max():.2f}")
            print(f"  Min reward: {rewards.min():.2f}")

            # Learning progress: compare the first and last 100 points. For
            # runs shorter than 200 points the window shrinks to half the run
            # so the two windows do not overlap.
            window = min(100, max(1, len(rewards) // 2))
            early_rewards = rewards.head(window).mean()
            late_rewards = rewards.tail(window).mean()
            if early_rewards == 0:
                # A relative improvement over a zero baseline is undefined.
                print("  Improvement: n/a (early mean reward is zero)")
            else:
                improvement = (late_rewards - early_rewards) / abs(early_rewards) * 100
                print(f"  Improvement: {improvement:.1f}%")

    # Training stability
    if 'policy_loss' in data.columns and 'value_loss' in data.columns:
        policy_loss = data['policy_loss'].dropna()
        value_loss = data['value_loss'].dropna()

        print("\nTraining Stability:")
        print(f"  Policy loss CV: {_format_cv(policy_loss)}")
        print(f"  Value loss CV: {_format_cv(value_loss)}")

    # Throughput metrics
    if 'fps' in data.columns:
        fps = data['fps'].dropna()
        print("\nPerformance:")
        print(f"  Average FPS: {fps.mean():.1f}")
        print(f"  FPS stability: {fps.std():.1f}")

    print("\n" + "=" * 60)

# Print the report for the experiment loaded above
generate_training_report(data=training_data, experiment_name=experiment_name)