# 🎛️ TRAFFIC FORECAST - CONTROL PANEL v5.1

**Project:** Traffic Forecast Academic (DSP391m)  
**Version:** 5.1 - Adaptive Scheduling & Smart Caching  
**Coverage:** HCMC Downtown, 4096m radius, 78 nodes  
**Cost:** $21/day (25% savings), $147 for 7 days  
**Updated:** October 29, 2025

---

## 📋 Pipeline Overview

This notebook controls the **entire data collection pipeline** for deployment:

1. **⚙️ Configuration** - Update configs, verify settings
2. **🗺️ Node Topology** - Manage Overpass cache, view node distribution  
3. **📡 Data Collection** - Run Google API collection, monitor progress
4. **✅ Data Quality** - Validate collected data, check completeness
5. **📊 Visualization** - Plot traffic patterns, node coverage
6. **🚀 Deployment** - Deploy to GCP VM, schedule collection
7. **👀 Monitoring** - Check logs, API costs, system health

---

## 1️⃣ Setup & Environment Check

In [None]:
import sys
import os
import json
import yaml
from pathlib import Path
from datetime import datetime
import subprocess

# Setup paths
# Step up one level only when the notebook is started from the `notebooks/`
# folder itself. A substring test on the whole path would also match unrelated
# ancestor directories (e.g. ~/notebooks/traffic-forecast) and pick the wrong root.
_cwd = Path.cwd()
PROJECT_ROOT = _cwd.parent if _cwd.name == 'notebooks' else _cwd
sys.path.insert(0, str(PROJECT_ROOT))  # lets project modules be imported from the root
os.chdir(PROJECT_ROOT)  # relative paths used by later cells resolve against the root

# Banner version matches the notebook header (v5.1)
print("=" * 70)
print("🎛️ TRAFFIC FORECAST CONTROL PANEL v5.1")
print("=" * 70)
print(f"📁 Project Root: {PROJECT_ROOT}")
print(f"🐍 Python: {sys.version.split()[0]}")
print(f"📅 Current Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("=" * 70)

### Check Required Files & Directories

In [None]:
# Files the pipeline expects, keyed by the label shown in the report.
# Paths are relative to PROJECT_ROOT.
required_files = {
    '⚙️ Config': 'configs/project_config.yaml',
    '🔑 API Keys': '.env',
    '🗺️ Overpass Cache': 'cache/overpass_topology.json',
    '📊 Statistics': 'data/statistics.json',
}

print("\n📋 FILE CHECK:")
print("-" * 70)
for label, rel_path in required_files.items():
    target = PROJECT_ROOT / rel_path
    if target.exists():
        marker = "✅"
        size_text = f"{target.stat().st_size:,} bytes"
    else:
        marker = "❌"
        size_text = "N/A"
    # One aligned row per file: status, label, relative path, size
    print(f"{marker} {label:20s} {rel_path:40s} {size_text}")

---
## 2️⃣ Configuration Management

### View Current Configuration

In [None]:
# Read the project configuration and print a curated subset of its settings
config_file = PROJECT_ROOT / 'configs' / 'project_config.yaml'

with open(config_file, 'r') as f:
    config = yaml.safe_load(f)

print("\n⚙️ CURRENT CONFIGURATION:")
print("=" * 70)

# Section name -> keys worth showing; keys absent from the file print as None
sections = {
    'area': ['name', 'center_lat', 'center_lon', 'radius_m'],
    'node_selection': ['min_degree', 'min_importance', 'min_distance_meters', 'road_types'],
    'collection': ['k_nearest', 'batch_size'],
    'google_api': ['rate_limit_per_minute', 'retry_attempts']
}

for section, keys in sections.items():
    heading = section.upper().replace('_', ' ')
    print(f"\n📌 {heading}:")
    for key in keys:
        setting = config.get(section, {}).get(key)
        print(f"   • {key:25s}: {setting}")

### Update Configuration (Optional)

In [None]:
def update_config(section, key, new_value):
    """Set ``config[section][key]`` in configs/project_config.yaml and save the file.

    The file is re-read on every call, changed in memory and written back with
    ``yaml.dump``; comments in the original YAML are therefore not preserved.

    Args:
        section: Top-level section name; created if missing or null.
        key: Setting name inside the section.
        new_value: Value to store (must be YAML-serializable).
    """
    config_file = PROJECT_ROOT / 'configs' / 'project_config.yaml'

    with open(config_file, 'r', encoding='utf-8') as f:
        # An empty YAML document loads as None; treat it as an empty mapping
        config = yaml.safe_load(f) or {}

    # A section that is absent, or present but null (`section:`), starts empty
    if config.get(section) is None:
        config[section] = {}

    old_value = config[section].get(key)
    config[section][key] = new_value

    with open(config_file, 'w', encoding='utf-8') as f:
        # allow_unicode keeps non-ASCII values readable instead of \uXXXX escapes
        yaml.dump(config, f, default_flow_style=False, sort_keys=False, allow_unicode=True)

    print(f"✅ Updated {section}.{key}: {old_value} → {new_value}")

# Example usage (uncomment to use):
# update_config('collection', 'batch_size', 10)
# update_config('google_api', 'rate_limit_per_minute', 2800)

print("ℹ️ Use update_config(section, key, value) to change settings")

---
## 3️⃣ Node Topology Management

### View Cached Topology

In [None]:
# Summarize the cached Overpass topology and, when available, its statistics file
cache_file = PROJECT_ROOT / 'cache' / 'overpass_topology.json'
stats_file = PROJECT_ROOT / 'data' / 'statistics.json'

if not cache_file.exists():
    print("❌ No cached topology found. Run Overpass collection first.")
else:
    with open(cache_file, 'r') as f:
        topology = json.load(f)

    nodes = topology.get('nodes', [])
    edges = topology.get('edges', [])
    node_total = len(nodes)

    print("\n🗺️ CACHED TOPOLOGY:")
    print("=" * 70)
    print(f"📍 Total Nodes: {node_total}")
    print(f"🔗 Total Edges: {len(edges)}")

    # Statistics are optional; every missing key falls back to 0
    if stats_file.exists():
        with open(stats_file, 'r') as f:
            stats = json.load(f)

        degree_low = stats.get('min_degree', 0)
        degree_high = stats.get('max_degree', 0)
        importance_low = stats.get('min_importance', 0)
        importance_high = stats.get('max_importance', 0)

        print(f"\n📊 NODE STATISTICS:")
        print(f"   • Average Degree: {stats.get('avg_degree', 0):.2f}")
        print(f"   • Degree Range: {degree_low} - {degree_high}")
        print(f"   • Average Importance: {stats.get('avg_importance', 0):.2f}")
        print(f"   • Importance Range: {importance_low:.1f} - {importance_high:.1f}")
        print(f"   • Min Distance: {stats.get('min_distance_meters', 0)}m")

        print(f"\n🚦 ROAD TYPE DISTRIBUTION:")
        distribution = stats.get('road_type_distribution', {})
        for road_type, count in distribution.items():
            # Share of all cached nodes; 0 when the cache holds no nodes
            pct = (count / node_total * 100) if nodes else 0
            print(f"   • {road_type:15s}: {count:3d} nodes ({pct:5.1f}%)")

### Refresh Topology Cache

In [None]:
def refresh_topology(force=False):
    """Re-run the Overpass collector to rebuild the topology cache.

    Does nothing (apart from printing a hint) when the cache file already
    exists and ``force`` is false. The collector runs in the ``dsp`` conda
    environment from the project root; its stdout is printed on success and
    its stderr on failure.
    """
    cache_file = PROJECT_ROOT / 'cache' / 'overpass_topology.json'

    if not force and cache_file.exists():
        print(f"ℹ️ Cache exists. Use refresh_topology(force=True) to force refresh.")
        return

    print("\n🔄 Refreshing topology from Overpass API...")
    print("-" * 70)

    conda_prefix = ['conda', 'run', '-n', 'dsp', '--no-capture-output']
    collector = ['python', 'traffic_forecast/collectors/overpass/collector.py']

    result = subprocess.run(
        conda_prefix + collector,
        cwd=PROJECT_ROOT,
        capture_output=True,
        text=True,
    )

    if result.returncode != 0:
        print("❌ Topology refresh failed!")
        print(result.stderr)
    else:
        print("✅ Topology refresh completed successfully!")
        print(result.stdout)

# Example usage (uncomment to use):
# refresh_topology(force=False)

print("ℹ️ Use refresh_topology(force=True) to update node cache")

---
## 4️⃣ Data Collection

### Check API Key

In [None]:
# Check .env file and report whether GOOGLE_MAPS_API_KEY is set (value masked)
env_file = PROJECT_ROOT / '.env'

if env_file.exists():
    api_key = None
    with open(env_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line.startswith('GOOGLE_MAPS_API_KEY='):
                # Split on the first '=' only, so values containing '=' stay intact;
                # surrounding quotes are stripped from the value.
                api_key = line.partition('=')[2].strip().strip('"\'')
                break

    if api_key is None:
        # The file exists but does not define the variable at all
        print("\n❌ GOOGLE_MAPS_API_KEY not found in .env")
    else:
        # Show only the first and last 4 characters so the notebook output does
        # not leak most of the secret; short values are reported as INVALID.
        masked_key = api_key[:4] + '...' + api_key[-4:] if len(api_key) > 24 else 'INVALID'
        print(f"\n🔑 Google Maps API Key: {masked_key}")
        print(f"   Status: {'✅ Set' if api_key else '❌ Missing'}")
else:
    print("❌ .env file not found!")

### Run Collection (Test Mode - 5 edges)

In [None]:
def run_collection_test():
    """Run the Google collector limited to 5 edges and report the outcome.

    The limit is handed to the collector through the ``GOOGLE_TEST_LIMIT``
    environment variable. The variable is passed via ``env=`` instead of a
    ``bash -c 'export ...'`` wrapper, so the call needs no bash and uses the
    same command as ``run_collection_full``.

    Returns:
        True when the collector exits with status 0, otherwise False.
    """
    print("\n🧪 TEST COLLECTION (5 edges):")
    print("=" * 70)

    cmd = [
        'conda', 'run', '-n', 'dsp', '--no-capture-output',
        'python', 'traffic_forecast/collectors/google/collector.py'
    ]

    # Inherit the current environment and add the test limit on top of it
    env = {**os.environ, 'GOOGLE_TEST_LIMIT': '5'}

    result = subprocess.run(cmd, cwd=PROJECT_ROOT, env=env, capture_output=True, text=True)

    print(result.stdout)
    if result.returncode != 0:
        print(result.stderr)

    return result.returncode == 0

# Example usage (uncomment to run):
# success = run_collection_test()

print("ℹ️ Use run_collection_test() to test with 5 edges")

### Run Full Collection (All Edges)

In [None]:
def run_collection_full():
    """Run the Google collector over every edge and echo its summary lines.

    The collector runs in the ``dsp`` conda environment from the project root.
    Its stdout is always printed, stderr only on a non-zero exit status.

    Returns:
        True when the collector exits with status 0, otherwise False.
    """
    print("\n🚀 FULL COLLECTION (all edges):")
    print("=" * 70)
    print("⏳ This may take several minutes...")

    collector_cmd = [
        'conda', 'run', '-n', 'dsp', '--no-capture-output',
        'python', 'traffic_forecast/collectors/google/collector.py'
    ]
    result = subprocess.run(collector_cmd, cwd=PROJECT_ROOT, capture_output=True, text=True)
    succeeded = result.returncode == 0

    print(result.stdout)
    if not succeeded:
        print(result.stderr)

    # Repeat the summary lines, but only if the output contains a success rate
    summary_markers = ('Total edges:', 'Successful:', 'Failed:', 'Success rate:')
    if 'Success rate:' in result.stdout:
        for raw_line in result.stdout.split('\n'):
            if any(marker in raw_line for marker in summary_markers):
                print(f"   {raw_line.strip()}")

    return succeeded

# Example usage (uncomment to run):
# success = run_collection_full()

print("ℹ️ Use run_collection_full() to collect all edges")

---
## 5️⃣ Data Quality Check

### Validate Collected Data

In [None]:
def validate_collection():
    """Validate data/traffic_edges.json and print a quality report.

    The report covers record count, per-field coverage across all records,
    speed statistics and the timestamp range. Null values are treated as
    missing so they cannot crash the statistics.

    Returns:
        False when the file is missing, is not a JSON list, or is empty;
        True once the report has been printed.
    """
    import statistics

    traffic_file = PROJECT_ROOT / 'data' / 'traffic_edges.json'

    if not traffic_file.exists():
        print("❌ No traffic data found. Run collection first.")
        return False

    with open(traffic_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Everything below indexes and iterates records, so a list is required
    if not isinstance(data, list):
        print("❌ Unexpected data format: expected a JSON list of records.")
        return False

    print("\n✅ DATA VALIDATION:")
    print("=" * 70)
    print(f"📦 Total Records: {len(data)}")

    if not data:
        print("❌ No data in file!")
        return False

    records = [d for d in data if isinstance(d, dict)]
    total = len(data)
    required_fields = ['origin', 'destination', 'speed_kmh', 'duration_sec', 'distance_km', 'timestamp']

    # Count coverage over every record, not just the first one
    print(f"\n📋 Field Coverage:")
    for field in required_fields:
        present = sum(1 for d in records if d.get(field) is not None)
        status = "✅" if present == total else "❌"
        print(f"   {status} {field} ({present}/{total} records)")

    # Data statistics (numeric speeds only; nulls and strings are skipped)
    speeds = [
        d['speed_kmh'] for d in records
        if isinstance(d.get('speed_kmh'), (int, float))
    ]

    if speeds:
        print(f"\n🚗 Speed Statistics (km/h):")
        print(f"   • Mean: {statistics.mean(speeds):.2f}")
        print(f"   • Median: {statistics.median(speeds):.2f}")
        print(f"   • Min: {min(speeds):.2f}")
        print(f"   • Max: {max(speeds):.2f}")
        # The sample standard deviation needs at least two values
        if len(speeds) > 1:
            print(f"   • Std Dev: {statistics.stdev(speeds):.2f}")

    # Timestamp check
    timestamps = [d['timestamp'] for d in records if d.get('timestamp') is not None]
    if timestamps:
        first_time = min(timestamps)
        last_time = max(timestamps)
        print(f"\n⏰ Time Range:")
        print(f"   • First: {first_time}")
        print(f"   • Last: {last_time}")

    return True

# Example usage (uncomment to run):
# validate_collection()

print("ℹ️ Use validate_collection() to check data quality")

---
## 6️⃣ Visualization

### Plot Node Coverage Map

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_node_coverage():
    """Scatter the cached nodes with the configured area center and radius.

    Reads node coordinates from cache/overpass_topology.json and the area
    definition from configs/project_config.yaml. Prints a message and returns
    early when the cache is missing or contains no nodes.
    """
    topology_path = PROJECT_ROOT / 'cache' / 'overpass_topology.json'
    if not topology_path.exists():
        print("❌ No topology cache found")
        return

    with open(topology_path, 'r') as fh:
        node_list = json.load(fh).get('nodes', [])

    if not node_list:
        print("❌ No nodes in cache")
        return

    node_total = len(node_list)

    # Extract coordinates
    lats = [node['lat'] for node in node_list]
    lons = [node['lon'] for node in node_list]

    # Area definition from the project configuration
    with open(PROJECT_ROOT / 'configs' / 'project_config.yaml', 'r') as fh:
        config = yaml.safe_load(fh)

    area = config['area']
    center_lat = area['center_lat']
    center_lon = area['center_lon']
    radius_m = area['radius_m']

    plt.figure(figsize=(12, 10))

    # Nodes
    plt.scatter(lons, lats, c='red', s=50, alpha=0.6,
                label=f'Nodes ({node_total})', zorder=3)

    # Area center
    plt.scatter([center_lon], [center_lat], c='blue', s=200, marker='*',
                label='Center', zorder=4, edgecolors='black')

    # Coverage radius, converted from meters to degrees with a rough constant
    radius_deg = radius_m / 111000
    coverage = plt.Circle(
        (center_lon, center_lat), radius_deg,
        color='blue', fill=False, linestyle='--',
        linewidth=2, label=f'Coverage ({radius_m}m)', zorder=2,
    )
    plt.gca().add_patch(coverage)

    plt.xlabel('Longitude', fontsize=12)
    plt.ylabel('Latitude', fontsize=12)
    plt.title(f'Node Coverage Map - {area["name"]}', fontsize=14, fontweight='bold')
    plt.legend(fontsize=10)
    plt.grid(True, alpha=0.3)
    plt.axis('equal')
    plt.tight_layout()
    plt.show()

    print(f"✅ Plotted {node_total} nodes")

# Example usage (uncomment to run):
# plot_node_coverage()

print("ℹ️ Use plot_node_coverage() to visualize nodes")

### Plot Traffic Speed Distribution

In [None]:
def plot_speed_distribution():
    """Show a histogram and a box plot of the collected edge speeds.

    Speeds come from the ``speed_kmh`` field of data/traffic_edges.json.
    Prints a message and returns early when the file is missing or no record
    carries that field.
    """
    traffic_path = PROJECT_ROOT / 'data' / 'traffic_edges.json'
    if not traffic_path.exists():
        print("❌ No traffic data found")
        return

    with open(traffic_path, 'r') as fh:
        records = json.load(fh)

    speeds = [rec.get('speed_kmh', 0) for rec in records if 'speed_kmh' in rec]
    if not speeds:
        print("❌ No speed data available")
        return

    mean_speed = np.mean(speeds)

    plt.figure(figsize=(14, 5))

    # Left panel: histogram with the mean marked
    plt.subplot(1, 2, 1)
    plt.hist(speeds, bins=30, color='steelblue', edgecolor='black', alpha=0.7)
    plt.xlabel('Speed (km/h)', fontsize=12)
    plt.ylabel('Frequency', fontsize=12)
    plt.title('Traffic Speed Distribution', fontsize=14, fontweight='bold')
    plt.axvline(mean_speed, color='red', linestyle='--', linewidth=2,
                label=f'Mean: {mean_speed:.1f}')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Right panel: box plot
    plt.subplot(1, 2, 2)
    plt.boxplot(speeds, vert=True)
    plt.ylabel('Speed (km/h)', fontsize=12)
    plt.title('Traffic Speed Box Plot', fontsize=14, fontweight='bold')
    plt.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    plt.show()

    print(f"✅ Plotted {len(speeds)} speed measurements")

# Example usage (uncomment to run):
# plot_speed_distribution()

print("ℹ️ Use plot_speed_distribution() to visualize traffic speeds")

---
## 7️⃣ Deployment to GCP VM

### Generate Deployment Package

In [None]:
def create_deployment_package():
    """Assemble a ``deploy_package/`` folder with the files needed on the VM.

    Copies the config, topology cache, .env, both collectors and
    requirements.txt (keeping their relative paths) and writes an executable
    ``deploy.sh`` next to them. Missing source files are reported, not fatal.
    """
    import shutil

    deploy_dir = PROJECT_ROOT / 'deploy_package'
    deploy_dir.mkdir(exist_ok=True)

    print("\n📦 CREATING DEPLOYMENT PACKAGE:")
    print("=" * 70)

    # Files to include.
    # NOTE(review): '.env' holds the API key, so the package contains a secret;
    # handle and transfer it accordingly.
    files_to_copy = [
        'configs/project_config.yaml',
        'cache/overpass_topology.json',
        '.env',
        'traffic_forecast/collectors/google/collector.py',
        'traffic_forecast/collectors/overpass/collector.py',
        'requirements.txt',
    ]

    for file_path in files_to_copy:
        src = PROJECT_ROOT / file_path
        if src.exists():
            dst = deploy_dir / file_path
            dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(src, dst)
            print(f"✅ Copied: {file_path}")
        else:
            print(f"⚠️ Missing: {file_path}")

    # `set -e` stops the script at the first failing step, so the final
    # "complete" message is only printed after every step succeeded.
    script_text = """#!/bin/bash
# Deployment script for GCP VM
set -e

echo "🚀 Deploying Traffic Forecast v5.1..."

# Install dependencies
pip install -r requirements.txt

# Run Overpass collection (one-time)
python traffic_forecast/collectors/overpass/collector.py

# Run Google collection
python traffic_forecast/collectors/google/collector.py

echo "✅ Deployment complete!"
"""

    # Create deployment script.
    # Explicit UTF-8 (the text contains emoji) and LF line endings: the platform
    # defaults can fail to encode the emoji or emit CRLF, which breaks the shebang.
    deploy_script = deploy_dir / 'deploy.sh'
    with open(deploy_script, 'w', encoding='utf-8', newline='\n') as f:
        f.write(script_text)

    deploy_script.chmod(0o755)
    print(f"✅ Created deployment script")

    file_count = sum(1 for entry in deploy_dir.rglob('*') if entry.is_file())
    print(f"\n📁 Package created at: {deploy_dir}")
    print(f"📋 Total files: {file_count}")

# Example usage (uncomment to run):
# create_deployment_package()

print("ℹ️ Use create_deployment_package() to prepare deployment files")

### Scheduled Collection Script

In [None]:
def create_cron_script():
    """Write scripts/run_scheduled_collection.sh for use from cron.

    The generated script changes to the project root, activates the ``dsp``
    conda environment, runs the Google collector with output appended to
    logs/collection.log, and records the collector's exit code in logs/cron.log.
    """
    script_file = PROJECT_ROOT / 'scripts' / 'run_scheduled_collection.sh'
    # The scripts/ folder may not exist yet on a fresh checkout
    script_file.parent.mkdir(parents=True, exist_ok=True)

    script_content = """#!/bin/bash
# Scheduled collection script for cron
# Add to crontab: */15 * * * * /path/to/run_scheduled_collection.sh

cd "$(dirname "$0")/.." || exit 1

# The log redirections below fail if the directory is missing
mkdir -p logs

# Activate conda environment
source ~/miniconda3/etc/profile.d/conda.sh
conda activate dsp

# Run collection
python traffic_forecast/collectors/google/collector.py >> logs/collection.log 2>&1
status=$?

# Log the outcome, including failures
echo "[$(date)] Collection finished (exit code: $status)" >> logs/cron.log
exit $status
"""

    # LF line endings regardless of platform: a CRLF shebang breaks the script
    with open(script_file, 'w', encoding='utf-8', newline='\n') as f:
        f.write(script_content)

    script_file.chmod(0o755)

    print(f"\n✅ Created scheduled collection script:")
    print(f"   {script_file}")
    print(f"\n📝 To schedule every 15 minutes, add to crontab:")
    print(f"   */15 * * * * {script_file}")

# Example usage (uncomment to run):
# create_cron_script()

print("ℹ️ Use create_cron_script() to generate cron script")

---
## 8️⃣ Monitoring & Cost Tracking

### Estimate API Costs

In [None]:
def estimate_api_costs(collections_per_day=96, *, k_nearest=3,
                       cost_per_request=0.005, node_count=None):
    """
    Estimate Google Maps API costs

    One request is assumed per edge, with ``k_nearest`` edges per node.

    Args:
        collections_per_day: Number of collection runs per day (default: 96 = every 15 min)
        k_nearest: Edges collected per node (default: 3)
        cost_per_request: Price of one request in USD (default: 0.005 = $5 per 1000 requests)
        node_count: Number of nodes; when None it is read from the topology
            cache, falling back to 78 if the cache file does not exist

    Returns:
        dict with 'daily_cost', 'weekly_cost', 'monthly_cost' (USD) and
        'free_days' (days covered by the $200 credit; 0 when the daily cost is 0)
    """
    if node_count is None:
        cache_file = PROJECT_ROOT / 'cache' / 'overpass_topology.json'
        if cache_file.exists():
            with open(cache_file, 'r') as f:
                topology = json.load(f)
            nodes = len(topology.get('nodes', []))
        else:
            nodes = 78  # default
    else:
        nodes = node_count

    # Calculate costs
    edges_per_collection = nodes * k_nearest

    print("\n💰 API COST ESTIMATION:")
    print("=" * 70)
    print(f"📍 Nodes: {nodes}")
    print(f"🔗 Edges per collection: {edges_per_collection}")
    print(f"🔄 Collections per day: {collections_per_day}")
    print(f"\n📊 COSTS:")

    # Daily
    daily_requests = edges_per_collection * collections_per_day
    daily_cost = daily_requests * cost_per_request
    print(f"   • Daily: {daily_requests:,} requests = ${daily_cost:.2f}")

    # Weekly
    weekly_requests = daily_requests * 7
    weekly_cost = daily_cost * 7
    print(f"   • Weekly: {weekly_requests:,} requests = ${weekly_cost:.2f}")

    # Monthly (30-day month)
    monthly_requests = daily_requests * 30
    monthly_cost = daily_cost * 30
    print(f"   • Monthly: {monthly_requests:,} requests = ${monthly_cost:.2f}")

    # Free tier
    free_tier = 200  # $200 free credit
    free_days = free_tier / daily_cost if daily_cost > 0 else 0
    print(f"\n🎁 FREE TIER ($200 credit):")
    print(f"   • Covers approximately {free_days:.1f} days")

    return {
        'daily_cost': daily_cost,
        'weekly_cost': weekly_cost,
        'monthly_cost': monthly_cost,
        'free_days': free_days
    }

# Example usage (uncomment to run):
# costs = estimate_api_costs(collections_per_day=96)

print("ℹ️ Use estimate_api_costs(collections_per_day=96) to calculate costs")

### Check Collection Logs

In [None]:
def check_logs(last_n_lines=50):
    """Print the last ``last_n_lines`` lines of logs/collection.log.

    Args:
        last_n_lines: How many trailing lines to show. Zero or a negative
            number shows no lines (a plain ``lines[-0:]`` slice would show
            the whole file).
    """
    from collections import deque

    log_file = PROJECT_ROOT / 'logs' / 'collection.log'

    if not log_file.exists():
        print("ℹ️ No logs yet. Logs will be created after first scheduled run.")
        return

    print(f"\n📋 LAST {last_n_lines} LOG LINES:")
    print("=" * 70)

    # A bounded deque keeps only the tail while streaming the file, so a large
    # log is never held in memory as a whole. Undecodable bytes are replaced
    # rather than aborting the display.
    with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
        tail = deque(f, maxlen=max(last_n_lines, 0))

    for line in tail:
        print(line.rstrip())

# Example usage (uncomment to run):
# check_logs(last_n_lines=50)

print("ℹ️ Use check_logs(last_n_lines=50) to view recent logs")

---
## 9️⃣ Quick Actions

In [None]:
# Cheat sheet of the helper functions defined in the cells above
quick_actions_menu = """
1. Test Collection (5 edges):
   run_collection_test()

2. Full Collection (all edges):
   run_collection_full()

3. Validate Data:
   validate_collection()

4. Plot Node Map:
   plot_node_coverage()

5. Plot Speed Distribution:
   plot_speed_distribution()

6. Refresh Topology:
   refresh_topology(force=True)

7. Update Config:
   update_config('collection', 'batch_size', 10)

8. Estimate Costs:
   estimate_api_costs(collections_per_day=96)

9. Create Deployment Package:
   create_deployment_package()

10. Create Cron Script:
    create_cron_script()
"""

print("\n🎯 QUICK ACTIONS:")
print("=" * 70)
print(quick_actions_menu)

---
## 📝 Notes

### Collection Schedule Recommendations

**For Academic Project (7-14 days):**
- **Frequency:** Every 15 minutes (96 collections/day)
- **Coverage:** Peak hours (7-9 AM, 5-7 PM) + off-peak
- **Duration:** 7-14 days minimum for temporal patterns
- **Cost:** ~$147-294 at the $21/day estimate above, partly offset by the free $200 credit

**For Production:**
- **Frequency:** Every 5-10 minutes during peak hours
- **Coverage:** 24/7 with adaptive intervals
- **Duration:** Continuous with data retention policy

### Data Quality Checklist

- [x] ✅ Topology cached with 78 nodes
- [x] ✅ Min distance 200m between nodes
- [x] ✅ Coverage radius 4096m
- [x] ✅ Real API only (no mock data)
- [ ] 🔄 Collect for 7+ days
- [ ] 🔄 Validate completeness
- [ ] 🔄 Check for missing timestamps
- [ ] 🔄 Monitor API errors

### Deployment Checklist

- [ ] 🚀 Create GCP VM instance
- [ ] 🚀 Install conda environment
- [ ] 🚀 Copy deployment package
- [ ] 🚀 Configure .env with API key
- [ ] 🚀 Test collection manually
- [ ] 🚀 Setup cron schedule
- [ ] 🚀 Monitor logs daily
- [ ] 🚀 Track API costs

---
**End of Control Panel**

# 🎛️ TRAFFIC FORECAST - CONTROL PANEL v5.0

**Project:** Traffic Forecast Academic (DSP391m)  
**Version:** 5.0 - Real API Only  
**Coverage:** HCMC Downtown, 4096m radius, 78 nodes  
**Updated:** October 29, 2025

---

## 📋 Pipeline Overview

This notebook controls the **entire data collection pipeline** for deployment:

1. **Configuration Management** - Update configs, verify settings
2. **Node Topology** - Manage Overpass cache, view node distribution
3. **Data Collection** - Run Google API collection, monitor progress
4. **Data Quality** - Validate collected data, check completeness
5. **Visualization** - Plot traffic patterns, node coverage
6. **Deployment** - Deploy to GCP VM, schedule collection
7. **Monitoring** - Check logs, API costs, system health

---

## 1. Project Overview & Status

In [None]:
import sys
from pathlib import Path
import warnings
# Silence all warnings for the rest of the notebook session
warnings.filterwarnings('ignore')

# Add project root to path: the parent directory when the working directory
# path contains 'notebooks', otherwise the working directory itself
_cwd = Path.cwd()
if 'notebooks' in str(_cwd):
    project_root = _cwd.parent
else:
    project_root = _cwd
sys.path.insert(0, str(project_root))

# Import project modules (resolvable thanks to the path entry added above)
from traffic_forecast import PROJECT_ROOT, __version__
from traffic_forecast.ml.data_loader import DataLoader

python_version = sys.version.split()[0]
print(f'Project Root: {PROJECT_ROOT}')
print(f'Version: {__version__}')
print(f'Python: {python_version}')

In [None]:
# Report how many data runs exist and which optional model modules import cleanly
loader = DataLoader()
runs = loader.list_runs()

separator = '=' * 60
latest_run = runs[0] if runs else "None"

print('SYSTEM STATUS')
print(separator)
print(f'Total Data Runs: {len(runs)}')
print(f'Latest Run: {latest_run}')

# Each import is attempted on its own so one broken module does not hide the others
try:
    from traffic_forecast.ml.trainer import ModelTrainer
except Exception as e:
    print(f'ML Models: ERROR - {e}')
else:
    print('ML Models: READY')

try:
    from traffic_forecast.models.lstm_traffic import LSTMTrafficPredictor
except Exception:
    print(f'LSTM Model: NOT AVAILABLE')
else:
    print('LSTM Model: AVAILABLE')

try:
    from traffic_forecast.models.graph import ASTGCNTrafficModel
except Exception:
    print(f'ASTGCN Model: NOT AVAILABLE')
else:
    print('ASTGCN Model: AVAILABLE')

print(separator)

## 2. Quick Actions

### 2.1 Run Data Collection

In [None]:
# Run single collection cycle using subprocess
import subprocess

# Launch one collection cycle (no visualization) in the `dsp` conda environment
collection_script = PROJECT_ROOT / 'scripts' / 'collection' / 'collect_and_render.py'

collection_cmd = ['conda', 'run', '-n', 'dsp', 'python', str(collection_script)]
collection_cmd += ['--once', '--no-visualize']

result = subprocess.run(collection_cmd, capture_output=True, text=True)

# stdout is always shown; stderr only when the script reports failure
print(result.stdout)
if result.returncode != 0:
    print('ERROR:', result.stderr)

### 2.2 Train ML Models

In [None]:
# Quick model training
from traffic_forecast.ml.trainer import ModelTrainer
from traffic_forecast.ml.data_loader import DataLoader

# Build the train/test split once; later cells reuse X_train, X_test, y_train, y_test
loader = DataLoader()
X_train, X_test, y_train, y_test = loader.prepare_train_test()

# Train XGBoost (best model) and score it on the held-out split
trainer = ModelTrainer(model_type='xgboost')
trainer.train(X_train, y_train)
metrics = trainer.evaluate(X_test, y_test)

print('\nXGBoost Training Results:')
for metric_name, score in metrics.items():
    print(f'{metric_name}: {score:.4f}')

## 3. Data Management

### 3.1 List All Data Runs

In [None]:
import pandas as pd
from datetime import datetime

# List the 10 most recently modified data runs.
# NOTE(review): assumes list_runs() returns pathlib.Path-like objects, as the
# original cell did (it uses .name and .stat()) — confirm against DataLoader.
runs = loader.list_runs()

# Sort by modification time before truncating, so "latest 10" holds whatever
# order list_runs() happens to return.
newest_first = sorted(runs, key=lambda run_dir: run_dir.stat().st_mtime, reverse=True)
run_info = [
    {'Run Directory': run_dir.name, 'Date': run_dir.stat().st_mtime}
    for run_dir in newest_first[:10]
]

if run_info:
    df_runs = pd.DataFrame(run_info)
    df_runs['Date'] = pd.to_datetime(df_runs['Date'], unit='s')
    df_runs = df_runs.sort_values('Date', ascending=False)

    print(f'Latest {len(df_runs)} Data Runs:')
    display(df_runs)
else:
    # An empty record list yields a frame without a 'Date' column, which would
    # raise KeyError above; keep a well-formed empty frame instead.
    df_runs = pd.DataFrame(columns=['Run Directory', 'Date'])
    print('No data runs found. Run a collection first.')

### 3.2 Cleanup Old Runs

In [None]:
# Cleanup runs older than 14 days using subprocess
import subprocess

# Run the cleanup script with a 14-day threshold in the `dsp` conda environment
cleanup_script = PROJECT_ROOT / 'scripts' / 'data_management' / 'cleanup_runs.py'

cleanup_cmd = ['conda', 'run', '-n', 'dsp', 'python', str(cleanup_script)]
cleanup_cmd += ['--days', '14']

result = subprocess.run(cleanup_cmd, capture_output=True, text=True)

# stdout is always shown; stderr only when the script reports failure
print(result.stdout)
if result.returncode != 0:
    print('ERROR:', result.stderr)

## 4. Model Training Center

### 4.1 Train All Traditional ML Models

In [None]:
from traffic_forecast.ml.trainer import ModelTrainer

# Train each traditional model on the shared split and collect its metrics
models_to_train = ['random_forest', 'xgboost', 'lightgbm', 'gradient_boosting']
results = {}

for model_type in models_to_train:
    print(f'\nTraining {model_type}...')
    trainer = ModelTrainer(model_type=model_type)
    trainer.train(X_train, y_train)
    results[model_type] = metrics = trainer.evaluate(X_test, y_test)

# One row per model, best r2 first
df_results = pd.DataFrame(results).T.sort_values('r2', ascending=False)

print('\nModel Comparison:')
display(df_results)

### 4.2 Train LSTM Model

In [None]:
# Best-effort LSTM training: any failure (including a missing module) is
# reported as a message instead of stopping the notebook.
try:
    from traffic_forecast.models.lstm_traffic import LSTMTrafficPredictor

    lstm_settings = dict(sequence_length=12, lstm_units=[128, 64], dropout_rate=0.2)
    lstm = LSTMTrafficPredictor(**lstm_settings)

    # The test split doubles as the validation set here
    fit_options = dict(X_val=X_test, y_val=y_test, epochs=50, batch_size=32, verbose=1)
    history = lstm.fit(X_train, y_train, **fit_options)

    metrics = lstm.evaluate(X_test, y_test)
    print('\nLSTM Results:')
    for metric_name, score in metrics.items():
        print(f'{metric_name}: {score:.4f}')
except Exception as err:
    print(f'LSTM training failed: {err}')

## 5. Visualization Dashboard

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')

# 2x2 dashboard built from the most recent data run
df_latest = loader.load_latest_run()

fig, axes = plt.subplots(2, 2, figsize=(15, 10))
(ax_speed, ax_duration), (ax_weather, ax_series) = axes

# Top-left: speed distribution
ax_speed.hist(df_latest['speed_kmh'], bins=50, edgecolor='black')
ax_speed.set_title('Speed Distribution')
ax_speed.set_xlabel('Speed (km/h)')
ax_speed.set_ylabel('Frequency')

# Top-right: duration distribution
ax_duration.hist(df_latest['duration_minutes'], bins=50, edgecolor='black', color='orange')
ax_duration.set_title('Duration Distribution')
ax_duration.set_xlabel('Duration (minutes)')
ax_duration.set_ylabel('Frequency')

# Bottom-left: temperature vs speed, only when weather data is present
has_temperature = 'temperature_c' in df_latest.columns
if has_temperature:
    ax_weather.scatter(df_latest['temperature_c'], df_latest['speed_kmh'], alpha=0.3)
    ax_weather.set_title('Temperature vs Speed')
    ax_weather.set_xlabel('Temperature (C)')
    ax_weather.set_ylabel('Speed (km/h)')

# Bottom-right: mean speed per timestamp, only when timestamps are present
has_timestamp = 'timestamp' in df_latest.columns
if has_timestamp:
    speed_ts = df_latest.groupby('timestamp')['speed_kmh'].mean()
    ax_series.plot(speed_ts.index, speed_ts.values)
    ax_series.set_title('Average Speed Over Time')
    ax_series.set_xlabel('Time')
    ax_series.set_ylabel('Speed (km/h)')
    ax_series.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 6. System Health Check

In [None]:
def check_module(module_name):
    """Return 'OK' if the named package can be imported, otherwise 'MISSING'.

    ``module_name`` may be a distribution name whose import name differs:
    'scikit-learn' is imported as ``sklearn``. Without this mapping it could
    never be imported and would always be reported as missing.
    """
    import importlib

    # Distribution name -> importable module name, where the two differ
    import_names = {'scikit-learn': 'sklearn'}

    try:
        importlib.import_module(import_names.get(module_name, module_name))
        return 'OK'
    except Exception:
        # Heavy packages can fail with more than ImportError while loading;
        # any such failure counts as unavailable. KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        return 'MISSING'

# Availability table for the packages the project relies on
monitored_modules = ['pandas', 'numpy', 'scikit-learn', 'xgboost', 'lightgbm', 'tensorflow']
health_status = {
    'Module': monitored_modules,
    'Status': [check_module(module) for module in monitored_modules],
}

df_health = pd.DataFrame(health_status)

print('System Health Check:')
display(df_health)

# Total size of the files under data/downloads, reported in MB
data_dir = PROJECT_ROOT / 'data' / 'downloads'
if data_dir.exists():
    total_size = 0
    for entry in data_dir.rglob('*'):
        if entry.is_file():
            total_size += entry.stat().st_size
    print(f'\nData Directory Size: {total_size / 1024 / 1024:.2f} MB')

## 7. Deployment & Production

### 7.1 Export Best Model

In [None]:
# Export best model for deployment
best_trainer = ModelTrainer(model_type='xgboost')
best_trainer.train(X_train, y_train)

export_dir = PROJECT_ROOT / 'models' / 'production'
export_dir.mkdir(parents=True, exist_ok=True)

model_path = export_dir / 'xgboost_best.pkl'
best_trainer.save(str(model_path))
print(f'Model exported to: {export_dir}')

### 7.2 Run API Server (Development)

In [None]:
# The FastAPI development server is meant to be started from a separate
# terminal, so this cell only prints the command instead of running it.
api_server_command = 'conda run -n dsp uvicorn traffic_forecast.api.main:app --reload --port 8000'

print('To start the API server, run this command in a terminal:')
print(api_server_command)

---

## End of Control Dashboard

**Navigation:**
- [DATA_DASHBOARD.ipynb](./DATA_DASHBOARD.ipynb) - Data exploration and analysis
- [ML_TRAINING.ipynb](./ML_TRAINING.ipynb) - Detailed ML training
- [SCRIPTS_RUNNER.ipynb](./SCRIPTS_RUNNER.ipynb) - Script execution

**Documentation:**
- [README.md](../README.md) - Project overview
- [CHANGELOG.md](../CHANGELOG.md) - Version history