In [None]:
# Enhanced Warehouse Environment Exploration
# This notebook demonstrates the autonomous parcel routing environment

import sys
from pathlib import Path

# Treat the parent of the current working directory as the project root and
# put its `src` folder at the front of the import path (only once, so that
# re-running this cell does not pile up duplicate entries).
project_root = Path.cwd().parent
src_dir = project_root.joinpath('src')
_src_entry = str(src_dir)
if _src_entry not in sys.path:
    sys.path.insert(0, _src_entry)

print(f"Project root: {project_root}")
print("Ready to explore the enhanced warehouse environment!")

In [None]:
# Import the enhanced APR modules
from apr import WarehouseEnv
from apr.utils import render_env
import matplotlib.pyplot as plt
import random

print("APR modules imported successfully!")

# Build the warehouse with a fixed seed so repeated runs of the notebook
# start from the same environment.
env = WarehouseEnv(seed=42)

# Headline facts about the freshly created environment, printed line by line.
_env_overview = (
    "\n Created warehouse environment:",
    f"   Size: {env.n_rows}x{env.n_cols}",
    f"   Packages: {len(env.package_positions_initial)}",
    f"   Shelves: {len(env.shelf_positions)}",
    f"   Max steps: {env.max_steps}",
)
for _overview_line in _env_overview:
    print(_overview_line)

# Begin an episode; reset() hands back the initial state, which is echoed
# before asking the environment to print its own view of that state.
state = env.reset()
print(f"\n Environment reset. Initial state: {state}")
env.print_state()

In [None]:
# Show the current environment through each available renderer

print(" Environment Visualization Modes:")

# Mode 1: text rendering
print("\n1. Console ASCII rendering:")
env.render(mode="human")

# Mode 2: sprite rendering, with the matplotlib renderer as a stand-in
# whenever the sprite renderer raises.
_preferred_mode, _fallback_mode = "sprites", "matplotlib"
print("\n2. Rich sprite-based rendering:")
try:
    env.render(mode=_preferred_mode)
except Exception as render_error:
    # Report why sprites were unavailable, then still show a picture.
    print(f"Sprite rendering failed: {render_error}")
    print("Falling back to matplotlib rendering...")
    env.render(mode=_fallback_mode)

In [None]:
# Walk through a short scripted episode to show how pickups affect the reward

print(" Package Pickup & Delivery Demo")
print("="*50)

# Start from a fresh episode with an empty reward tally
env.reset()
total_reward = 0

print("\n Starting position:")
env.print_state()

# Action ids used by this walkthrough: 1 moves down, 3 moves right.
# NOTE(review): confirm against WarehouseEnv's action encoding.
MOVE_DOWN, MOVE_RIGHT = 1, 3

# Each entry: (announcement, action id, report the info dict?, render after?).
# The walkthrough expects packages at (0,1) and (1,2) -- presumably fixed by
# the seed used when the environment was created; verify if the layout changes.
_demo_moves = (
    ("\n  Moving right to pickup package...", MOVE_RIGHT, True, True),
    ("\n  Moving down...", MOVE_DOWN, False, False),
    ("\n  Moving right to (1,2)...", MOVE_RIGHT, True, True),
)

for _announcement, _action, _report_info, _show_grid in _demo_moves:
    print(_announcement)
    next_state, reward, done, info = env.step(_action)
    total_reward += reward
    if _report_info:
        print(f"Reward: {reward} | Info: {info}")
    else:
        # The plain repositioning move reports where the agent ended up instead
        print(f"Reward: {reward} | New position: {next_state}")
    if _show_grid:
        env.render(mode="human")

# Episode bookkeeping after the three scripted moves
print(f"\n💰 Total reward so far: {total_reward}")
print(f" Packages collected: {env.num_picked_up_items}")
print(f" Packages remaining: {len(env.packages_remaining)}")

In [None]:
# Test the training pipeline with enhanced environment
#
# Runs apr.train.main() in-process by temporarily replacing sys.argv with the
# command line a shell invocation would use, then restores sys.argv no matter
# how the run ends. NOTE(review): this assumes main() reads its options from
# sys.argv -- confirm against apr/train.py.

print(" Training Pipeline Test")
print("="*50)

# Import training modules
from apr.train import main
import sys
import traceback

# The setup cell treats the parent of the working directory as the project
# root, so a config path relative to the working directory may not exist.
# Keep the relative path when it resolves; otherwise fall back to the same
# path under project_root. If neither exists the original string is passed
# through unchanged so main() can report the problem itself.
config_arg = "cfg/baseline.yaml"
if not Path(config_arg).exists():
    _root_config = project_root / "cfg" / "baseline.yaml"
    if _root_config.exists():
        config_arg = str(_root_config)

# Set up arguments for a short training run
original_argv = sys.argv.copy()

print("Starting training with enhanced environment...")
print("(This will run for 1000 episodes)")

try:
    sys.argv = ["train.py", "--config", config_arg]
    main()
    print(" Training completed successfully!")
except Exception as e:
    # Keep the notebook running, but do not throw away the diagnosis:
    # the one-line message alone rarely says where training broke.
    print(f" Training failed: {e}")
    traceback.print_exc()
finally:
    sys.argv = original_argv  # Restore original arguments

In [None]:
# Analyze training results

import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

print(" Training Results Analysis")
print("="*50)

# Columns that the plots and the summary below read from metrics.csv
REQUIRED_METRIC_COLUMNS = ("episode", "reward", "epsilon")


def list_run_dirs(runs_dir):
    """Return the sub-directories of ``runs_dir``.

    An empty list is returned when ``runs_dir`` does not exist or is not a
    directory, so callers never have to guard ``iterdir()`` themselves.
    """
    if not runs_dir.is_dir():
        return []
    return [entry for entry in runs_dir.iterdir() if entry.is_dir()]


def latest_by_mtime(paths):
    """Return the most recently modified path in ``paths``, or None if empty."""
    return max(paths, key=lambda path: path.stat().st_mtime, default=None)


def metrics_problem(metrics):
    """Explain why ``metrics`` cannot be analysed, or return None if it can.

    A frame is unusable when a required column is absent or when it holds no
    rows (for example a header-only file left behind by a failed run).
    """
    missing = [name for name in REQUIRED_METRIC_COLUMNS if name not in metrics.columns]
    if missing:
        return f"missing column(s): {', '.join(missing)}"
    if metrics.empty:
        return "no episodes were logged"
    return None


def training_summary_lines(metrics):
    """Format the headline statistics of a non-empty metrics frame."""
    rewards = metrics['reward']
    return [
        f"   Final reward: {rewards.iloc[-1]:.1f}",
        f"   Average reward (last 100): {rewards.tail(100).mean():.1f}",
        f"   Max reward: {rewards.max():.1f}",
        f"   Final epsilon: {metrics['epsilon'].iloc[-1]:.3f}",
    ]


def plot_training_curves(metrics):
    """Draw reward and epsilon against episode number, side by side."""
    fig, (reward_ax, epsilon_ax) = plt.subplots(1, 2, figsize=(12, 4))

    reward_ax.plot(metrics['episode'], metrics['reward'])
    reward_ax.set_title('Training Rewards Over Episodes')
    reward_ax.set_xlabel('Episode')
    reward_ax.set_ylabel('Reward')
    reward_ax.grid(True, alpha=0.3)

    epsilon_ax.plot(metrics['episode'], metrics['epsilon'])
    epsilon_ax.set_title('Epsilon Decay Over Episodes')
    epsilon_ax.set_xlabel('Episode')
    epsilon_ax.set_ylabel('Epsilon')
    epsilon_ax.grid(True, alpha=0.3)

    fig.tight_layout()
    plt.show()


# Find the latest training run
outputs_dir = Path("../outputs/runs")
run_dirs = list_run_dirs(outputs_dir)
latest_run = latest_by_mtime(run_dirs)

if not outputs_dir.exists():
    print("NOTFOUND: Outputs directory not found")
elif latest_run is None:
    print("NOTFOUND: No training runs found")
else:
    print(f" Latest run: {latest_run.name}")

    # Load and plot metrics
    metrics_file = latest_run / "metrics.csv"
    if not metrics_file.exists():
        print("NOTFOUND: No metrics file found")
    else:
        try:
            df = pd.read_csv(metrics_file)
        except pd.errors.EmptyDataError:
            # A zero-byte file has no header for pandas to parse; treat it
            # like a header-only file instead of crashing the cell.
            df = pd.DataFrame(columns=list(REQUIRED_METRIC_COLUMNS))

        problem = metrics_problem(df)
        if problem is not None:
            print(f"INVALID: Cannot analyze {metrics_file.name}: {problem}")
        else:
            plot_training_curves(df)

            # Print summary stats
            print("\n Training Summary:")
            for summary_line in training_summary_lines(df):
                print(summary_line)