# RL Sensor Optimization - Quick Start

Notebook per testare rapidamente l'environment e l'agent RL.

In [None]:
import sys
sys.path.append('../src')

import numpy as np
import matplotlib.pyplot as plt
from environment import SensorOptimizationEnv
from agent import DQNAgent, ActorCriticAgent

%matplotlib inline

## 1. Test Environment

In [None]:
# Build the environment from an explicit configuration mapping so the
# settings can be read (and tweaked) in one place.
env_config = {
    'unitsX': 30,
    'unitsY': 30,
    'unitsZ': 30,
    'num_sensor_types': 3,
    'sensors_per_type': [15, 10, 8],
    'constraint_ratio': 0.1,
}
env = SensorOptimizationEnv(**env_config)

# Report the shapes of the observation and action spaces.
print(f"Observation space: {env.observation_space.shape}")
print(f"Action space: {env.action_space.shape}")

In [None]:
# Reset the environment and display its initial state.
# reset() is unpacked as an (observation, info) pair.
obs, info = env.reset()
print(f"Initial observation shape: {obs.shape}")
print(f"Info: {info}")
env.render()

In [None]:
# Sample one action from the action space and show it.
action = env.action_space.sample()
print(f"Random action: {action}")

# Apply the action; step() yields a 5-tuple that is unpacked below.
step_result = env.step(action)
next_obs, reward, terminated, truncated, step_info = step_result

# Report the reward and the metrics carried in the step's info dict.
print(f"\nReward: {reward:.2f}")
print(f"Coverage: {step_info['coverage']:.2%}")
print(f"Num boxes: {step_info['num_boxes']}")
print(f"Cable length: {step_info['total_cable_length']:.2f}")

## 2. Test Random Agent

In [None]:
# Run one episode with randomly sampled actions.
def run_random_episode(env, max_steps=50):
    """Roll out one episode using actions drawn from ``env.action_space``.

    Args:
        env: Environment exposing ``reset()`` (returning ``(obs, info)``),
            ``step(action)`` (returning ``(obs, reward, terminated,
            truncated, info)``) and ``action_space.sample()``. Each step's
            ``info`` must contain a ``'coverage'`` entry.
        max_steps: Upper bound on the number of steps taken in the episode.

    Returns:
        A tuple ``(episode_reward, rewards, coverages, last_info)`` where
        ``episode_reward`` is the sum of the per-step rewards, ``rewards``
        and ``coverages`` are per-step lists, and ``last_info`` is the info
        dict of the final step taken. If no step is taken
        (``max_steps <= 0``), ``last_info`` is the info returned by
        ``reset()``.
    """
    # Seed last_info with the reset info so the return value is always
    # defined, even when the loop below never executes.
    _, last_info = env.reset()
    episode_reward = 0

    rewards = []
    coverages = []

    for _ in range(max_steps):
        action = env.action_space.sample()
        _, reward, terminated, truncated, last_info = env.step(action)

        episode_reward += reward
        rewards.append(reward)
        coverages.append(last_info['coverage'])

        # Stop as soon as the environment reports the episode is over.
        if terminated or truncated:
            break

    return episode_reward, rewards, coverages, last_info

# Roll out one random episode and summarise how it ended.
episode_result = run_random_episode(env)
total_reward, rewards, coverages, final_info = episode_result

print("Episode finished!")
print(f"Total reward: {total_reward:.2f}")
# The remaining figures come from the info dict of the last step taken.
print(f"Final coverage: {final_info['coverage']:.2%}")
print(f"Final cable length: {final_info['total_cable_length']:.2f}")
print(f"Num boxes: {final_info['num_boxes']}")

In [None]:
# Plot the per-step rewards and coverage of the random episode side by side.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# One (series, y-label, title) entry per panel, in left-to-right order.
episode_panels = (
    (rewards, 'Reward', 'Rewards per Step'),
    (coverages, 'Coverage', 'Coverage over Time'),
)

for ax, (series, ylabel, title) in zip(axes, episode_panels):
    ax.plot(series)
    ax.set_xlabel('Step')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

## 3. Test DQN Agent

In [None]:
# Size the agent from the first dimension of the observation and action
# space shapes.
state_dim, action_dim = (
    space.shape[0] for space in (env.observation_space, env.action_space)
)

# Create the DQN agent.
agent = DQNAgent(
    state_dim=state_dim,
    action_dim=action_dim,
    hidden_dim=256,
    learning_rate=1e-4,
)

print(f"Agent created on device: {agent.device}")
print(f"Q-network: {agent.q_network}")

In [None]:
# Smoke-test action selection: reset the environment and ask the agent for an
# action with exploration enabled.
state, _ = env.reset()
action = agent.select_action(state, explore=True)
print(f"Selected action: {action}")
# The returned action must expose a .shape attribute for this print to work.
print(f"Action shape: {action.shape}")

## 4. Mini Training Loop

In [None]:
# Short training run (only a few episodes, enough to exercise the loop).
num_episodes = 50
max_steps = 30

episode_rewards, episode_coverages, losses = [], [], []

for episode in range(num_episodes):
    state, _ = env.reset()
    episode_reward = 0

    for step in range(max_steps):
        # Act with exploration enabled and advance the environment one step.
        action = agent.select_action(state, explore=True)
        next_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated

        # Record the transition, then run one training step on the agent.
        # NOTE(review): the stored done flag merges termination and
        # truncation; confirm this is what the agent's update expects.
        agent.store_transition(state, action, reward, next_state, done)
        loss = agent.train_step()
        if loss is not None:
            # train_step() may return None; only real loss values are kept.
            losses.append(loss)

        episode_reward += reward
        state = next_state

        if done:
            break

    # Refresh the target network every 5 episodes (episode 0 included).
    if episode % 5 == 0:
        agent.update_target_network()

    # Per-episode metrics; coverage is read from the last step's info dict.
    episode_rewards.append(episode_reward)
    episode_coverages.append(info['coverage'])

    # Progress line every 10 episodes.
    if episode % 10 == 0:
        print(f"Episode {episode}: Reward = {episode_reward:.2f}, Coverage = {info['coverage']:.2%}, Epsilon = {agent.epsilon:.3f}")

print("\nTraining completed!")

In [None]:
# Plot training progress: rewards, coverage and loss in three panels.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# One (series, x-label, y-label, title) entry per panel, left to right.
training_panels = (
    (episode_rewards, 'Episode', 'Total Reward', 'Training Rewards'),
    (episode_coverages, 'Episode', 'Coverage', 'Coverage over Training'),
    (losses, 'Training Step', 'Loss', 'Training Loss'),
)

for ax, (series, xlabel, ylabel, title) in zip(axes, training_panels):
    ax.plot(series)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

## 5. Evaluate Trained Agent

In [None]:
# Evaluate the agent over 10 episodes with exploration disabled.
eval_rewards = []
eval_coverages = []

for _ in range(10):
    state, _ = env.reset()
    episode_reward = 0
    done = False

    # NOTE(review): there is no step cap here, so this loop only ends when the
    # environment reports terminated or truncated; confirm the environment
    # enforces its own episode limit.
    while not done:
        action = agent.select_action(state, explore=False)  # Greedy
        next_state, reward, terminated, truncated, info = env.step(action)

        state = next_state
        episode_reward += reward
        done = terminated or truncated

    # Coverage is taken from the info dict of the episode's final step.
    eval_rewards.append(episode_reward)
    eval_coverages.append(info['coverage'])

print("Evaluation Results:")
# The separator is a real plus-minus sign; the original held a mis-decoded
# byte sequence that printed as two garbage characters.
print(f"  Mean Reward: {np.mean(eval_rewards):.2f} ± {np.std(eval_rewards):.2f}")
print(f"  Mean Coverage: {np.mean(eval_coverages):.2%}")
# An episode counts as a success when its final coverage is at least 0.99.
print(f"  Success Rate: {sum(c >= 0.99 for c in eval_coverages) / len(eval_coverages):.2%}")

## 6. Save/Load Agent

In [None]:
# Save agent
from pathlib import Path

# Create the target directory first so the save does not fail on a fresh
# checkout; exist_ok makes this a no-op when the directory is already there.
Path('../models').mkdir(parents=True, exist_ok=True)
agent.save('../models/test_agent.pth')
print("Agent saved!")

In [None]:
# Load the checkpoint into a fresh agent. The constructor arguments repeat the
# ones used for the agent that was saved (hidden_dim=256, learning_rate=1e-4),
# so the network being built matches the checkpoint regardless of what
# DQNAgent's defaults are.
new_agent = DQNAgent(
    state_dim=state_dim,
    action_dim=action_dim,
    hidden_dim=256,
    learning_rate=1e-4,
)
new_agent.load('../models/test_agent.pth')
print("Agent loaded!")
print(f"Stats: {new_agent.get_stats()}")

In [None]:
# Close the environment now that the notebook no longer needs it.
env.close()