# Raycasting/Lidar simulation

# Test Ray Environment 

In [1]:
from drone_2d_custom_gym_env.drone_2d_env_raycast import Drone2dEnvRaycastV2
import time  # Import time for delay in example

# Smoke test for the raycasting environment: open a rendered instance, drive it
# with random actions for 1500 steps, and start a new episode whenever one ends.
print("Testing Drone2dEnvRaycastV2...")
raycast_demo_kwargs = dict(
    render_sim=True,
    moving_platform=True,
    platform_speed=2.0,
    enable_wind=True,
    initial_pos_random_range_m=10.0,
    enable_raycasting=True,  # raycasting has to be on for this test
    num_rays=11,
    ray_fov_deg=120.0,
)
env = Drone2dEnvRaycastV2(**raycast_demo_kwargs)
obs = env.reset()

for i in range(1500):
    env.render()
    action = env.action_space.sample()  # random policy, no agent involved
    obs, reward, done, info = env.step(action)
    if not done:
        continue

    # Episode over: show the last frame for a second, then start again.
    print(f"Episode finished after {i+1} steps.")
    env.render()
    time.sleep(1)
    obs = env.reset()

env.close()

pygame 2.6.1 (SDL 2.28.4, Python 3.10.16)
Hello from the pygame community. https://www.pygame.org/contribute.html
Testing Drone2dEnvRaycastV2...
RaycastV2 Observation Space Shape: (19,)

--- Resetting Episode 1 ---
Wind Enabled: True, Wind Dir: 120.6 deg. Moving Platform: True
Step 17: OUT OF BOUNDS!
Episode finished after 18 steps.
Pygame closed.


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


# Training the model (Agent)

## Training Configuration

In [1]:
# Shared scenario settings. The training and evaluation configuration cells
# further down read these names when they build their environment kwargs.
MOVING_PLATFORM = True               # Use a moving platform
PLATFORM_SPEED = 2.0                 # Platform speed (relevant when the platform moves)
INITIAL_POST_RAMDOM_RANGE_M = 8.0    # Random start range; passed as initial_pos_random_range_m (misspelt name kept: other cells use it)
MAX_ALLOWED_TILT_ANGLE_RAD = 2       # Tilt limit in radians; 2 rad is about 114.6 deg (not 90 deg)
ENABLE_WIND = True                   # Enable wind
REWARD_LANDING = 500                 # Reward for landing; a higher value makes landing more attractive
REWARD_UN_LANDING = 50               # Reward for an unstable landing

In [2]:

from stable_baselines3 import PPO

# --- Environment Configuration ---
# Keyword arguments handed to every Drone2dEnvRaycastV2 instance (via
# `env_kwargs`). They should describe the scenario the agent is trained on.
# NOTE(review): the saved cell outputs print 'max_steps': 750, so they were
# produced with a different value than the 600 set here - re-run to refresh.
ENV_CONFIG = {
    "render_sim": False,             # IMPORTANT: Keep False for faster training
    "max_steps": 600,                # Max steps per episode during training
    "render_path": False,            # Disable rendering options for speed
    "render_shade": False,
    "shade_distance_m": 2.0,

    # Do not change the entries below when resuming training of an existing model

    "moving_platform": MOVING_PLATFORM,        # Train with a moving platform
    "platform_speed": PLATFORM_SPEED,           # Platform speed if moving
    "initial_pos_random_range_m": INITIAL_POST_RAMDOM_RANGE_M,# Random start range
    "max_allowed_tilt_angle_rad": MAX_ALLOWED_TILT_ANGLE_RAD, # Tilt limit in radians (value set in the shared settings cell)
    "enable_wind": ENABLE_WIND,             # Train with wind enabled
    'reward_landing': REWARD_LANDING,           # Reward for landing; a higher value makes landing more attractive
    'reward_un_landing': REWARD_UN_LANDING          # Reward for an unstable landing
    # Note: wind_speed is fixed at 5.0 inside the env for now
}


# --- Training Configuration ---
TOTAL_TIMESTEPS = 3_000_000      # Total steps for training (adjust as needed)
MODEL_ALGORITHM = PPO           # Algorithm class to use (PPO is a good default)
POLICY_TYPE = "MlpPolicy"       # Policy type (Multi-Layer Perceptron for vector observations)
LOG_DIR = "logs/drone_ppo_ray/"     # Directory for TensorBoard logs; the training cell also writes checkpoints here
MODEL_SAVE_PATH = "models/ppo_ray_drone_1" # Path to save the trained model
CHECKPOINT_FREQ = 50000         # Save a checkpoint every N steps (the training cell divides this by N_ENVS)
N_ENVS = 12                      # Number of parallel environments (adjust based on CPU cores)


## Training Agent 

In [4]:

import os
import gym

from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback

# --- Environment Import ---
# IMPORTANT: Adjust this import path to choose the enviroment

from drone_2d_custom_gym_env.drone_2d_env_raycast import Drone2dEnvRaycastV2


if __name__ == "__main__":
    # Train a fresh agent on N_ENVS parallel copies of Drone2dEnvRaycastV2,
    # checkpointing periodically and saving the final model to MODEL_SAVE_PATH.
    # All upper-case names come from the configuration cells above.

    # --- Setup ---
    os.makedirs(LOG_DIR, exist_ok=True)
    # dirname() is "" for a bare file name and os.makedirs("") raises, so only
    # create the model directory when the save path actually has one.
    model_dir = os.path.dirname(MODEL_SAVE_PATH)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    print("--- Starting Drone Training ---")
    print(f"Algorithm: {MODEL_ALGORITHM.__name__}")
    print(f"Total Timesteps: {TOTAL_TIMESTEPS}")
    print(f"Environment Config: {ENV_CONFIG}")
    print(f"Using {N_ENVS} parallel environments.")

    # --- Create Vectorized Environment ---
    # Use SubprocVecEnv for true parallelism, DummyVecEnv for debugging
    # `env_kwargs` passes the configuration dictionary to each environment instance
    env = make_vec_env(
        Drone2dEnvRaycastV2,
        n_envs=N_ENVS,
        seed=0,
        vec_env_cls=SubprocVecEnv, # Use SubprocVecEnv for parallel processing
        #vec_env_cls=DummyVecEnv, # Use DummyVecEnv for easier debugging
        env_kwargs=ENV_CONFIG
        )

    # Everything after the env exists runs under this try/finally so the worker
    # processes are shut down even when model or callback setup fails, not only
    # when training itself fails.
    try:
        # --- Setup Model ---
        # Define the model with policy type, environment, and logging parameters
        # You might need to tune hyperparameters like learning_rate, n_steps, batch_size etc.
        model = MODEL_ALGORITHM(
            POLICY_TYPE,
            env,
            verbose=1, # Print training progress
            tensorboard_log=LOG_DIR,
            # Example of adjusting some hyperparameters (optional):
            # learning_rate=3e-4,
            # n_steps=2048, # Steps per env before update (adjust based on max_steps and N_ENVS)
            # batch_size=64,
            # gamma=0.99, # Discount factor
            # gae_lambda=0.95,
            # ent_coef=0.0, # Entropy coefficient
        )

        # --- Setup Checkpoint Callback ---
        # Saves the model periodically during training. The callback is invoked
        # once per vectorized step, so the frequency is divided by N_ENVS.
        checkpoint_callback = CheckpointCallback(
            save_freq=max(CHECKPOINT_FREQ // N_ENVS, 1), # Adjust frequency based on N_ENVS
            save_path=LOG_DIR,
            name_prefix="drone_ppo_ckpt"
        )

        # --- Train the Agent ---
        print("\n--- Starting Training ---")
        try:
            model.learn(
                total_timesteps=TOTAL_TIMESTEPS,
                log_interval=10, # Log metrics every 10 updates
                tb_log_name=f"{MODEL_ALGORITHM.__name__}_Drone", # Name for TensorBoard run
                callback=checkpoint_callback # Add the checkpoint callback
                )
        except Exception as e:
            print(f"\n!!! Error during training: {e} !!!")
            print("Attempting to save model before exiting...")
            model.save(f"{MODEL_SAVE_PATH}_error")
        finally:
            # --- Save the Final Model ---
            # Runs after success, after a handled error, and on interruption
            # (e.g. KeyboardInterrupt), so partial progress is always kept.
            print("\n--- Training Finished (or interrupted) ---")
            print(f"Saving final model to: {MODEL_SAVE_PATH}")
            model.save(MODEL_SAVE_PATH)
    finally:
        # --- Clean up ---
        env.close() # Close the vectorized environment
        print("Environment closed.")

    print("\n--- Training Script Complete ---")
    print(f"To monitor training, run: tensorboard --logdir {LOG_DIR}")

# --- END OF FILE train_drone_agent.py ---


--- Starting Drone Training ---
Algorithm: PPO
Total Timesteps: 3000000
Environment Config: {'render_sim': False, 'max_steps': 750, 'render_path': False, 'render_shade': False, 'shade_distance_m': 2.0, 'moving_platform': True, 'platform_speed': 2.0, 'initial_pos_random_range_m': 8.0, 'max_allowed_tilt_angle_rad': 2, 'enable_wind': True, 'reward_landing': 500, 'reward_un_landing': 50}
Using 12 parallel environments.
RaycastV2 Observation Space Shape: (17,)
RaycastV2 Observation Space Shape: (17,)

--- Resetting Episode 1 ---
Wind Enabled: True, Wind Dir: 295.3 deg. Moving Platform: True
RaycastV2 Observation Space Shape: (17,)
RaycastV2 Observation Space Shape: (17,)

--- Resetting Episode 1 ---
Wind Enabled: True, Wind Dir: 138.8 deg. Moving Platform: True
RaycastV2 Observation Space Shape: (17,)

--- Resetting Episode 1 ---
Wind Enabled: True, Wind Dir: 313.9 deg. Moving Platform: True

--- Resetting Episode 1 ---
Wind Enabled: True, Wind Dir: 185.3 deg. Moving Platform: True

--- Res



Using cuda device

--- Starting Training ---
Logging to logs/drone_ppo_ray/PPO_Drone_1




Step 45: OUT OF BOUNDS!
Step 55: OUT OF BOUNDS!
Step 57: OUT OF BOUNDS!
Step 61: LOST CONTROL!
Step 74: LOST CONTROL!
Step 78: LOST CONTROL!
Step 85: OUT OF BOUNDS!
Step 101: OUT OF BOUNDS!
Step 102: LOST CONTROL!
Step 116: LOST CONTROL!
Step 121: LOST CONTROL!
Step 129: LOST CONTROL!
Step 84: OUT OF BOUNDS!
Step 99: LOST CONTROL!
Step 89: LOST CONTROL!
Step 101: OUT OF BOUNDS!
Step 81: OUT OF BOUNDS!
Step 65: OUT OF BOUNDS!
Step 93: LOST CONTROL!
Step 57: OUT OF BOUNDS!
Step 82: LOST CONTROL!
Step 68: LOST CONTROL!
Step 107: LOST CONTROL!
Step 124: LOST CONTROL!
Step 45: OUT OF BOUNDS!
Step 104: LOST CONTROL!
Step 100: LOST CONTROL!
Step 59: OUT OF BOUNDS!
Step 88: LOST CONTROL!
Step 99: LOST CONTROL!
Step 91: LOST CONTROL!
Step 97: OUT OF BOUNDS!
Step 65: OUT OF BOUNDS!
Step 76: LOST CONTROL!
Step 79: OUT OF BOUNDS!
Step 86: LOST CONTROL!
Step 77: OUT OF BOUNDS!
Step 101: OUT OF BOUNDS!
Step 70: OUT OF BOUNDS!
Step 86: LOST CONTROL!
Step 103: OUT OF BOUNDS!
Step 110: LOST CONTROL!
St



**Explanation and How to Use:**

1.  **Save:** Save this code as a Python file (e.g., `train_drone_agent.py`) in your project, likely at the root level or wherever you keep your training scripts.
2.  **Adjust Import:** **Crucially**, make sure the import of `Drone2dEnvRaycastV2` (from `drone_2d_custom_gym_env.drone_2d_env_raycast`) at the top of the script matches your project's directory structure. The script uses a single direct import, so a wrong path fails immediately with an `ImportError`.
3.  **Configure Training:**
    *   `TOTAL_TIMESTEPS`: Set how long you want to train (the configuration above uses 3 million; 1.8 million is a reasonable starting point, but you might need more or less).
    *   `LOG_DIR`: Choose where TensorBoard logs will be saved.
    *   `MODEL_SAVE_PATH`: Choose where the final trained model `.zip` file will be saved.
    *   `CHECKPOINT_FREQ`: How often to save intermediate models (useful if training crashes).
    *   `N_ENVS`: Number of parallel environments. Start with `4` or match the number of CPU cores you have available. Using more environments generally speeds up training but uses more RAM.
4.  **Configure Environment:** Modify the `ENV_CONFIG` dictionary to set the desired parameters for `Drone2dEnvRaycastV2` during training (wind, moving platform, start range, etc.). **Remember to keep `render_sim=False` for efficient training.**
5.  **Install Dependencies:** Make sure you have the necessary libraries installed in your environment:
    ```bash
    pip install stable-baselines3[extra] gym pygame pymunk numpy
    pip install 'shimmy>=2.0'
    # Or if using gymnasium:
    # pip install stable-baselines3[extra] gymnasium pygame pymunk numpy
    ```
    *(You might already have these from setting up the environment)*
6.  **Run Training:** Open your terminal, navigate to the directory where you saved the script, and run:
    ```bash
    python train_drone_agent.py
    ```
7.  **Monitor with TensorBoard:** While training is running (or after it finishes), open *another* terminal, navigate to the same project directory (or one level above `LOG_DIR`), and run:
    ```bash
    tensorboard --logdir logs/drone_ppo_ray/
    ```
    Then open the URL provided by TensorBoard (usually `http://localhost:6006/`) in your web browser to see graphs of the reward, loss functions, episode length, etc. This is essential for understanding if the agent is learning.
8.  **Tuning:** If the agent doesn't learn well, you may need to:
    *   Adjust the environment parameters (e.g., make the task easier initially by disabling wind or the moving platform).
    *   Tune the PPO hyperparameters within the `model = MODEL_ALGORITHM(...)` call (learning rate, rollout buffer size `n_steps`, etc.).
    *   Modify the reward function in the environment module (`drone_2d_custom_gym_env/drone_2d_env_raycast.py`) if the current shaping isn't effective.
    *   Train for longer (increase `TOTAL_TIMESTEPS`).

# Continuing / resuming training

## Configurations  - Continue Training 

In [9]:
# --- Training Configuration ---
# Settings for resuming training from a previously saved model. These names
# overwrite the same-named settings of the initial training cell.
# `PPO` is not imported in this cell; it relies on the earlier
# `from stable_baselines3 import PPO`.
# *** Paths ***
LOAD_MODEL_PATH = "models/ppo_ray_drone_1.zip" # <-- IMPORTANT: Path to the model saved previously
NEW_SAVE_PATH = "models/ppo_ray_drone_1_continued" # <-- Path to save the model after *this* training session
# NOTE(review): the initial training cell logged to "logs/drone_ppo_ray/"; this
# is a different directory, so the continued run is logged separately - confirm
# that is intended.
LOG_DIR = "logs/drone_ppo/" # <-- Directory for TensorBoard logs and checkpoints (can be same or new)

# *** Training Parameters ***
ADDITIONAL_TIMESTEPS = 4_000_000 # <-- How many *more* steps to train
MODEL_ALGORITHM = PPO         # <-- Must match the algorithm of the loaded model
POLICY_TYPE = "MlpPolicy"     # <-- Must match the policy type of the loaded model (not read by the continuation cell)
CHECKPOINT_FREQ = 50000       # Save a checkpoint every N steps (during this session)
N_ENVS = 10                    # Number of parallel environments (the initial training cell used 12)

**Test Load**

In [10]:
from stable_baselines3 import PPO

# Sanity check: load the saved model on its own (no environment attached) to
# confirm the file at LOAD_MODEL_PATH is readable before continuing training.
# If the load raises, the success message below is never printed.
model_loaded = PPO.load(LOAD_MODEL_PATH)
print("Model loaded successfully!")

Model loaded successfully!


## Continuing training ENV 1

**Key features:**

*   Specifies the path to the model you want to load (`LOAD_MODEL_PATH`).
*   Specifies a *new* path to save the model after continued training (`NEW_SAVE_PATH`).
*   Specifies the number of *additional* timesteps to train for (`ADDITIONAL_TIMESTEPS`).
*   Uses the same environment configuration (`ENV_CONFIG`) - **important for compatibility**.
*   Sets `reset_num_timesteps=False` in `model.learn()` to ensure the training step count and logs continue correctly.
*   Includes error handling if the specified model to load doesn't exist.

In [12]:


import os
import gym
from stable_baselines3 import PPO  # Or the algorithm you used (e.g., SAC, TD3)
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback

# --- Environment Import ---

from drone_2d_custom_gym_env.drone_2d_env_raycast import Drone2dEnvRaycastV2



# Resume training of a saved model: rebuild the vectorized environment, load the
# model onto it, train for ADDITIONAL_TIMESTEPS more steps and save the result
# under NEW_SAVE_PATH. All upper-case names come from the configuration cells.
#
# Fatal errors stop with `raise SystemExit(1)` rather than `exit()`: `exit` is a
# helper meant for interactive shells, whereas SystemExit stops the cell (or the
# script, with a non-zero status) in a well-defined way.

# --- Setup ---
# dirname() is "" for a bare file name and os.makedirs("") raises, so only
# create the model directory when the save path actually has one.
new_save_dir = os.path.dirname(NEW_SAVE_PATH)
if new_save_dir:
    os.makedirs(new_save_dir, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True) # Log dir might already exist

print("--- Starting Drone Training Continuation ---")
print(f"Attempting to load model from: {LOAD_MODEL_PATH}")
print(f"Training for additional {ADDITIONAL_TIMESTEPS} timesteps.")
print(f"Environment Config: {ENV_CONFIG}")

# --- Check if Model Exists ---
if not os.path.exists(LOAD_MODEL_PATH):
    print(f"\n!!! Error: Model file not found at {LOAD_MODEL_PATH} !!!")
    print("Cannot continue training without a model to load.")
    raise SystemExit(1)

# --- Create Vectorized Environment ---
# Needs to be created *before* loading the model
try:
    env = make_vec_env(
        Drone2dEnvRaycastV2,
        n_envs=N_ENVS,
        seed=0, # Seed can be different, but structure should be same
        vec_env_cls=SubprocVecEnv,
        env_kwargs=ENV_CONFIG
    )
except Exception as e:
    print(f"\n!!! Error creating environment: {e} !!!")
    raise SystemExit(1)

# --- Load Model ---
try:
    print(f"\nLoading model...")
    # Pass the environment and tensorboard log directory to load.
    # The algorithm is NOT detected from the file: `load` is called on
    # MODEL_ALGORITHM, so that class must match the one that saved the model.
    model = MODEL_ALGORITHM.load(
        LOAD_MODEL_PATH,
        env=env, # Link the loaded model to the (new) environment instances
        tensorboard_log=LOG_DIR # Tell SB3 where to continue logging
    )
    print(f"Model loaded successfully. Current Timesteps: {model.num_timesteps}")
except Exception as e:
    print(f"\n!!! Error loading model from {LOAD_MODEL_PATH}: {e} !!!")
    print("Check if the file exists, is a valid SB3 model, and matches the environment structure.")
    env.close()
    raise SystemExit(1)

# --- Setup Checkpoint Callback ---
# The callback is invoked once per vectorized step, hence the division by N_ENVS.
checkpoint_callback = CheckpointCallback(
    save_freq=max(CHECKPOINT_FREQ // N_ENVS, 1),
    save_path=LOG_DIR, # Save checkpoints in the log directory
    name_prefix=f"{os.path.basename(NEW_SAVE_PATH)}_ckpt" # Prefix based on new save name
)

# --- Continue Training ---
print("\n--- Continuing Training ---")
start_timesteps = model.num_timesteps # Get current steps from loaded model
target_timesteps = start_timesteps + ADDITIONAL_TIMESTEPS
print(f"Training from {start_timesteps} to {target_timesteps} total steps.")

try:
    model.learn(
        total_timesteps=ADDITIONAL_TIMESTEPS, # Train for the *additional* steps
        log_interval=10,
        tb_log_name=f"{MODEL_ALGORITHM.__name__}_Drone", # Use same or new log name
        reset_num_timesteps=False, # <-- IMPORTANT: Do NOT reset timestep counter
        callback=checkpoint_callback
        )
except Exception as e:
    print(f"\n!!! Error during continued training: {e} !!!")
    print("Attempting to save model before exiting...")
    model.save(f"{NEW_SAVE_PATH}_error")
finally:
    # --- Save the Final Model ---
    # Runs after success, after a handled error, and on interruption, so the
    # progress made in this session is always written out.
    print("\n--- Training Finished (or interrupted) ---")
    print(f"Saving final model to: {NEW_SAVE_PATH}.zip") # SB3 adds .zip automatically
    model.save(NEW_SAVE_PATH) # Use the NEW save path

    # --- Clean up ---
    env.close()
    print("Environment closed.")

print("\n--- Training Continuation Script Complete ---")
print(f"To monitor training, run: tensorboard --logdir {LOG_DIR}")




--- Starting Drone Training Continuation ---
Attempting to load model from: models/ppo_ray_drone_1.zip
Training for additional 4000000 timesteps.
Environment Config: {'render_sim': False, 'max_steps': 750, 'render_path': False, 'render_shade': False, 'shade_distance_m': 2.0, 'moving_platform': True, 'platform_speed': 2.0, 'initial_pos_random_range_m': 8.0, 'max_allowed_tilt_angle_rad': 2, 'enable_wind': True, 'reward_landing': 500, 'reward_un_landing': 50}
RaycastV2 Observation Space Shape: (17,)RaycastV2 Observation Space Shape: (17,)
RaycastV2 Observation Space Shape: (17,)


--- Resetting Episode 1 ---
Wind Enabled: True, Wind Dir: 92.3 deg. Moving Platform: True
--- Resetting Episode 1 ---
Wind Enabled: True, Wind Dir: 13.0 deg. Moving Platform: True

RaycastV2 Observation Space Shape: (17,)
RaycastV2 Observation Space Shape: (17,)
RaycastV2 Observation Space Shape: (17,)

--- Resetting Episode 1 ---
Wind Enabled: True, Wind Dir: 1.9 deg. Moving Platform: True

--- Resetting Episod



Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!

Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!Step 750: MAX STEPS REACHED!

Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 544: OUT OF BOUNDS!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEPS REACHED!
Step 750: MAX STEP


**Explanation and How to Use:**

1.  **Save:** Save this code as `continue_training.py` (or similar) in your project.
2.  **Configure Paths:**
    *   Set `LOAD_MODEL_PATH` to the exact path of the `.zip` file generated by your previous training run (e.g., `models/ppo_drone_final.zip`).
    *   Set `NEW_SAVE_PATH` to where you want the model saved *after this continuation*. It's generally good practice to give it a distinct name initially (e.g., `models/ppo_drone_final_continued`).
    *   Set `LOG_DIR` (usually the same as before, so TensorBoard shows one continuous graph).
3.  **Configure Training:**
    *   Set `ADDITIONAL_TIMESTEPS` to the number of *extra* steps you want to train for.
    *   Ensure `MODEL_ALGORITHM` matches the algorithm used to create the loaded model: the script calls `MODEL_ALGORITHM.load(...)`, so the algorithm class comes from this setting and is not detected from the file.
4.  **Configure Environment (`ENV_CONFIG`):** Make sure this dictionary matches the settings used during the *original* training run, or at least is compatible (same observation/action space dimensions).
5.  **Run:** Execute the script from your terminal:
    ```bash
    python continue_training.py
    ```
6.  **Monitor:** Use TensorBoard as before, pointing it to the `LOG_DIR`. You should see the training metrics continue from where the previous run left off.

This script provides a robust way to resume training your drone landing agent.

# Evaluation Agent

## Evaluation Configuration

In [9]:
# --- Evaluation Configuration ---
# *** Paths ***
MODEL_PATH = "models/ppo_ray_drone_1.zip" # <-- IMPORTANT: Path to the trained model to evaluate
# MODEL_PATH = "logs/drone_ppo/drone_ppo_ckpt_100000_steps.zip" # Example for loading a checkpoint

# *** Evaluation Parameters ***
NUM_EVAL_EPISODES = 1000        # How many episodes to run for evaluation
RENDER_SIM = True             # True: watch the agent perform
#RENDER_SIM = False             # Alternative: evaluate without rendering
#RENDER_DELAY_S = 0.03         # Alternative: add a 0.03 s delay between rendered frames
RENDER_DELAY_S = 0.0         # Delay between frames when rendering (seconds); typical range 0.00 - 0.03


# *** Environment Configuration for Evaluation ***
# Use settings representative of how you expect the agent to perform,
# or the specific conditions you want to test.
# Often similar or identical to training config, but render_sim=True here.
# The scenario entries reuse the shared settings (MOVING_PLATFORM, ...) defined
# in the configuration cell near the top of the training section.
ENV_CONFIG_EVAL = {

    "render_sim": RENDER_SIM,         # Use the RENDER_SIM flag here
    "max_steps": 600,                # Max steps per evaluation episode (raise it if episodes need longer)
    "render_path": True,              # Enable path/shade rendering for visualization
    "render_shade": True,
    "shade_distance_m": 2.0,

    # Leave the entries below unchanged to evaluate under the training conditions

    "moving_platform": MOVING_PLATFORM,        # Evaluate with a moving platform
    "platform_speed": PLATFORM_SPEED,           # Platform speed if moving
    "initial_pos_random_range_m": INITIAL_POST_RAMDOM_RANGE_M,# Random start range
    "max_allowed_tilt_angle_rad": MAX_ALLOWED_TILT_ANGLE_RAD, # Tilt limit in radians (value set in the shared settings cell)
    "enable_wind": ENABLE_WIND,             # Evaluate with wind enabled
    'reward_landing': REWARD_LANDING,           # Reward for landing
    'reward_un_landing': REWARD_UN_LANDING          # Reward for an unstable landing
    # Note: wind_speed is fixed at 5.0 inside the env for now
}

In [10]:


import os
import gym
import numpy as np
import time
import pygame # Import pygame directly for flip
from stable_baselines3 import PPO # Or the algorithm you used (e.g., SAC, TD3)
# REMOVED: from stable_baselines3.common.vec_env import DummyVecEnv

# --- Environment Import ---
from drone_2d_custom_gym_env.drone_2d_env_raycast import Drone2dEnvRaycastV2





# --- Helper Function --- MODIFIED ---
def evaluate_agent(model, env, num_episodes=10, render=False, delay=0.0):
    """
    Evaluates a Stable Baselines3 agent on a single environment instance.

    Each episode is run to termination with deterministic actions and then
    classified into exactly one outcome. The classification reads boolean
    flags from the last `info` dict returned by `env.step`, in this priority
    order: step error, `landed_safely`, `crashed`, `lost_control`,
    `out_of_bounds`, `Battery_empty`, then timeout
    (`episode length >= env.max_steps`), and finally "Error" as a fallback.
    NOTE(review): the flag names (including the capitalised `Battery_empty`)
    must match the keys the environment puts in `info` - confirm against the env.

    :param model: The agent to evaluate; must provide `predict(obs, deterministic=True)`.
    :param env: The single environment instance (old gym API: `reset()` returns
        the observation, `step()` returns `(obs, reward, done, info)`); must
        expose `max_steps`.
    :param num_episodes: Number of episodes to run.
    :param render: Whether to render the environment.
    :param delay: Delay between frames if rendering.
    :return: Dictionary with `mean_reward`, `std_reward`, `mean_length`,
        `success_rate`, `outcomes` (per-outcome episode counts) and
        `total_episodes`.
    """
    episode_rewards = []
    episode_lengths = []
    # --- Outcome Counters ---
    episode_outcomes = {
        "Landed Safely": 0,
        "Crashed": 0,
        "Lost Control": 0,
        "Out of Bounds": 0,
        "Battery Empty": 0,
        "Timeout": 0,
        "Error": 0, # For unexpected terminations
    }
    # (info flag, counter key, printed label), checked in priority order.
    # Driving the classification from one table guarantees that every flag
    # records exactly one outcome per episode.
    flag_outcomes = (
        ("landed_safely", "Landed Safely", "SUCCESS (Landed Safely)"),
        ("crashed", "Crashed", "FAILED (Crashed)"),
        ("lost_control", "Lost Control", "FAILED (Lost Control)"),
        ("out_of_bounds", "Out of Bounds", "FAILED (Out of Bounds)"),
        ("Battery_empty", "Battery Empty", "FAILED (Battery Empty)"),
    )

    # Get clock and fps if rendering and available
    the_clock = None
    render_fps = 50
    if render and hasattr(env, 'clock') and env.clock:
        the_clock = env.clock
    if render and hasattr(env, 'metadata') and 'render_fps' in env.metadata:
        render_fps = env.metadata['render_fps']

    for episode in range(num_episodes):
        obs = env.reset()
        done = False
        current_episode_reward = 0
        current_episode_length = 0
        final_info_dict = {} # Store final info from step before termination

        print(f"  Starting Eval Episode {episode + 1}/{num_episodes}...")

        while not done:
            if render:
                try:
                    env.render()
                    pygame.display.flip()
                    if the_clock:
                        the_clock.tick(render_fps)
                    if delay > 0:
                        time.sleep(delay)
                except Exception as e:
                    # A broken display must not abort the evaluation: keep
                    # going without rendering.
                    print(f"Error rendering/updating display: {e}")
                    render = False

            action, _states = model.predict(obs, deterministic=True)

            try:
                obs, reward, done, info = env.step(action)
                current_episode_reward += reward
                current_episode_length += 1
                final_info_dict = info # Store latest info
            except Exception as e:
                print(f"Error during env.step(): {e}. Terminating episode.")
                done = True
                final_info_dict['error'] = f"Exception during step: {e}" # Mark error in dict

        # --- Episode Finished ---
        episode_rewards.append(current_episode_reward)
        episode_lengths.append(current_episode_length)

        # --- Record exactly one outcome (prioritized) ---
        if final_info_dict.get('error'): # Check for explicit error first
            outcome_key, outcome_label = "Error", "FAILED (Error during step)"
        else:
            for flag, key, label in flag_outcomes:
                if final_info_dict.get(flag, False):
                    outcome_key, outcome_label = key, label
                    break
            else:
                # No flag was set: timeout if the step budget was used up,
                # otherwise an unexplained termination (should be rare).
                if current_episode_length >= env.max_steps:
                    outcome_key, outcome_label = "Timeout", "FAILED (Timeout)"
                else:
                    outcome_key = "Error"
                    outcome_label = "FAILED (Unknown - 'done' but no specific flag)"
        episode_outcomes[outcome_key] += 1
        print(f"  Episode {episode + 1}: {outcome_label}")
        # --- End Outcome Counter ---

        print(f"  Episode {episode + 1}: Length={current_episode_length}, Reward={current_episode_reward:.2f}")

    # --- Calculate Statistics ---
    mean_reward = np.mean(episode_rewards) if episode_rewards else 0
    std_reward = np.std(episode_rewards) if episode_rewards else 0
    mean_length = np.mean(episode_lengths) if episode_lengths else 0
    # Calculate success rate based on counter
    success_rate = episode_outcomes["Landed Safely"] / num_episodes if num_episodes > 0 else 0

    stats = {
        "mean_reward": mean_reward, "std_reward": std_reward,
        "mean_length": mean_length, "success_rate": success_rate,
        "outcomes": episode_outcomes, # Include the outcome counts
        "total_episodes": num_episodes
    }
    return stats


if __name__ == "__main__":
    # Evaluate the saved model at MODEL_PATH on one Drone2dEnvRaycastV2 instance
    # built from ENV_CONFIG_EVAL and print a summary of the results.
    #
    # Fatal errors stop with `raise SystemExit(1)` rather than `exit()`: `exit`
    # is a helper meant for interactive shells, whereas SystemExit stops the
    # cell (or the script, with a non-zero status) in a well-defined way.
    print("--- Starting Drone Agent Evaluation ---")
    print(f"Loading model from: {MODEL_PATH}")
    print(f"Evaluating for {NUM_EVAL_EPISODES} episodes.")
    print(f"Evaluation Environment Config: {ENV_CONFIG_EVAL}")

    if not os.path.exists(MODEL_PATH):
        print(f"\n!!! Error: Model file not found at {MODEL_PATH} !!!")
        raise SystemExit(1)

    # --- Create SINGLE Evaluation Environment ---
    try:
        eval_env = Drone2dEnvRaycastV2(**ENV_CONFIG_EVAL)
        print("Single evaluation environment created.")
    except Exception as e:
        print(f"\n!!! Error creating evaluation environment: {e} !!!")
        raise SystemExit(1)

    # --- Load Model ---
    try:
        loaded_model = PPO.load(MODEL_PATH, env=eval_env) # Pass single env
        print("\nModel loaded successfully.")
    except Exception as e:
        print(f"\n!!! Error loading model: {e} !!!")
        eval_env.close()
        raise SystemExit(1)

    # The environment is closed in `finally` so that an interrupted or failing
    # evaluation (e.g. KeyboardInterrupt during a long rendered run) does not
    # leave it open.
    try:
        # --- Run Evaluation ---
        # evaluate_agent steps `eval_env` directly and uses the model only to
        # predict actions.
        print("\n--- Running Evaluation Loop ---")
        eval_stats = evaluate_agent(
            model=loaded_model, env=eval_env, num_episodes=NUM_EVAL_EPISODES,
            render=RENDER_SIM, delay=RENDER_DELAY_S
        )

        # --- Print Summary ---
        print("\n\n--- Evaluation Summary ---")
        print(f"Evaluated for {eval_stats['total_episodes']} episodes.")
        print(f"Mean Reward: {eval_stats['mean_reward']:.2f} +/- {eval_stats['std_reward']:.2f}")
        print(f"Mean Episode Length: {eval_stats['mean_length']:.1f}")
        print(f"Success Rate (Landed Safely): {eval_stats['success_rate']:.2%}")
        print("\nEpisode Outcome Counts:")
        for outcome, count in eval_stats['outcomes'].items():
            # Print even if count is 0 for completeness
            print(f"  - {outcome}: {count}")
        print("--------------------------")
    finally:
        # --- Cleanup ---
        eval_env.close()
        print("Evaluation environment closed.")

    print("--- Evaluation Script Complete ---")



--- Starting Drone Agent Evaluation ---
Loading model from: models/ppo_ray_drone_1.zip
Evaluating for 1000 episodes.
Evaluation Environment Config: {'render_sim': True, 'max_steps': 600, 'render_path': True, 'render_shade': True, 'shade_distance_m': 2.0, 'moving_platform': True, 'platform_speed': 2.0, 'initial_pos_random_range_m': 8.0, 'max_allowed_tilt_angle_rad': 2, 'enable_wind': True, 'reward_landing': 500, 'reward_un_landing': 50}
RaycastV2 Observation Space Shape: (17,)

--- Resetting Episode 1 ---
Wind Enabled: True, Wind Dir: 160.1 deg. Moving Platform: True
Single evaluation environment created.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.

Model loaded successfully.

--- Running Evaluation Loop ---
  Starting Eval Episode 1/1000...


KeyboardInterrupt: 