# Env Basic sensor (Simplified Geometric Sensor) detects the moving platform

# Test Environment 

In [22]:


import gym
import numpy as np
import time


from drone_2d_custom_gym_env.drone_2d_env import Drone2dEnv


# --- Test Configuration ---
# Module-level settings read by run_test() below.
NUM_EPISODES = 5       # Number of episodes run_test() loops over
MAX_STEPS_PER_EPISODE = 750 # Passed to the env as max_steps; also the test loop's own backup step cap
RENDER_SIM = True      # Passed to the env as render_sim; also gates the env.render() calls in the loop
RENDER_DELAY_S = 0.02  # Seconds slept after each rendered frame (values <= 0 skip the sleep)

# --- Environment Options ---
# Values forwarded to the Drone2dEnv constructor by run_test()
ENABLE_WIND_TEST = True  # -> enable_wind
MOVING_PLATFORM_TEST = True  # -> moving_platform
PLATFORM_SPEED_TEST = 2.5 # Meters per second (-> platform_speed)
INITIAL_RANDOM_RANGE_TEST = 10.0 # Meters, +/- from center (-> initial_pos_random_range_m)
MAX_TILT_ANGLE_DEG_TEST = 90.0 # Degrees; converted with np.radians() and passed as max_allowed_tilt_angle_rad
LANDER_MASS = 3 # kg (-> lander_mass); NOTE: the training configuration later in this notebook rebinds this name

def _run_episode(env, episode):
    """Run a single random-action episode and print its summary.

    :param env: An already constructed environment exposing the classic gym
        API used here (``reset()``, ``step()`` returning a 4-tuple,
        ``render()``, ``action_space.sample()``).
    :param episode: Zero-based episode index, used only in log messages.
    :return: Number of ``env.step()`` calls that completed successfully
        (0 when ``env.reset()`` failed and the episode was skipped).
    """
    print(f"\n--- Starting Episode {episode + 1}/{NUM_EPISODES} ---")
    try:
        # Reset the environment for a new episode
        obs = env.reset()
        print(f"Initial Observation sample: {obs[:4]}...")  # Print start of obs
    except Exception as e:
        print(f"\n!!! Error resetting environment: {e} !!!")
        print("Skipping episode.")
        return 0

    done = False
    total_reward = 0
    step_count = 0
    # Initialised per episode so the summary below never hits an unbound name
    # (e.g. when rendering fails before the first step) and never shows the
    # previous episode's info.
    info = {}

    while not done:
        # Render the current state (if enabled)
        if RENDER_SIM:
            try:
                env.render()
                if RENDER_DELAY_S > 0:
                    time.sleep(RENDER_DELAY_S)
            except Exception as e:
                print(f"\n!!! Error rendering environment: {e} !!!")
                # The episode is abandoned rather than continued without
                # rendering, so say so.
                print("Aborting this episode.")
                break

        # --- Action Selection ---
        # For testing, use random actions. Replace with your agent's policy later.
        action = env.action_space.sample()

        # --- Step the Environment ---
        try:
            obs, reward, done, info = env.step(action)
            total_reward += reward
            step_count += 1
        except Exception as e:
            print(f"\n!!! Error during env.step() at step {step_count}: {e} !!!")
            print("Terminating episode.")
            done = True  # Force episode termination
            # Keep the last info seen in THIS episode (copied so the env's own
            # dict is not mutated) and record the failure alongside it.
            info = dict(info)
            info['error'] = f"Exception during step: {e}"

        # Check if max steps reached (env should handle this with 'done', but as a backup)
        if step_count >= MAX_STEPS_PER_EPISODE and not done:
            print("Warning: Max steps reached in test loop, but env not 'done'.")
            done = True  # Force termination in test script

    # --- Episode End ---
    print(f"--- Episode {episode + 1} Finished ---")
    print(f"Steps taken: {step_count}")
    print(f"Total Reward: {total_reward:.2f}")
    print(f"Final Info: {info}")

    # Render the final frame after done (if rendering). Best effort only: a
    # failure here is reported but must not abort the remaining episodes.
    if RENDER_SIM:
        try:
            env.render()
            time.sleep(0.5)  # Pause briefly on final frame
        except Exception as e:
            print(f"(Final-frame render skipped: {e})")

    return step_count


def run_test():
    """Runs the test loop for the Drone2dEnv.

    Builds a ``Drone2dEnv`` from the module-level test settings, drives it with
    random actions for ``NUM_EPISODES`` episodes, prints per-episode summaries
    and always attempts to close the environment afterwards (also when the
    loop is interrupted). Returns ``None``; if the environment cannot be
    constructed the error is printed and the function returns early.
    """
    print("--- Starting Drone Environment Test ---")

    # Convert degrees to radians for the environment parameter
    max_tilt_rad = np.radians(MAX_TILT_ANGLE_DEG_TEST)

    # Initialize the environment with chosen options
    try:
        env = Drone2dEnv(
            render_sim=RENDER_SIM,
            max_steps=MAX_STEPS_PER_EPISODE,
            moving_platform=MOVING_PLATFORM_TEST,
            platform_speed=PLATFORM_SPEED_TEST,
            initial_pos_random_range_m=INITIAL_RANDOM_RANGE_TEST,
            enable_wind=ENABLE_WIND_TEST,
            max_allowed_tilt_angle_rad=max_tilt_rad,
            lander_mass=LANDER_MASS,
            wind_speed=20.0,  # Initial speed before first dynamic change
        )
    except Exception as e:
        print(f"\n!!! Error creating environment: {e} !!!")
        print("Please check the environment's __init__ method and parameters.")
        return

    print("\n--- Environment Initialized ---")
    print(f"Action Space: {env.action_space}")
    print(f"Observation Space: {env.observation_space}")

    total_steps_all_episodes = 0

    try:
        for episode in range(NUM_EPISODES):
            total_steps_all_episodes += _run_episode(env, episode)
    finally:
        # --- Cleanup ---
        # In a finally block so the environment is released even when the loop
        # is interrupted (e.g. Ctrl-C).
        try:
            env.close()
            print("\nEnvironment Closed.")
        except Exception as e:
            print(f"\nError closing environment: {e}")

    print("\n--- Test Finished ---")
    print(f"Total steps across all episodes: {total_steps_all_episodes}")

# Run the smoke test only when this code is executed directly (script or
# notebook cell), not when the module is imported.
if __name__ == "__main__":
    run_test()

--- Starting Drone Environment Test ---
World Size: 50.0m x 50.0m
Screen Size: 800.0px x 800.0px
Render Zoom Factor: 1.0
Effective Pixels Per Meter: 16.00

--- Resetting Episode 1 ---
Wind Enabled: True, Wind Dir: 341.9 deg. Moving Platform: True

--- Environment Initialized ---
Action Space: Box(-1.0, 1.0, (2,), float32)
Observation Space: Box([-1. -1. -1. -1. -1. -1.  0. -1. -1. -1.  0. -1.], 1.0, (12,), float32)

--- Starting Episode 1/5 ---
Initial Observation sample: [0.10491447 0.9425615  0.         0.        ]...
Step 212: LOST CONTROL!
--- Episode 1 Finished ---
Steps taken: 213
Total Reward: -29.70
Final Info: {'Battery': np.float32(93.2615), 'landed': False, 'crashed': False, 'out_of_bounds': False, 'Battery_empty': False, 'lost_control': True, 'steps': 213, 'raw_pos': (9.640512191038173, 29.712429901564665), 'raw_vel': (-9.930603581128743, -9.054614481996936), 'raw_angle_rad': 1.614351889017876, 'raw_angular_vel': 9.83886092933783, 'platform_pos_x': 35.64999999999989, 'platf

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


# Training the model (Agent)

## Training Configuration (Env Configuration)

In [None]:
from stable_baselines3 import PPO

# --- Training Configuration ---
# Settings read by the "Training Agent" cell further down.
TOTAL_TIMESTEPS = 2_000_000      # Passed to model.learn() as total_timesteps (adjust as needed)
MODEL_ALGORITHM = PPO           # Algorithm class instantiated for training
POLICY_TYPE = "MlpPolicy"       # Policy type (Multi-Layer Perceptron for vector observations)
LOG_DIR = "logs/drone_ppo/"     # TensorBoard log directory; the checkpoint callback also saves into it
MODEL_SAVE_PATH = "models/ppo_drone_20_wind_3KG_4_T" # Path the final model is saved to (no extension given here)
CHECKPOINT_FREQ = 50000         # Checkpoint interval; divided by N_ENVS before being given to CheckpointCallback
N_ENVS = 10                      # Number of parallel environments (adjust based on CPU cores)

# NOTE (author's measurement): a training run with this setup took about 22 minutes


# --- Configuration Options ---
# Plain bool. The original `RENDER = True,` had a trailing comma, which bound
# the one-element tuple (True,) instead of True.
RENDER = True

GRAVITY = 9.81                       #  World gravity magnitude (9.81 = Earth)

MOVING_PLATFORM = False              #  Train with a moving platform
PLATFORM_SPEED = 2.0                 #  Platform speed if moving

INITIAL_POST_RAMDOM_RANGE_M = 8.0    #  Random start range (this is the value ENV_CONFIG uses)
# Tilt limit in radians. For reference: pi/2 ~ 1.57 rad = 90 deg and
# pi ~ 3.14 rad = 180 deg, so 4 rad ~ 229 deg.
MAX_ALLOWED_TILT_ANGLE_RAD = 4
ENABLE_WIND = True
WIND_SPEED = 20                      #  m/s - intended training wind speed
REWARD_LANDING = 500                 #  Reward for landing; a higher value increases the incentive
REWARD_UN_LANDING = 300              #  Reward for an unstable landing
INITIAL_RANDOM_RANGE = 9.0           #  +/- 9 m; NOTE(review): not referenced by ENV_CONFIG - confirm it is still needed

# Drone Parameters

LANDER_MASS = 3                      #  kg
LANDER_WIDTH = 1                     #  Meters
LANDER_HEIGHT = 1                    #  Meters
MAX_THRUST = 15                      #  Maximum thruster force; the unit is likely Newtons (N) - TODO confirm
THRUST_NOISE = 0.5                   #  NOTE(review): unit/meaning is defined by the environment - confirm
MAX_SAFE_LANDING_SPEED = 1.5
MAX_SAFE_LANDING_ANGLE = 0.2

In [None]:



# --- Environment Configuration ---
# Keyword arguments handed to each Drone2dEnv instance created for training
# (the training cells pass this dict as `env_kwargs`). Keep it aligned with
# the conditions the agent should be trained on.
ENV_CONFIG = {
    # -- Rendering / episode length: visual options stay off for speed --
    "render_sim": False,
    "max_steps": 1000,
    "render_path": False,
    "render_shade": False,
    "shade_distance_m": 2.0,

    # -- Task and physics: leave these unchanged when resuming training --
    "gravity_mag": GRAVITY,
    "moving_platform": MOVING_PLATFORM,
    "platform_speed": PLATFORM_SPEED,
    "initial_pos_random_range_m": INITIAL_POST_RAMDOM_RANGE_M,
    "max_allowed_tilt_angle_rad": MAX_ALLOWED_TILT_ANGLE_RAD,
    "enable_wind": ENABLE_WIND,
    "wind_speed": WIND_SPEED,

    # -- Rewards: normal landing and unstable landing --
    "reward_landing": REWARD_LANDING,
    "reward_un_landing": REWARD_UN_LANDING,

    # -- Drone body and thrusters --
    "lander_mass": LANDER_MASS,
    "lander_width": LANDER_WIDTH,    # meters
    "lander_height": LANDER_HEIGHT,  # meters
    "max_thrust": MAX_THRUST,
    "thrust_noise": THRUST_NOISE,    # NOTE(review): unit not visible here - confirm in the env

    # -- Landing tolerances --
    "max_safe_landing_speed": MAX_SAFE_LANDING_SPEED,
    "max_safe_landing_angle": MAX_SAFE_LANDING_ANGLE,
}





## Training Agent 

In [16]:

import os
import gym

from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback

# --- Environment Import ---
# IMPORTANT: Adjust this import path to choose the enviroment
from drone_2d_custom_gym_env.drone_2d_env import Drone2dEnv



if __name__ == "__main__":
    # Train a fresh agent on Drone2dEnv: create output directories, build the
    # vectorized environment and the model, train with periodic checkpoints,
    # then save the final model and close the environment.

    # --- Setup ---
    os.makedirs(LOG_DIR, exist_ok=True)
    # os.makedirs("") raises, so only create the model directory when
    # MODEL_SAVE_PATH actually contains a directory component.
    model_dir = os.path.dirname(MODEL_SAVE_PATH)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    print("--- Starting Drone Training ---")
    print(f"Algorithm: {MODEL_ALGORITHM.__name__}")
    print(f"Total Timesteps: {TOTAL_TIMESTEPS}")
    print(f"Environment Config: {ENV_CONFIG}")
    print(f"Using {N_ENVS} parallel environments.")

    # --- Create Vectorized Environment ---
    # Use SubprocVecEnv for true parallelism, DummyVecEnv for debugging
    # `env_kwargs` passes the configuration dictionary to each environment instance
    env = make_vec_env(
        Drone2dEnv,
        n_envs=N_ENVS,
        seed=0,
        vec_env_cls=SubprocVecEnv,  # Use SubprocVecEnv for parallel processing
        # vec_env_cls=DummyVecEnv,  # Use DummyVecEnv for easier debugging
        env_kwargs=ENV_CONFIG,
    )

    # Everything that uses the environment lives inside this try so that the
    # worker processes are shut down even if model construction or a save
    # fails.
    try:
        # --- Setup Model ---
        # Define the model with policy type, environment, and logging parameters
        # You might need to tune hyperparameters like learning_rate, n_steps, batch_size etc.
        model = MODEL_ALGORITHM(
            POLICY_TYPE,
            env,
            verbose=1,  # Print training progress
            tensorboard_log=LOG_DIR,
            # Example of adjusting some hyperparameters (optional):
            # learning_rate=3e-4,
            # n_steps=2048, # Steps per env before update (adjust based on max_steps and N_ENVS)
            # batch_size=64,
            # gamma=0.99, # Discount factor
            # gae_lambda=0.95,
            # ent_coef=0.0, # Entropy coefficient
        )

        # --- Setup Checkpoint Callback ---
        # Saves the model periodically during training
        checkpoint_callback = CheckpointCallback(
            save_freq=max(CHECKPOINT_FREQ // N_ENVS, 1),  # Adjust frequency based on N_ENVS
            save_path=LOG_DIR,
            name_prefix="drone_ppo_ckpt",
        )

        # --- Train the Agent ---
        print("\n--- Starting Training ---")
        try:
            model.learn(
                total_timesteps=TOTAL_TIMESTEPS,
                log_interval=10,  # Log metrics every 10 updates
                tb_log_name=f"{MODEL_ALGORITHM.__name__}_Drone",  # Name for TensorBoard run
                callback=checkpoint_callback,  # Add the checkpoint callback
            )
        except Exception as e:
            print(f"\n!!! Error during training: {e} !!!")
            print("Attempting to save model before exiting...")
            model.save(f"{MODEL_SAVE_PATH}_error")
        finally:
            # --- Save the Final Model ---
            # Runs after normal completion, after a handled error and on
            # interruption, so whatever was learned so far is kept.
            print("\n--- Training Finished (or interrupted) ---")
            print(f"Saving final model to: {MODEL_SAVE_PATH}")
            model.save(MODEL_SAVE_PATH)
    finally:
        # --- Clean up ---
        env.close()  # Close the vectorized environment
        print("Environment closed.")

    print("\n--- Training Script Complete ---")
    print(f"To monitor training, run: tensorboard --logdir {LOG_DIR}")

# --- END OF FILE train_drone_agent.py ---


--- Starting Drone Training ---
Algorithm: PPO
Total Timesteps: 2000000
Environment Config: {'render_sim': False, 'max_steps': 1000, 'render_path': False, 'render_shade': False, 'shade_distance_m': 2.0, 'moving_platform': False, 'platform_speed': 2.0, 'initial_pos_random_range_m': 8.0, 'max_allowed_tilt_angle_rad': 4, 'enable_wind': True, 'wind_speed': 20, 'reward_landing': 500, 'reward_un_landing': 300, 'lander_mass': 3}
Using 10 parallel environments.
World Size: 50.0m x 50.0mWorld Size: 50.0m x 50.0mWorld Size: 50.0m x 50.0mWorld Size: 50.0m x 50.0mWorld Size: 50.0m x 50.0mWorld Size: 50.0m x 50.0m




Screen Size: 800.0px x 800.0pxScreen Size: 800.0px x 800.0px

Screen Size: 800.0px x 800.0pxScreen Size: 800.0px x 800.0pxScreen Size: 800.0px x 800.0px
Render Zoom Factor: 1.0Render Zoom Factor: 1.0


Render Zoom Factor: 1.0
Render Zoom Factor: 1.0
Render Zoom Factor: 1.0

Effective Pixels Per Meter: 16.00Effective Pixels Per Meter: 16.00Effective Pixels Per Meter: 16.00


Effective 



Step 231: OUT OF BOUNDS!
Step 327: OUT OF BOUNDS!
Step 158: OUT OF BOUNDS!
Step 233: OUT OF BOUNDS!
Step 164: OUT OF BOUNDS!
Step 260: OUT OF BOUNDS!
Step 159: OUT OF BOUNDS!
Step 197: OUT OF BOUNDS!
Step 221: OUT OF BOUNDS!
Step 201: OUT OF BOUNDS!
Step 204: OUT OF BOUNDS!
Step 170: OUT OF BOUNDS!
Step 227: OUT OF BOUNDS!
Step 220: OUT OF BOUNDS!
Step 197: OUT OF BOUNDS!
Step 210: OUT OF BOUNDS!

--- Resetting Episode 11 ---
Wind Enabled: True, Wind Dir: 311.8 deg. Moving Platform: False

--- Resetting Episode 11 ---
Wind Enabled: True, Wind Dir: 122.6 deg. Moving Platform: False
Step 155: OUT OF BOUNDS!

--- Resetting Episode 11 ---
Wind Enabled: True, Wind Dir: 110.1 deg. Moving Platform: False
Step 209: OUT OF BOUNDS!

--- Resetting Episode 11 ---
Wind Enabled: True, Wind Dir: 183.5 deg. Moving Platform: False

--- Resetting Episode 11 ---
Wind Enabled: True, Wind Dir: 164.8 deg. Moving Platform: False

--- Resetting Episode 11 ---
Wind Enabled: True, Wind Dir: 130.1 deg. Moving Pl



**Explanation and How to Use:**

1.  **Save:** Save this code as a Python file (e.g., `train_drone_agent.py`) in your project, likely at the root level or wherever you keep your training scripts.
2.  **Adjust Import:** **Crucially**, make sure the `Drone2dEnv` import near the top of the script matches your project's directory structure. The script imports it directly from `drone_2d_custom_gym_env.drone_2d_env`; change that line if your package lives elsewhere.
3.  **Configure Training:**
    *   `TOTAL_TIMESTEPS`: Set how long you want to train (the script above uses 2 million; roughly 1.8–2 million is a reasonable starting point, but you might need more or less).
    *   `LOG_DIR`: Choose where TensorBoard logs will be saved.
    *   `MODEL_SAVE_PATH`: Choose where the final trained model `.zip` file will be saved.
    *   `CHECKPOINT_FREQ`: How often to save intermediate models (useful if training crashes).
    *   `N_ENVS`: Number of parallel environments. Start with `4` or match the number of CPU cores you have available. Using more environments generally speeds up training but uses more RAM.
4.  **Configure Environment:** Modify the `ENV_CONFIG` dictionary to set the desired parameters for the `Drone2dEnv` during training (wind, moving platform, start range, etc.). **Remember to keep `render_sim=False` for efficient training.**
5.  **Install Dependencies:** Make sure you have the necessary libraries installed in your environment:
    ```bash
    pip install stable-baselines3[extra] gym pygame pymunk numpy
    pip install 'shimmy>=2.0'
    # Or if using gymnasium:
    # pip install stable-baselines3[extra] gymnasium pygame pymunk numpy
    ```
    *(You might already have these from setting up the environment)*
6.  **Run Training:** Open your terminal, navigate to the directory where you saved the script, and run:
    ```bash
    python train_drone_agent.py
    ```
7.  **Monitor with TensorBoard:** While training is running (or after it finishes), open *another* terminal, navigate to the same project directory (or one level above `LOG_DIR`), and run:
    ```bash
    tensorboard --logdir logs/drone_ppo/
    ```
    Then open the URL provided by TensorBoard (usually `http://localhost:6006/`) in your web browser to see graphs of the reward, loss functions, episode length, etc. This is essential for understanding if the agent is learning.
8.  **Tuning:** If the agent doesn't learn well, you may need to:
    *   Adjust the environment parameters (e.g., make the task easier initially by disabling wind or the moving platform).
    *   Tune the PPO hyperparameters within the `model = MODEL_ALGORITHM(...)` call (learning rate, rollout buffer size `n_steps`, etc.).
    *   Modify the reward function in `drone_2d_env.py` if the current shaping isn't effective.
    *   Train for longer (increase `TOTAL_TIMESTEPS`).

# Continuing / resuming training

## Configurations  - Continue Training 

In [9]:
# --- Training Configuration ---
# Settings for the continuation cell below. NOTE: this cell rebinds LOG_DIR,
# MODEL_ALGORITHM, POLICY_TYPE, CHECKPOINT_FREQ and N_ENVS, which are also set
# by the initial training configuration, and it needs PPO to be imported
# already.
# *** Paths ***
LOAD_MODEL_PATH = "models/ppo_drone_20_wind_3KG_180_T.zip" # <-- IMPORTANT: Path to the model saved previously
NEW_SAVE_PATH = "models/ppo_drone_20_wind_3KG_180_T+C" # <-- Path to save the model after *this* training session
LOG_DIR = "logs/drone_ppo/" # <-- Directory for TensorBoard logs and checkpoints (can be same or new)

# *** Training Parameters ***
ADDITIONAL_TIMESTEPS = 3_000_000 # <-- How many *more* steps to train
MODEL_ALGORITHM = PPO         # <-- Class whose .load() is called; must match the algorithm of the loaded model
POLICY_TYPE = "MlpPolicy"     # <-- Must match the policy type of the loaded model (not read by the continuation cell itself)
CHECKPOINT_FREQ = 50000       # Checkpoint interval for this session; divided by N_ENVS before use
N_ENVS = 10                    # Number of parallel environments (match previous setup if possible)

**Test Load**

In [10]:
from stable_baselines3 import PPO

# Sanity check: try to load the checkpoint at LOAD_MODEL_PATH on its own
# (no environment is passed here). PPO.load() raises on failure, so the
# success message is only printed when loading worked.
model_loaded = PPO.load(LOAD_MODEL_PATH)
print("Model loaded successfully!")

Model loaded successfully!


## Continuing training ENV 1

**Key features:**

*   Specifies the path to the model you want to load (`LOAD_MODEL_PATH`).
*   Specifies a *new* path to save the model after continued training (`NEW_SAVE_PATH`).
*   Specifies the number of *additional* timesteps to train for (`ADDITIONAL_TIMESTEPS`).
*   Uses the same environment configuration (`ENV_CONFIG`) - **important for compatibility**.
*   Sets `reset_num_timesteps=False` in `model.learn()` to ensure the training step count and logs continue correctly.
*   Includes error handling if the specified model to load doesn't exist.

In [11]:


import os
import gym
from stable_baselines3 import PPO  # Or the algorithm you used (e.g., SAC, TD3)
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback

# --- Environment Import ---

from drone_2d_custom_gym_env.drone_2d_env import Drone2dEnv



# Resume training of a previously saved model: verify the checkpoint exists,
# rebuild the vectorized environment, load the model onto it, train for
# ADDITIONAL_TIMESTEPS more steps, then save under NEW_SAVE_PATH.

# --- Setup ---
# os.makedirs("") raises, so only create the save directory when
# NEW_SAVE_PATH actually contains a directory component.
save_dir = os.path.dirname(NEW_SAVE_PATH)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)  # Log dir might already exist

print("--- Starting Drone Training Continuation ---")
print(f"Attempting to load model from: {LOAD_MODEL_PATH}")
print(f"Training for additional {ADDITIONAL_TIMESTEPS} timesteps.")
print(f"Environment Config: {ENV_CONFIG}")

# --- Check if Model Exists ---
# Abort with `raise SystemExit(1)` rather than `exit()`: `exit` is an
# interactive convenience injected by `site`/IPython, whereas SystemExit is
# always available and reports a non-zero status for these error paths.
if not os.path.exists(LOAD_MODEL_PATH):
    print(f"\n!!! Error: Model file not found at {LOAD_MODEL_PATH} !!!")
    print("Cannot continue training without a model to load.")
    raise SystemExit(1)

# --- Create Vectorized Environment ---
# Needs to be created *before* loading the model
try:
    env = make_vec_env(
        Drone2dEnv,
        n_envs=N_ENVS,
        seed=0,  # Seed can be different, but structure should be same
        vec_env_cls=SubprocVecEnv,
        env_kwargs=ENV_CONFIG,
    )
except Exception as e:
    print(f"\n!!! Error creating environment: {e} !!!")
    raise SystemExit(1) from e

# --- Load Model ---
try:
    print("\nLoading model...")
    # The algorithm is the class .load() is called on (MODEL_ALGORITHM), so it
    # must match the one that produced the file. The environment and the
    # TensorBoard directory are supplied here so training and logging can
    # continue.
    model = MODEL_ALGORITHM.load(
        LOAD_MODEL_PATH,
        env=env,  # Link the loaded model to the (new) environment instances
        tensorboard_log=LOG_DIR,  # Tell SB3 where to continue logging
    )
    print(f"Model loaded successfully. Current Timesteps: {model.num_timesteps}")
except Exception as e:
    print(f"\n!!! Error loading model from {LOAD_MODEL_PATH}: {e} !!!")
    print("Check if the file exists, is a valid SB3 model, and matches the environment structure.")
    env.close()
    raise SystemExit(1) from e

# --- Setup Checkpoint Callback ---
checkpoint_callback = CheckpointCallback(
    save_freq=max(CHECKPOINT_FREQ // N_ENVS, 1),
    save_path=LOG_DIR,  # Save checkpoints in the log directory
    name_prefix=f"{os.path.basename(NEW_SAVE_PATH)}_ckpt",  # Prefix based on new save name
)

# --- Continue Training ---
print("\n--- Continuing Training ---")
start_timesteps = model.num_timesteps  # Get current steps from loaded model
target_timesteps = start_timesteps + ADDITIONAL_TIMESTEPS
print(f"Training from {start_timesteps} to {target_timesteps} total steps.")

try:
    model.learn(
        total_timesteps=ADDITIONAL_TIMESTEPS,  # Train for the *additional* steps
        log_interval=10,
        tb_log_name=f"{MODEL_ALGORITHM.__name__}_Drone",  # Use same or new log name
        reset_num_timesteps=False,  # <-- IMPORTANT: Do NOT reset timestep counter
        callback=checkpoint_callback,
    )
except Exception as e:
    print(f"\n!!! Error during continued training: {e} !!!")
    print("Attempting to save model before exiting...")
    model.save(f"{NEW_SAVE_PATH}_error")
finally:
    try:
        # --- Save the Final Model ---
        print("\n--- Training Finished (or interrupted) ---")
        print(f"Saving final model to: {NEW_SAVE_PATH}.zip")  # SB3 adds .zip automatically
        model.save(NEW_SAVE_PATH)  # Use the NEW save path
    finally:
        # --- Clean up ---
        # Nested so the worker processes are shut down even if the save fails.
        env.close()
        print("Environment closed.")

print("\n--- Training Continuation Script Complete ---")
print(f"To monitor training, run: tensorboard --logdir {LOG_DIR}")




--- Starting Drone Training Continuation ---
Attempting to load model from: models/ppo_drone_20_wind_3KG_180_T.zip
Training for additional 3000000 timesteps.
Environment Config: {'render_sim': False, 'max_steps': 750, 'render_path': False, 'render_shade': False, 'shade_distance_m': 2.0, 'moving_platform': False, 'platform_speed': 2.0, 'initial_pos_random_range_m': 8.0, 'max_allowed_tilt_angle_rad': 3, 'enable_wind': True, 'wind_speed': 20, 'reward_landing': 500, 'reward_un_landing': 300, 'lander_mass': 3}
World Size: 50.0m x 50.0mWorld Size: 50.0m x 50.0mWorld Size: 50.0m x 50.0mWorld Size: 50.0m x 50.0m


Screen Size: 800.0px x 800.0pxScreen Size: 800.0px x 800.0pxScreen Size: 800.0px x 800.0px


Render Zoom Factor: 1.0Render Zoom Factor: 1.0
Render Zoom Factor: 1.0

Effective Pixels Per Meter: 16.00Effective Pixels Per Meter: 16.00Effective Pixels Per Meter: 16.00



Screen Size: 800.0px x 800.0px
Render Zoom Factor: 1.0
Effective Pixels Per Meter: 16.00
World Size: 50.0m x 50.0m
Scr



Step 219: OUT OF BOUNDS!
Step 240: OUT OF BOUNDS!
Step 242: OUT OF BOUNDS!
Step 294: OUT OF BOUNDS!
Step 301: OUT OF BOUNDS!
Step 300: OUT OF BOUNDS!
Step 257: OUT OF BOUNDS!
Step 182: OUT OF BOUNDS!
Step 573: OUT OF BOUNDS!
Step 301: OUT OF BOUNDS!
Step 623: OUT OF BOUNDS!
Step 238: OUT OF BOUNDS!
Step 257: OUT OF BOUNDS!
Step 202: OUT OF BOUNDS!
Step 750: MAX STEPS REACHED!
Step 206: OUT OF BOUNDS!
Step 232: OUT OF BOUNDS!
Step 369: OUT OF BOUNDS!
Step 259: OUT OF BOUNDS!
Step 204: OUT OF BOUNDS!
Step 171: OUT OF BOUNDS!
Step 178: OUT OF BOUNDS!
Step 242: OUT OF BOUNDS!
Step 237: OUT OF BOUNDS!
Step 257: OUT OF BOUNDS!
Step 222: OUT OF BOUNDS!
Step 254: OUT OF BOUNDS!
Step 253: OUT OF BOUNDS!
Step 209: OUT OF BOUNDS!
Step 368: OUT OF BOUNDS!
Step 162: OUT OF BOUNDS!
Step 265: OUT OF BOUNDS!
Step 285: OUT OF BOUNDS!
Step 191: OUT OF BOUNDS!
Step 257: OUT OF BOUNDS!
Step 195: OUT OF BOUNDS!
Step 267: OUT OF BOUNDS!
Step 337: OUT OF BOUNDS!
Step 297: OUT OF BOUNDS!
Step 201: OUT OF BOUN


**Explanation and How to Use:**

1.  **Save:** Save this code as `continue_training.py` (or similar) in your project.
2.  **Configure Paths:**
    *   Set `LOAD_MODEL_PATH` to the exact path of the `.zip` file generated by your previous training run (e.g., `models/ppo_drone_final.zip`).
    *   Set `NEW_SAVE_PATH` to where you want the model saved *after this continuation*. It's generally good practice to give it a distinct name initially (e.g., `models/ppo_drone_final_continued`).
    *   Set `LOG_DIR` (usually the same as before, so TensorBoard shows one continuous graph).
3.  **Configure Training:**
    *   Set `ADDITIONAL_TIMESTEPS` to the number of *extra* steps you want to train for.
    *   Ensure `MODEL_ALGORITHM` matches the algorithm used to create the loaded model (the script will load it automatically, but it's good practice).
4.  **Configure Environment (`ENV_CONFIG`):** Make sure this dictionary matches the settings used during the *original* training run, or at least is compatible (same observation/action space dimensions).
5.  **Run:** Execute the script from your terminal:
    ```bash
    python continue_training.py
    ```
6.  **Monitor:** Use TensorBoard as before, pointing it to the `LOG_DIR`. You should see the training metrics continue from where the previous run left off.

This script provides a robust way to resume training your drone landing agent.

# Evaluation Agent

## Evaluation Configuration

In [17]:
# --- Evaluation Configuration ---
# *** Paths ***
# Trained model (or checkpoint) to evaluate.
MODEL_PATH = "models/ppo_drone_20_wind_3KG_4_T.zip"
# MODEL_PATH = "logs/drone_ppo/drone_ppo_ckpt_100000_steps.zip" # Example for loading a checkpoint

# *** Evaluation Parameters ***
NUM_EVAL_EPISODES = 20        # Number of evaluation episodes
RENDER_SIM = True             # True to watch the agent while it is evaluated
RENDER_DELAY_S = 0.0          # Extra per-frame delay in seconds when rendering (0.00 - 0.03)


# *** Environment Configuration for Evaluation ***
# Conditions the agent is evaluated under. The task settings reuse the
# training constants; the rendering options simply follow RENDER_SIM.
# NOTE(review): unlike ENV_CONFIG, no gravity / lander / thrust / landing
# tolerance entries are set here, so the environment's own defaults apply
# for those - confirm that this is intended.
ENV_CONFIG_EVAL = {
    # -- Rendering / episode length --
    "render_sim": RENDER_SIM,
    "max_steps": 750,
    "render_path": RENDER_SIM,
    "render_shade": RENDER_SIM,
    "shade_distance_m": 2.0,

    # -- Task settings (taken from the training constants) --
    "moving_platform": MOVING_PLATFORM,
    "platform_speed": PLATFORM_SPEED,
    "initial_pos_random_range_m": INITIAL_POST_RAMDOM_RANGE_M,
    "max_allowed_tilt_angle_rad": MAX_ALLOWED_TILT_ANGLE_RAD,
    "enable_wind": ENABLE_WIND,
    "wind_speed": WIND_SPEED,

    # -- Rewards: normal landing and unstable landing --
    "reward_landing": REWARD_LANDING,
    "reward_un_landing": REWARD_UN_LANDING,
}

## Evaluate Agent

In [19]:


import os
import gym
import numpy as np
import time
import pygame # Import pygame directly for flip
from stable_baselines3 import PPO # Or the algorithm you used (e.g., SAC, TD3)
# REMOVED: from stable_baselines3.common.vec_env import DummyVecEnv

# --- Environment Import ---
from drone_2d_custom_gym_env.drone_2d_env import Drone2dEnv




# --- Helper Function --- MODIFIED ---
# Terminal flags looked up in the final info dict, in priority order, paired
# with the outcome label each one maps to.
_OUTCOME_FLAGS = (
    ("landed_safely", "Landed Safely"),
    ("crashed", "Crashed"),
    ("lost_control", "Lost Control"),
    ("out_of_bounds", "Out of Bounds"),
    # NOTE(review): this key is capitalised unlike the others -- confirm it
    # matches the key the environment actually puts in its info dict.
    ("Battery_empty", "Battery Empty"),
)


def _classify_outcome(info, episode_length, max_steps):
    """
    Map the final info dict of an episode to exactly one outcome.

    :param info: Info dict from the last step (may carry an 'error' entry).
    :param episode_length: Number of steps taken in the episode.
    :param max_steps: Step limit of the environment, or None if unknown.
    :return: Tuple (outcome label, status text for the episode log line).
    """
    # An explicit error always wins over any terminal flag.
    if info.get('error'):
        return "Error", "FAILED (Error during step)"

    for flag, label in _OUTCOME_FLAGS:
        if info.get(flag, False):
            status = "SUCCESS" if label == "Landed Safely" else "FAILED"
            return label, f"{status} ({label})"

    # Timeout is considered ONLY if no other condition was met.
    if max_steps is not None and episode_length >= max_steps:
        return "Timeout", "FAILED (Timeout)"

    # The episode ended but no specific flag was set (should be rare).
    return "Error", "FAILED (Unknown - 'done' but no specific flag)"


def evaluate_agent(model, env, num_episodes=10, render=False, delay=0.0):
    """
    Evaluates a Stable Baselines3 agent on a single environment instance.

    Each episode is counted under exactly one outcome, so the outcome counts
    always add up to the number of episodes run.

    :param model: The agent to evaluate (its ``predict`` method is used).
    :param env: The single environment instance (``reset`` returns the
        observation, ``step`` returns ``obs, reward, done, info``).
    :param num_episodes: Number of episodes to run.
    :param render: Whether to render the environment.
    :param delay: Delay between frames if rendering.
    :return: Dictionary containing evaluation statistics including outcome counts.
    """
    episode_rewards = []
    episode_lengths = []
    # --- Outcome Counters ---
    episode_outcomes = {
        "Landed Safely": 0,
        "Crashed": 0,
        "Lost Control": 0,
        "Out of Bounds": 0,
        "Battery Empty": 0,
        "Timeout": 0,
        "Error": 0,  # For unexpected terminations
    }
    # --- End Counters ---

    # Get clock and fps if rendering and available
    the_clock = None
    render_fps = 50
    if render:
        the_clock = getattr(env, 'clock', None) or None
        if hasattr(env, 'metadata') and 'render_fps' in env.metadata:
            render_fps = env.metadata['render_fps']

    # Read the step limit defensively so a missing attribute falls through to
    # the "Unknown" outcome instead of raising in the timeout check.
    max_steps = getattr(env, 'max_steps', None)

    for episode in range(num_episodes):
        obs = env.reset()
        done = False
        current_episode_reward = 0
        current_episode_length = 0
        final_info_dict = {}  # Store final info from step before termination

        print(f"  Starting Eval Episode {episode + 1}/{num_episodes}...")

        while not done:
            if render:
                # Rendering is best-effort: on failure, keep evaluating without it.
                try:
                    env.render()
                    pygame.display.flip()
                    if the_clock:
                        the_clock.tick(render_fps)
                    if delay > 0:
                        time.sleep(delay)
                except Exception as e:
                    print(f"Error rendering/updating display: {e}")
                    render = False

            action, _states = model.predict(obs, deterministic=True)

            try:
                obs, reward, done, info = env.step(action)
                current_episode_reward += reward
                current_episode_length += 1
                final_info_dict = info  # Store latest info
            except Exception as e:
                print(f"Error during env.step(): {e}. Terminating episode.")
                done = True
                final_info_dict['error'] = f"Exception during step: {e}"  # Mark error in dict

        # --- Episode Finished ---
        episode_rewards.append(current_episode_reward)
        episode_lengths.append(current_episode_length)

        # --- Increment Outcome Counter (Prioritized) ---
        outcome, status_text = _classify_outcome(
            final_info_dict, current_episode_length, max_steps
        )
        episode_outcomes[outcome] += 1
        print(f"  Episode {episode + 1}: {status_text}")
        # --- End Outcome Counter ---

        print(f"  Episode {episode + 1}: Length={current_episode_length}, Reward={current_episode_reward:.2f}")

    # --- Calculate Statistics ---
    mean_reward = np.mean(episode_rewards) if episode_rewards else 0
    std_reward = np.std(episode_rewards) if episode_rewards else 0
    mean_length = np.mean(episode_lengths) if episode_lengths else 0
    # Calculate success rate based on counter
    success_rate = episode_outcomes["Landed Safely"] / num_episodes if num_episodes > 0 else 0

    stats = {
        "mean_reward": mean_reward, "std_reward": std_reward,
        "mean_length": mean_length, "success_rate": success_rate,
        "outcomes": episode_outcomes,  # Include the outcome counts
        "total_episodes": num_episodes
    }
    return stats


# Script entry point: load the saved model, evaluate it on one environment
# instance, and print a summary of rewards, lengths and episode outcomes.
if __name__ == "__main__":
    print("--- Starting Drone Agent Evaluation ---")
    print(f"Loading model from: {MODEL_PATH}")
    print(f"Evaluating for {NUM_EVAL_EPISODES} episodes.")
    print(f"Evaluation Environment Config: {ENV_CONFIG_EVAL}")

    # SystemExit is raised directly rather than calling exit(): that helper is
    # provided by the site module for interactive use. A non-zero status marks
    # the run as failed.
    if not os.path.exists(MODEL_PATH):
        print(f"\n!!! Error: Model file not found at {MODEL_PATH} !!!")
        raise SystemExit(1)

    # --- Create SINGLE Evaluation Environment ---
    try:
        eval_env = Drone2dEnv(**ENV_CONFIG_EVAL)
    except Exception as e:
        print(f"\n!!! Error creating evaluation environment: {e} !!!")
        raise SystemExit(1)
    print("Single evaluation environment created.")

    # From here on the environment exists, so it is closed on every exit
    # path, including an exception raised during evaluation.
    try:
        # --- Load Model ---
        try:
            loaded_model = PPO.load(MODEL_PATH, env=eval_env)  # Pass single env
        except Exception as e:
            print(f"\n!!! Error loading model: {e} !!!")
            raise SystemExit(1)
        print("\nModel loaded successfully.")

        # --- Run Evaluation ---
        print("\n--- Running Evaluation Loop ---")
        eval_stats = evaluate_agent(
            model=loaded_model, env=eval_env, num_episodes=NUM_EVAL_EPISODES,
            render=RENDER_SIM, delay=RENDER_DELAY_S
        )

        # --- Print Summary ---
        print("\n\n--- Evaluation Summary ---")
        print(f"Evaluated for {eval_stats['total_episodes']} episodes.")
        print(f"Mean Reward: {eval_stats['mean_reward']:.2f} +/- {eval_stats['std_reward']:.2f}")
        print(f"Mean Episode Length: {eval_stats['mean_length']:.1f}")
        print(f"Success Rate (Landed Safely): {eval_stats['success_rate']:.2%}")
        print("\nEpisode Outcome Counts:")
        for outcome, count in eval_stats['outcomes'].items():
            # Print even if count is 0 for completeness
            print(f"  - {outcome}: {count}")
        print("--------------------------")
    finally:
        # --- Cleanup ---
        eval_env.close()
        print("Evaluation environment closed.")

    print("--- Evaluation Script Complete ---")



--- Starting Drone Agent Evaluation ---
Loading model from: models/ppo_drone_20_wind_3KG_4_T.zip
Evaluating for 20 episodes.
Evaluation Environment Config: {'render_sim': True, 'max_steps': 750, 'render_path': True, 'render_shade': True, 'shade_distance_m': 2.0, 'moving_platform': False, 'platform_speed': 2.0, 'initial_pos_random_range_m': 8.0, 'max_allowed_tilt_angle_rad': 4, 'enable_wind': True, 'wind_speed': 20, 'reward_landing': 500, 'reward_un_landing': 300}
World Size: 50.0m x 50.0m
Screen Size: 800.0px x 800.0px
Render Zoom Factor: 1.0
Effective Pixels Per Meter: 16.00

--- Resetting Episode 1 ---
Wind Enabled: True, Wind Dir: 133.2 deg. Moving Platform: False
Single evaluation environment created.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.

Model loaded successfully.

--- Running Evaluation Loop ---
  Starting Eval Episode 1/20...
Step 257: OUT OF BOUNDS!
  Episode 1: FAILED (Out of Bounds)
  Episode 1: Length=258, Reward=-32.31
  Starting Eval

SystemExit: 