Basic Random Agent 

In [1]:
# Import Gym
import gymnasium as gym

# Create the LunarLander environment with on-screen rendering.
env = gym.make("LunarLander-v3", render_mode='human')

# The rollout is wrapped in try/finally so the environment (and its render
# window) is always closed, even if the loop is interrupted from the notebook
# or an environment call raises part-way through.
try:
    # Reset the environment to start the first episode.
    observation, info = env.reset()

    # Run for 1000 steps, sampling a uniformly random action each step.
    for _ in range(1000):
        action = env.action_space.sample()  # Random action
        observation, reward, terminated, truncated, info = env.step(action)

        # Start a fresh episode as soon as the current one ends.
        if terminated or truncated:
            observation, info = env.reset()
finally:
    # Close the environment window.
    env.close()

  from pkg_resources import resource_stream, resource_exists


Precision Reward Wrapper

In [2]:
from gymnasium import Wrapper

# Custom wrapper to reward more precise landings
class PrecisionLandingWrapper(Wrapper):
    """Reward wrapper that adjusts the terminal reward by horizontal offset.

    On the step where the wrapped environment reports ``terminated``, the
    absolute value of ``obs[0]`` (horizontal position from center) is compared
    against a list of ``(max_offset, bonus)`` tiers. The first tier whose
    ``max_offset`` is strictly greater than the offset adds its ``bonus`` to
    the reward; if no tier matches, ``miss_penalty`` is subtracted instead.
    Non-terminal steps and truncated episodes are passed through unchanged.

    NOTE(review): ``terminated`` does not distinguish a safe landing from a
    crash, so the adjustment is applied in both cases (a termination close to
    the center receives the bonus either way). Confirm this is the intended
    shaping before relying on it.

    Args:
        env: The environment to wrap.
        bonus_tiers: Iterable of ``(max_offset, bonus)`` pairs, checked in the
            order given; list the tightest threshold first. Defaults to
            ``DEFAULT_BONUS_TIERS``.
        miss_penalty: Amount subtracted from the terminal reward when the
            offset falls outside every tier. Defaults to
            ``DEFAULT_MISS_PENALTY``.
    """

    # (max |x| offset, bonus) pairs: perfect, good and okay landings.
    DEFAULT_BONUS_TIERS = ((0.05, 100), (0.1, 50), (0.2, 5))
    # Penalty applied when the terminal offset is outside every tier.
    DEFAULT_MISS_PENALTY = 50

    def __init__(self, env, bonus_tiers=DEFAULT_BONUS_TIERS,
                 miss_penalty=DEFAULT_MISS_PENALTY):
        super().__init__(env)
        # Materialize once so a generator argument can be reused on every step.
        self.bonus_tiers = tuple(bonus_tiers)
        self.miss_penalty = miss_penalty

    def step(self, action):
        """Step the wrapped env and adjust the reward on termination.

        Args:
            action: Action forwarded unchanged to the wrapped environment.

        Returns:
            The wrapped environment's ``(obs, reward, terminated, truncated,
            info)`` tuple, with ``reward`` adjusted only when ``terminated``
            is true.
        """
        obs, reward, terminated, truncated, info = self.env.step(action)

        # Only the final step of a terminated episode is adjusted.
        if terminated:
            offset = abs(obs[0])  # Horizontal distance from center

            for max_offset, bonus in self.bonus_tiers:
                if offset < max_offset:
                    reward += bonus
                    break
            else:
                # No tier matched: the episode ended too far from center.
                reward -= self.miss_penalty

        return obs, reward, terminated, truncated, info


Training the Agent with PPO

In [None]:
from stable_baselines3 import PPO

# Build the base LunarLander environment first (no rendering during training)...
train_env = gym.make(
    "LunarLander-v3",
    continuous=False,
    gravity=-10.0,
    enable_wind=False,
    wind_power=15.0,
    turbulence_power=1.5,
)
# ...then layer the precision-landing reward adjustment on top of it.
train_env = PrecisionLandingWrapper(train_env)

# PPO agent with an MLP policy network, logging verbosely.
model = PPO("MlpPolicy", train_env, verbose=1)

# Train for 200k environment steps, logging every 50 iterations.
model.learn(total_timesteps=200_000, log_interval=50)

# Persist the trained weights to disk for the evaluation cell.
model.save("ppo_lunar_lander")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


Testing the Trained Agent

In [None]:
# Load the saved PPO model from disk.
model = PPO.load("ppo_lunar_lander")

# Create a test environment with on-screen rendering enabled. The settings
# mirror the training environment, but without the reward wrapper (the reward
# is not used during this visual evaluation).
test_env = gym.make("LunarLander-v3",
                    continuous=False,
                    gravity=-10.0,
                    enable_wind=False,
                    wind_power=15.0,
                    turbulence_power=1.5,
                    render_mode='human')

# The evaluation loop is wrapped in try/finally so the rendering window is
# always closed, even if the long loop is interrupted from the notebook or a
# call raises part-way through.
try:
    # Reset environment for testing.
    obs, info = test_env.reset()

    # Run the trained policy for 5000 steps, restarting finished episodes.
    for _ in range(5000):
        # Use the trained model to select the action deterministically.
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = test_env.step(action)

        if terminated or truncated:
            obs, info = test_env.reset()
finally:
    # Close rendering window.
    test_env.close()