In [6]:
import sys
import os
sys.path.append(os.path.abspath('../src'))
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
from lunar_lander_training import run_random_algorithm, run_PPO_algorithm
from utils import load_model, get_env
from stable_baselines3.common.vec_env import DummyVecEnv
import numpy as np
import gymnasium as gym

In [7]:
# Setting up variables
model_path = '../models/ppo_model.zip'
episodes = 10
total_timesteps = 10000
render_mode="human"
env_name = "LunarLander-v2"

In [8]:
#setting up our environment
env = get_env(env_name, render_mode)

In [9]:
# Loading the model
model = load_model(model_path)

In [10]:
 # Evaluteing our model
episode_rewards = []
episode_lengths = []
success_threshold = 200  # Define a success criterion, e.g., a score above 200
successes = 0
for episode in range(episodes):
        observation, info = env.reset()
        done = False
        score = 0
        length = 0
        while not done:
            env.render()
            action, _ = model.predict(observation)
            try:
                observation, reward, done, info = env.step(action)
            except ValueError:
                observation, reward, done, truncated, info = env.step(action)
                done = done or truncated
            score += reward
            length += 1
        
        episode_rewards.append(score)
        episode_lengths.append(length)
        if score >= success_threshold:
            successes += 1

        print(f"Episode {episode + 1} finished with score: {score}")

env.close()

# Calculate metrics
average_return = np.mean(episode_rewards)
std_return = np.std(episode_rewards)
success_rate = successes / episodes
average_length = np.mean(episode_lengths)

print("\nEvaluation Metrics:")
print(f"Average Return: {average_return}")
print(f"Standard Deviation of Return: {std_return}")
print(f"Success Rate: {success_rate * 100}%")
print(f"Average Episode Length: {average_length}")

Episode 1 finished with score: -42.33678123520906
Episode 2 finished with score: 76.1753790786468
Episode 3 finished with score: 46.31005081355374
Episode 4 finished with score: 93.2627206908075
Episode 5 finished with score: -126.43881060974235
Episode 6 finished with score: -44.931866194334766
Episode 7 finished with score: -29.420179224978696
Episode 8 finished with score: 11.453701094902074
Episode 9 finished with score: -47.234022585445196
Episode 10 finished with score: -95.43290021139731

Evaluation Metrics:
Average Return: -15.859270838319722
Standard Deviation of Return: 68.01716956752993
Success Rate: 0.0%
Average Episode Length: 419.9
