In [1]:
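# Dependencies -- uncomment on a fresh environment (e.g. Colab). `%pip` installs
# into the running kernel's environment; versions match the ones this notebook ran with.
#%pip install gymnasium==1.0.0
#%pip install ale-py==0.10.1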


Collecting gymnasium
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/958.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m358.4/958.1 kB[0m [31m10.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m958.1/958.1 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-1.0.0
Collecting ale-py
  Downloading ale_py-0.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.6 kB)
Downloading ale_py-0.10.1-cp3

In [2]:
#import packages
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import ale_py
from PIL import Image
import os
import wandb

In [3]:
#DQN model (same as in training)
class DQN(nn.Module):
    """Convolutional Q-network: three conv layers followed by a two-layer head.

    Args:
        input_shape: ``(channels, height, width)`` of one un-batched input.
        n_actions: Number of Q-values emitted per input.
    """

    def __init__(self, input_shape, n_actions):
        super().__init__()

        in_channels = input_shape[0]

        # The attribute names (`conv`, `fc`) and the position of each layer in
        # the Sequential define the state_dict keys, so they are kept stable.
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )

        # Width of the first linear layer is measured with a dummy forward pass
        # instead of being derived by hand from the kernel/stride arithmetic.
        flat_features = self._get_conv_out(input_shape)

        self.fc = nn.Sequential(
            nn.Linear(flat_features, 512),
            nn.ReLU(),
            nn.Linear(512, n_actions)
        )

    def _get_conv_out(self, shape):
        """Return how many values the conv stack emits for one zero input of ``shape``."""
        probe = torch.zeros(1, *shape)
        return int(np.prod(self.conv(probe).shape))

    def forward(self, x):
        """Map a batch of inputs to a ``(batch, n_actions)`` tensor of Q-values."""
        batch_size = x.size(0)
        features = self.conv(x)
        # Flatten everything except the batch dimension before the linear head.
        return self.fc(features.view(batch_size, -1))

In [4]:
def preprocess_observation(observation):
    """Turn an ``H x W x C`` frame into a ``(1, 84, 84)`` float32 tensor.

    The channel axis (axis 2) is averaged away, then the resulting 2-D image is
    bilinearly resized to 84x84. No rescaling of the intensity values is
    applied, so the output keeps the numeric scale of the input.

    Args:
        observation: Array-like with three dimensions, channels last.

    Returns:
        A float32 ``torch.Tensor`` of shape ``(1, 84, 84)``.
    """
    grayscale = np.mean(observation, axis=2).astype(np.float32)

    # interpolate() wants (batch, channel, H, W), so add two leading axes.
    batched = torch.from_numpy(grayscale)[None, None]
    resized = torch.nn.functional.interpolate(
        batched,
        size=(84, 84),
        mode='bilinear',
        align_corners=False,
    )

    # Drop only the batch axis; the single channel axis is kept.
    return resized.squeeze(0)

In [5]:
def evaluate_model(model_path, num_episodes=100):
    """Run a greedy evaluation of a saved DQN checkpoint on ALE Bowling.

    Plays ``num_episodes`` episodes of ``"ALE/Bowling-v5"``, always taking the
    action with the highest predicted Q-value. Per-episode and summary
    statistics are printed, the highest-scoring episode is written to
    ``evaluation_results/best_episode.gif``, and the summary is logged to the
    ``"bowling-ale"`` Weights & Biases project.

    Args:
        model_path: Path to a checkpoint readable by ``torch.load`` that stores
            the network weights under the key ``'model_state_dict'``.
        num_episodes: Number of episodes to play; must be at least 1.

    Raises:
        ValueError: If ``num_episodes`` is smaller than 1 (the summary
            statistics are undefined without at least one episode).
    """
    # Fail before any expensive setup instead of crashing later in min()/max().
    if num_episodes <= 0:
        raise ValueError(f"num_episodes must be at least 1, got {num_episodes}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #init env
    env = gym.make("ALE/Bowling-v5", render_mode="rgb_array")

    #metrics storage
    episode_rewards = []
    best_reward = float('-inf')
    best_episode_frames = []

    try:
        #load model
        # NOTE(review): torch.load unpickles the file, so only load checkpoints
        # from a trusted source. Passing weights_only=True would be safer if
        # the checkpoint contains nothing but tensors/primitives -- confirm.
        checkpoint = torch.load(model_path, map_location=device)
        model = DQN((1, 84, 84), env.action_space.n).to(device)
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

        print(f"Starting evaluation over {num_episodes} episodes...")

        for episode in range(num_episodes):
            observation, _ = env.reset()
            state = preprocess_observation(observation).unsqueeze(0).to(device)
            episode_reward = 0
            frames = []

            while True:
                #greedy action: index of the largest Q-value
                with torch.no_grad():
                    action = int(model(state).argmax(dim=1).item())

                observation, reward, terminated, truncated, _ = env.step(action)
                episode_reward += reward

                #save frame
                frames.append(Image.fromarray(env.render()))

                if terminated or truncated:
                    break

                #preprocess next state
                state = preprocess_observation(observation).unsqueeze(0).to(device)

            episode_rewards.append(episode_reward)

            #update best episode if current is better
            if episode_reward > best_reward:
                best_reward = episode_reward
                # `frames` is rebound to a new list every episode, so keeping
                # the reference is safe and avoids copying the whole list.
                best_episode_frames = frames

            print(f"Episode {episode + 1}/{num_episodes}")
            print(f"Reward: {episode_reward}")
            print(f"Best Reward so far: {best_reward}")
            print("-" * 50)
    finally:
        # Release the emulator even when loading or stepping fails.
        env.close()

    #save best episode as GIF
    best_episode_path = None
    if best_episode_frames:
        os.makedirs("evaluation_results", exist_ok=True)
        best_episode_path = "evaluation_results/best_episode.gif"
        best_episode_frames[0].save(
            best_episode_path,
            save_all=True,
            append_images=best_episode_frames[1:],
            duration=100,
            loop=0
        )
        print(f"Best episode saved to {best_episode_path}")

    mean_reward = np.mean(episode_rewards)
    std_reward = np.std(episode_rewards)
    min_reward = min(episode_rewards)
    max_reward = max(episode_rewards)

    #summary statistics
    print("\nEvaluation Summary:")
    print(f"Number of episodes: {num_episodes}")
    print(f"Mean reward: {mean_reward:.2f} ± {std_reward:.2f}")
    print(f"Min reward: {min_reward:.2f}")
    print(f"Max reward: {max_reward:.2f}")

    #log to wandb
    wandb.init(project="bowling-ale", name="model_evaluation")
    try:
        summary = {
            "mean_reward": mean_reward,
            "std_reward": std_reward,
            "min_reward": min_reward,
            "max_reward": max_reward,
        }
        # Only attach the video when a GIF was actually written.
        if best_episode_path is not None:
            summary["best_episode"] = wandb.Video(best_episode_path, fps=30, format="gif")
        wandb.log(summary)
    finally:
        # Always close the run so a failed log call does not leave it open.
        wandb.finish()

In [7]:
# Register the ale_py environments with gymnasium before evaluate_model
# calls gym.make on an "ALE/..." id.
gym.register_envs(ale_py)

model_path = "./Solving-ALE-environments/Part 1 - Bowling/DQN/models_dqn/best_model_dqn.pth"

# Check for the checkpoint up front so a wrong path fails with a clear message
# before the evaluation run starts.
if os.path.exists(model_path):
    evaluate_model(model_path, num_episodes=100)
else:
    raise FileNotFoundError(f"Model file not found: {model_path}")

  checkpoint = torch.load(model_path, map_location=device)


Starting evaluation over 100 episodes...
Episode 1/100
Reward: 14.0
Best Reward so far: 14.0
--------------------------------------------------
Episode 2/100
Reward: 14.0
Best Reward so far: 14.0
--------------------------------------------------
Episode 3/100
Reward: 14.0
Best Reward so far: 14.0
--------------------------------------------------
Episode 4/100
Reward: 14.0
Best Reward so far: 14.0
--------------------------------------------------
Episode 5/100
Reward: 14.0
Best Reward so far: 14.0
--------------------------------------------------
Episode 6/100
Reward: 14.0
Best Reward so far: 14.0
--------------------------------------------------
Episode 7/100
Reward: 14.0
Best Reward so far: 14.0
--------------------------------------------------
Episode 8/100
Reward: 14.0
Best Reward so far: 14.0
--------------------------------------------------
Episode 9/100
Reward: 14.0
Best Reward so far: 14.0
--------------------------------------------------
Episode 10/100
Reward: 14.0
Best

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc




0,1
max_reward,▁
mean_reward,▁
min_reward,▁
std_reward,▁

0,1
max_reward,24.0
mean_reward,14.34
min_reward,9.0
std_reward,2.24152
