In [1]:
!pip3 install torch torchvision torchaudio



In [1]:
import time
from collections import deque

import ale_py  # not referenced by name; presumably imported so the ALE/* environments are available to gymnasium
import cv2
import gymnasium as gym
import numpy as np
import torch


# ===== CONFIGURATION =====
# Path to the trained model checkpoint that will be loaded.
MODEL_PATH = "dqn_pong_best (2).pth copy 3"

# Number of games to play.
NUM_EPISODES = 1

# Pause between frames in milliseconds (0 for fastest).
RENDER_DELAY = 5
# =========================

# Image preprocessing (must match training parameters)
def preprocess(obs,
               crop_top=34,
               crop_bottom=16,
               crop_left=0,
               crop_right=0,
               resize_width=84,
               resize_height=84):
    """Turn one observation into a cropped, resized greyscale frame.

    A 3-D observation whose last axis has length 3 is collapsed to a single
    channel with the weights (0.299, 0.587, 0.114); any other observation is
    used unchanged and must already be 2-D. The frame is then cropped by the
    given margins and resized with nearest-neighbour interpolation. Pixel
    values are not rescaled here.

    Args:
        obs: Observation array, either (height, width, 3) or (height, width).
        crop_top: Rows removed from the top.
        crop_bottom: Rows removed from the bottom.
        crop_left: Columns removed from the left.
        crop_right: Columns removed from the right.
        resize_width: Width of the returned frame.
        resize_height: Height of the returned frame.

    Returns:
        A 2-D array of shape (resize_height, resize_width).
    """
    has_three_channels = len(obs.shape) == 3 and obs.shape[-1] == 3
    grey = np.dot(obs[..., :3], [0.299, 0.587, 0.114]) if has_three_channels else obs

    n_rows, n_cols = grey.shape
    row_window = slice(crop_top, n_rows - crop_bottom)
    col_window = slice(crop_left, n_cols - crop_right)

    # cv2.resize takes the target size as (width, height).
    target_size = (resize_width, resize_height)
    return cv2.resize(grey[row_window, col_window], target_size,
                      interpolation=cv2.INTER_NEAREST)

# Neural Network (must match training architecture)
class DQN(torch.nn.Module):
    """Convolutional Q-network: three conv layers followed by two linear layers.

    The first linear layer expects 64 * 7 * 7 features, which is what the conv
    stack produces for an 84x84 input (84 -> 20 -> 9 -> 7 per spatial side).

    Args:
        num_actions: Size of the output vector (one Q-value per action).
        in_channels: Number of channels in the input (stacked frames).
    """

    def __init__(self, num_actions, in_channels=4):
        super().__init__()
        nn = torch.nn
        # Attribute names and creation order are kept stable so that saved
        # state dicts keep loading and seeded initialisation is unchanged.
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 512)
        self.fc2 = nn.Linear(512, num_actions)

    def forward(self, x):
        """Return the Q-values for a batch of inputs, shape (batch, num_actions)."""
        features = x
        for conv in (self.conv1, self.conv2, self.conv3):
            features = torch.relu(conv(features))
        batch_size = features.size(0)
        flattened = features.view(batch_size, -1)
        hidden = torch.relu(self.fc1(flattened))
        return self.fc2(hidden)

# Play NUM_EPISODES games of Pong with the trained network, always taking the
# action with the highest predicted Q-value, and print each episode's reward.

# Initialize environment (render_mode="human" asks the environment to display the game)
env = gym.make("ALE/Pong-v5", render_mode="human")

# Everything after environment creation runs under try/finally so the
# environment is closed even if loading the model or playing raises, or the
# run is interrupted.
try:
    # Load trained model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = DQN(env.action_space.n).to(device)
    # NOTE(review): depending on the installed torch version, torch.load may
    # unpickle arbitrary objects. Only load checkpoints from a trusted source;
    # consider weights_only=True if the installed version supports it.
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.eval()

    print(f"Loaded model from {MODEL_PATH}")
    print(f"Playing {NUM_EPISODES} games...")

    for episode in range(NUM_EPISODES):
        obs, _ = env.reset()
        # Start the 4-frame history filled with the first frame.
        frame_stack = deque([preprocess(obs)] * 4, maxlen=4)
        total_reward = 0
        done = False

        while not done:
            # Prepare state tensor: (4, H, W) -> (1, 4, H, W), scaled by 1/255
            state = np.stack(frame_stack, axis=0)
            state_tensor = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0) / 255.0

            # Get Q-values and select best action
            with torch.no_grad():
                q_values = model(state_tensor)
            action = q_values.argmax(dim=1).item()

            # Take action
            next_obs, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            total_reward += reward

            # Update frame stack (maxlen=4 drops the oldest frame)
            frame_stack.append(preprocess(next_obs))

            # Add small delay for visualization. time.sleep is used rather
            # than cv2.waitKey because waitKey only works while an OpenCV
            # window exists, and this script never creates one.
            if RENDER_DELAY > 0:
                time.sleep(RENDER_DELAY / 1000.0)

        print(f"Episode {episode+1}: Total Reward = {total_reward}")
finally:
    env.close()

print("Visualization complete!")

A.L.E: Arcade Learning Environment (version 0.11.2+ecc1138)
[Powered by Stella]


Loaded model from dqn_pong_best (3).pth copy 2
Playing 1 games...
Episode 1: Total Reward = -21.0
Visualization complete!
