# Validating the trained model: D3QN

In [1]:
import torch
import imageio
from dqn_agent import DQNAgent, device
from utils import np, pygame, stack_frames, PrioritizedReplayMemory, ReplayBuffer, create_directory

Using device: cuda
pygame 2.6.1 (SDL 2.28.4, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html


## Loading `checkpoint.pth` as in the project template (for some reason this does not give the best performance)

In [2]:
def validate_env(env, using_prioritized_replay, num_trials):
    """Evaluate the D3QN agent restored from ``checkpoint.pth`` over several episodes.

    The replay-buffer type and the checkpoint directory are selected by
    ``using_prioritized_replay``. Each trial plays one episode with
    ``agent.select_action(state, eval_mode=True)``, capturing the rendered
    pygame window after every step. The frames of the highest-scoring trial
    are written as an MP4 under ``saved_models/best_trial_videos/``.

    Args:
        env: Environment exposing ``action_space``, ``reset()``, ``step()``,
            ``render()`` and ``close()``. ``step()`` is unpacked as
            ``(next_state, reward, done, _, score, health)``.
        using_prioritized_replay: Truthy to use ``PrioritizedReplayMemory`` and
            ``saved_models/d3qn_data_2/``; falsy to use ``ReplayBuffer`` and
            ``saved_models/d3qn_normal_buffer_data_2/``.
        num_trials: Number of episodes to play; must be at least 1.

    Returns:
        None. Per-trial results and averages are printed.

    Raises:
        ValueError: If ``num_trials`` is smaller than 1 (the averages would
            otherwise divide by zero after the model was already loaded).
    """
    if num_trials < 1:
        raise ValueError(f"num_trials must be at least 1, got {num_trials!r}")

    if using_prioritized_replay:
        replay_buffer = PrioritizedReplayMemory(capacity=50000)
        model_path = 'saved_models/d3qn_data_2/'  # Adjust if your checkpoint is elsewhere
    else:
        replay_buffer = ReplayBuffer(capacity=50000)
        model_path = 'saved_models/d3qn_normal_buffer_data_2/'

    action_dim = len(env.action_space)
    state_shape = (4, 84, 84)

    # Initialize agent (make sure net_type and other args match your training).
    # The prioritized-replay flag follows the buffer that was actually built
    # above instead of being hard-coded to True.
    agent = DQNAgent(
        state_shape=state_shape,
        action_dim=action_dim,
        replay_buffer=replay_buffer,
        model_path=model_path,
        net_type='d3qn',
        use_prioritized_replay=using_prioritized_replay,
    )

    # Load checkpoint
    checkpoint = torch.load(model_path + 'checkpoint.pth', map_location=device)
    agent.model.load_state_dict(checkpoint['model'])
    agent.target_model.load_state_dict(checkpoint['target_model'])
    agent.optimizer.load_state_dict(checkpoint['optimizer'])
    best_score = checkpoint['best_score']
    best_episode = checkpoint['episode']
    print(f">> Loaded model with best score: {best_score}, at episode: {best_episode}")

    # Set agent to evaluation mode
    agent.model.eval()
    results = []
    best_trial_idx = -1
    best_trial_score = float('-inf')
    best_trial_frames = []

    try:
        for trial in range(num_trials):
            state = env.reset()
            state, stacked_frames = stack_frames(None, state, True)
            done = False
            total_reward = 0
            score = 0
            frames = []
            env.render()  # Ensure window is created

            while not done:
                action = agent.select_action(state, eval_mode=True)
                next_state, reward, done, _, score, health = env.step(action)
                next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)
                state = next_state
                total_reward += reward
                env.render()  # Optional: comment out if running headless

                # Capture frame for video. get_surface() returns None when no
                # display mode is set (e.g. headless), so skip capture then.
                surface = pygame.display.get_surface()
                if surface is not None:
                    frame = pygame.surfarray.array3d(surface)
                    # array3d is indexed (width, height, channel); swap the
                    # first two axes to get row-major image frames.
                    frames.append(np.transpose(frame, (1, 0, 2)))

            print(f"Trial {trial+1}: Total reward: {total_reward}, Score: {score}")
            results.append((total_reward, score))

            # Save best trial's frames
            if score > best_trial_score:
                best_trial_score = score
                best_trial_idx = trial
                best_trial_frames = frames
    finally:
        # Always release the environment, even if an episode fails part-way.
        env.close()

    # Print summary statistics
    avg_reward = sum(r for r, _ in results) / num_trials
    avg_score = sum(s for _, s in results) / num_trials
    print(f"\nAverage over {num_trials} trials: Reward = {avg_reward:.2f}, Score = {avg_score:.2f}")

    # Save best trial video
    if best_trial_frames:
        video_folder = 'saved_models/best_trial_videos/'
        create_directory(video_folder)
        video_path = f"best_trial_{best_trial_idx+1}_score_{best_trial_score}.mp4"
        imageio.mimsave(video_folder + video_path, best_trial_frames, fps=60, quality=9)
        # Report the full location so the file can actually be found.
        print(f"Saved best trial video to: {video_folder + video_path}")

In [None]:
# Build the game environment, then validate the agent restored from checkpoint.pth
from env_wrapper import SpaceShipEnv
env = SpaceShipEnv()

using_prioritized_replay = True  # False -> evaluate the model trained with the normal replay buffer
num_trials = 1  # how many validation episodes to play
validate_env(
    env=env,
    using_prioritized_replay=using_prioritized_replay,
    num_trials=num_trials,
)

## Loading my trained prediction-network and target-network weights (these give the best performance in this workspace)

In [None]:
def validate_env_specific_checkpoint(env, using_prioritized_replay, checkpoint=None):
    """Play one evaluation episode with the weights saved at a specific training episode.

    Loads ``d3qn_model_ep<checkpoint>.pth`` and
    ``d3qn_target_model_ep<checkpoint>.pth`` from the directory selected by
    ``using_prioritized_replay``, runs a single episode with
    ``agent.select_action(state, eval_mode=True)``, and captures the rendered
    pygame window after every step.

    Args:
        env: Environment exposing ``action_space``, ``reset()``, ``step()``,
            ``render()`` and ``close()``. ``step()`` is unpacked as
            ``(next_state, reward, done, _, score, health)``.
        using_prioritized_replay: Truthy to use ``PrioritizedReplayMemory`` and
            ``saved_models/d3qn_data_2/``; falsy to use ``ReplayBuffer`` and
            ``saved_models/d3qn_normal_buffer_data_2/``.
        checkpoint: Episode number embedded in the saved file names, as a
            string or an int (e.g. ``'3450'`` or ``3450``). Required despite
            the ``None`` default.

    Returns:
        tuple: ``(frames, score)`` where ``frames`` is the list of captured
        frames and ``score`` is the last score reported by ``env.step``.

    Raises:
        TypeError: If ``checkpoint`` is ``None``.
    """
    if checkpoint is None:
        raise TypeError(
            "checkpoint is required: pass the episode number of the saved model, e.g. '3450'"
        )
    # Accept ints as well as strings so checkpoint=3450 behaves like '3450'.
    checkpoint = str(checkpoint)

    if using_prioritized_replay:
        replay_buffer = PrioritizedReplayMemory(capacity=50000)
        model_dir = 'saved_models/d3qn_data_2/'
    else:
        replay_buffer = ReplayBuffer(capacity=50000)
        # Original author's note on this branch: "3050 > 3250"
        # (presumably a comparison of checkpoints -- confirm).
        model_dir = 'saved_models/d3qn_normal_buffer_data_2/'
    model_path = model_dir + 'd3qn_model_ep' + checkpoint + '.pth'
    target_model_path = model_dir + 'd3qn_target_model_ep' + checkpoint + '.pth'

    action_dim = len(env.action_space)
    state_shape = (4, 84, 84)

    # Initialize agent (make sure net_type and other args match your training).
    # The prioritized-replay flag follows the buffer that was actually built
    # above instead of being hard-coded to True.
    # NOTE(review): model_path here is a weights file, whereas validate_env
    # passes a directory -- confirm which one DQNAgent expects.
    agent = DQNAgent(
        state_shape=state_shape,
        action_dim=action_dim,
        replay_buffer=replay_buffer,
        model_path=model_path,
        net_type='d3qn',
        use_prioritized_replay=using_prioritized_replay,
    )

    # Load the separately saved online and target network weights
    model_checkpoint = torch.load(model_path, map_location=device)
    target_model_checkpoint = torch.load(target_model_path, map_location=device)
    agent.model.load_state_dict(model_checkpoint)
    agent.target_model.load_state_dict(target_model_checkpoint)

    # Set agent to evaluation mode
    agent.model.eval()

    state = env.reset()
    state, stacked_frames = stack_frames(None, state, True)
    done = False
    total_reward = 0
    score = 0
    frames = []
    env.render()  # Ensure window is created

    while not done:
        action = agent.select_action(state, eval_mode=True)
        next_state, reward, done, _, score, health = env.step(action)
        next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)
        state = next_state
        total_reward += reward
        env.render()  # Optional: comment out if running headless

        # Capture frame for video
        surface = pygame.display.get_surface()
        frame = pygame.surfarray.array3d(surface)
        # array3d is indexed (width, height, channel); swap the first two
        # axes to get row-major image frames.
        frame = np.transpose(frame, (1, 0, 2))
        frames.append(frame)

    print(f"Total reward: {total_reward}, Score: {score}")

    env.close()

    # Hand the captured frames back; the caller decides whether to save a video.
    return frames, score

In [None]:
# Build a fresh environment and replay one episode from a chosen training checkpoint
from env_wrapper import SpaceShipEnv
env = SpaceShipEnv()

using_prioritized_replay = True  # False -> evaluate the model trained with the normal replay buffer
# Episode number of the saved weights to validate; use 3250 when
# using_prioritized_replay=False (normal replay buffer).
checkpoint = '3450'
frame, score = validate_env_specific_checkpoint(
    env,
    using_prioritized_replay,
    checkpoint=checkpoint,
)

Using Dueling Double DQN architecture
Total reward: 1843.75, Score: 1925


In [None]:
# Write the frames captured in the previous cell to an MP4 named after the
# checkpoint and the score it reached.
video_folder = 'saved_models/best_trial_videos/'
create_directory(video_folder)
video_path = f"episode_{checkpoint}th_score_{score}.mp4"
imageio.mimsave(video_folder + video_path, frame, fps=60, quality=9)
# Report the full location (folder + file name); this is a single checkpoint
# episode, not a "best trial" selected among several.
print(f"Saved episode video to: {video_folder + video_path}")



Saved best trial video to: pisode_3450th_score_1925.mp4
