In [1]:
import gymnasium as gym
import my_package
import torch
import numpy as np
import matplotlib.pyplot as plt

from my_package import DQN, select_action

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
data = torch.load('model_with_data_ShipQuest-v5_1000_ep_v2.pth')
state_dim = data['state_dim']
action_dim = data['action_dim']
hidden_layer_dim = data['hidden_layer_dim']
training_ep = data['max_episodes']
env_name = data['env_name']
Options = data['Options']


""" Init neural networks """
policy_net = DQN(
    state_dim=state_dim,
    action_dim=action_dim,
    device=device,
    hidden_dim=hidden_layer_dim,
)

policy_net.load_state_dict(data['model_state_dict'])
print(f'Loaded training results with following options:')
Options

Loaded training results with following options:


  data = torch.load('model_with_data_ShipQuest-v5_1000_ep_v2.pth')


{'generate_random_ship': True,
 'workspace_safe_distance': 2,
 'n_actions': 7,
 'init_pose': None,
 'agent_radius': 0.1,
 'frontal_safe_distance': 0.25,
 'lidar_params': {'n_beams': 10, 'max_range': 1.0, 'FoV': 1.5707963267948966},
 'draw_lidar': False,
 'max_steps': 2000}

In [3]:
env = gym.make(env_name, Options=Options, render_mode='human')
for ep in range(5):
    state, info = env.reset()
    done = False
    total_reward = 0
    while not done:
        action = select_action(state, policy_net, 0, action_dim)
        next_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        state = next_state
        total_reward += reward
    print('total reward: ' + str(total_reward))
env.close()


total reward: 13.260000000000016
total reward: 17.44
total reward: -9.970000000000002
total reward: -28.440000000000175
total reward: 15.700000000000006


In [None]:
total_rewards = data['total_rewards']
coverage_per_ep = data['coverage_per_ep']
len_episodes = data['len_episodes']

""" Plot rewards """
window_size = 50
ma_reward = np.convolve(total_rewards, np.ones(window_size) / window_size, mode='valid')
plt.figure(figsize=(10, 6))
plt.scatter(np.arange(len(total_rewards)), total_rewards)
plt.plot(np.arange(window_size - 1, len(total_rewards)), ma_reward, color='red', label=f'Moving Average (Window={window_size})', linewidth=2)
plt.title(f'Total Reward and Moving Average Over {training_ep} Episodes')
plt.xlabel('Episode')
plt.ylabel('Total Reward')
plt.legend()
plt.grid(True)
plt.show()

""" Plot coverage """
ma_coverage = np.convolve(coverage_per_ep, np.ones(window_size) / window_size, mode='valid')
plt.figure(figsize=(10, 6))
plt.scatter(np.arange(len(coverage_per_ep)), coverage_per_ep)
plt.plot(np.arange(window_size - 1, len(coverage_per_ep)), ma_coverage, color='red', label=f'Moving Average (Window={window_size})', linewidth=2)
plt.title(f'Coverage and Moving Average Over {training_ep} Episodes')
plt.xlabel('Episode')
plt.ylabel('Coverage %')
plt.legend()
plt.grid(True)
# plt.savefig('coverage.jpeg')

plt.show()
""" Plot episode duration """
ma_steps = np.convolve(len_episodes, np.ones(window_size) / window_size, mode='valid')
plt.figure(figsize=(10, 6))
plt.scatter(np.arange(len(len_episodes)), len_episodes)
plt.plot(np.arange(window_size - 1, len(len_episodes)), ma_steps, color='red', label=f'Moving Average (Window={window_size})', linewidth=2)
plt.title(f'Steps per Episode and Moving Average Over {training_ep} Episodes')
plt.xlabel('Episode')
plt.ylabel('Total steps')
plt.legend()
plt.grid(True)
plt.show()

""" Plot alpha decay """
if 'alpha_decay' in data.keys():
    lr_per_ep = data['lr_per_ep']
    plt.figure(figsize=(10, 6))
    plt.plot(np.arange(len(lr_per_ep)), lr_per_ep, label='LR Decay')
    plt.title('LR Decay per Episode')
    plt.xlabel('Episode')
    plt.ylabel('LR')
    plt.grid(True)
    plt.legend()
    plt.show()

In [None]:
# Options['ship_perimeter'] = 8
import imageio 

images = []
Options['draw_lidar'] = True

env = gym.make(env_name, Options=Options, render_mode='rgb_array')
state, info = env.reset()
img = env.render()
done = False
total_reward = 0
while not done:
    images.append(img)
    action = select_action(state, policy_net, 0, action_dim)
    next_state, reward, terminated, truncated, info = env.step(action)
    img = env.render()
    done = terminated or truncated
    state = next_state
    total_reward += reward
images.append(img)
print('total reward: ' + str(total_reward))
env.close()
# if total_reward > 8:
# imageio.mimsave("ShipQuest_failed.gif", [np.array(img) for i, img in enumerate(images) if i%2 == 0], fps=20)
# imageio.imsave("Agent.jpeg", img)

In [None]:
data['max_vel'] = 1.0
data['max_omega'] = 1.0 
data['delta_v'] = 0.5
data['delta_omega'] = 0.5

In [None]:
torch.save(data, 'model_with_data_ShipQuest-v5_1000_ep.pth')

In [None]:
data
