# Visualisation of Training and Testing data

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def plot_training_results(ax, rewards, accum_avg=False, mean=False, window=100):
    """Plot per-episode rewards on ``ax`` with optional smoothing and mean line.

    Parameters
    ----------
    ax : Axes-like
        Object to draw on; ``plot``, ``axhline``, ``set_xlabel``,
        ``set_ylabel``, ``legend`` and ``grid`` are called on it.
    rewards : array-like
        Rewards, one entry per episode. Expected to be one-dimensional
        (``np.convolve`` only accepts 1-D input).
    accum_avg : bool, optional
        If true, overlay a trailing moving average of the rewards in red.
        The legend label is kept as 'Accumulated Reward' for compatibility
        with existing figures, although the curve is a moving average and
        not a cumulative one.
    mean : bool, optional
        If true, print the mean of all rewards and draw it as a dashed
        green horizontal line.
    window : int, optional
        Number of episodes averaged per moving-average point (default 100).
        Clamped to the range ``[1, len(rewards)]``.
    """
    rewards = np.asarray(rewards)

    #plot rewards
    ax.plot(rewards, label='Rewards')

    #moving average (skipped for empty input, where np.convolve would raise)
    if accum_avg and rewards.size > 0:
        # Clamp the window: with mode='valid' and a kernel longer than the
        # data, np.convolve swaps the operands and returns a meaningless array.
        span = max(1, min(int(window), rewards.size))
        smoothed = np.convolve(rewards, np.ones(span) / span, mode='valid')
        # Each point averages the `span` episodes ending at that episode, so
        # draw it there instead of starting the curve at x=0.
        episodes = np.arange(span - 1, rewards.size)
        ax.plot(episodes, smoothed, 'r', label='Accumulated Reward')

    if mean:
        # Separate name so the boolean `mean` flag is not overwritten.
        mean_reward = np.mean(rewards)
        print(mean_reward)
        ax.axhline(mean_reward, color='g', linestyle='--', label='Mean Reward')

    #labels, ticks, legend and grid
    ax.set_xlabel('Episodes')
    ax.set_ylabel('Rewards')
    ax.legend()
    ax.grid(True)

# TRAINING PLOTS

In [None]:
# Hyperparameter grid: one subplot row per target-update frequency and one
# column per batch size.
BATCH_SIZE = [32, 64]
TARGET_UPDATE_FREQ = [1, 4, 8, 16, 32]
PREPATH = "lunarlander/models"

# squeeze=False keeps `axs` two-dimensional even when one of the lists has a
# single entry, so the axs[i, j] lookup below never fails.
fig, axs = plt.subplots(
    len(TARGET_UPDATE_FREQ),
    len(BATCH_SIZE),
    figsize=(15, 20),
    sharex=True,
    sharey=True,
    squeeze=False,
)

for i, target_update_freq in enumerate(TARGET_UPDATE_FREQ):
    for j, batch_size in enumerate(BATCH_SIZE):
        ax = axs[i, j]
        # Directory holding the saved results of this configuration.
        PATH = f"{PREPATH}/batch_size_{batch_size}/target_update_freq_{target_update_freq}"
        rewards = np.load(f"{PATH}/train_rewards.npy")
        # Training curves are noisy, so overlay the smoothed curve.
        plot_training_results(ax, rewards, accum_avg=True)
        ax.set_title(f'Batch Size: {batch_size}, Target Update Freq: {target_update_freq}')

plt.tight_layout()
plt.show()

# TESTING PLOTS

In [None]:
# Hyperparameter grid: one subplot row per target-update frequency and one
# column per batch size.
BATCH_SIZE = [32, 64]
TARGET_UPDATE_FREQ = [1, 4, 8, 16, 32]
PREPATH = "lunarlander/models"

# squeeze=False keeps `axs` two-dimensional even when one of the lists has a
# single entry, so the axs[i, j] lookup below never fails.
fig, axs = plt.subplots(
    len(TARGET_UPDATE_FREQ),
    len(BATCH_SIZE),
    figsize=(15, 20),
    sharex=True,
    sharey=True,
    squeeze=False,
)

for i, target_update_freq in enumerate(TARGET_UPDATE_FREQ):
    for j, batch_size in enumerate(BATCH_SIZE):
        ax = axs[i, j]
        # Directory holding the saved results of this configuration.
        PATH = f"{PREPATH}/batch_size_{batch_size}/target_update_freq_{target_update_freq}"
        rewards = np.load(f"{PATH}/test_rewards.npy")
        # For test runs, show the overall mean reward instead of a smoothed curve.
        plot_training_results(ax, rewards, accum_avg=False, mean=True)
        ax.set_title(f'Batch Size: {batch_size}, Target Update Freq: {target_update_freq}')

plt.tight_layout()
plt.show()

# AVERAGE REWARD CARTPOLE
Average test rewards for models trained with batch sizes [32, 64] and target update frequencies [1, 2, 4, 6, 8], reported at training checkpoints every 100 episodes and for the final model, with 100 test episodes per model.

In [None]:
# CartPole hyperparameter grid whose saved test rewards are summarised below.
BATCH_SIZE = [32, 64]
TARGET_UPDATE_FREQ = [1, 2, 4, 6, 8]
PREPATH = "cartpole/models"

for batch_size in BATCH_SIZE:
    for target_update_freq in TARGET_UPDATE_FREQ:
        # Results directory of this (batch size, update frequency) pair.
        PATH = "/".join(
            [PREPATH, f"batch_size_{batch_size}", f"target_update_freq_{target_update_freq}"]
        )
        # Prefix shared by every line printed for this configuration.
        config_tag = f"Batch Size: {batch_size} | Target Update Freq: {target_update_freq}"

        # Intermediate checkpoints saved at episodes 100, 200, 300 and 400.
        for episode in range(100, 500, 100):
            checkpoint_file = f"{PATH}/intermediate_results/test_rewards_episode_{episode}.npy"
            rewards = np.load(checkpoint_file)
            print(f"{config_tag} | Episode: {episode} | Avg. Rewards {np.mean(rewards)}")

        # Test rewards saved for the finished model.
        final_rewards = np.load(f"{PATH}/test_rewards.npy")
        print(f"{config_tag} | Final Avg. Rewards {np.mean(final_rewards)}")

# AVERAGE REWARD LUNARLANDER
Average test rewards for models trained with batch sizes [32, 64] and target update frequencies [1, 4, 8, 16, 32], reported at training checkpoints every 500 episodes and for the final model, with 100 test episodes per model.

In [None]:
BATCH_SIZE = [32, 64]
TARGET_UPDATE_FREQ = [1, 4, 8, 16, 32]
PREPATH = "lunarlander/models"

for batch_size in BATCH_SIZE:
    for target_update_freq in TARGET_UPDATE_FREQ:
        PATH = f"{PREPATH}/batch_size_{batch_size}/target_update_freq_{target_update_freq}"
        for episode in range(500, 5000, 500):
            rewards = np.load(f"{PATH}/intermediate_results/test_rewards_episode_{episode}.npy")
            print(f"Batch Size: {batch_size} | Target Update Freq: {target_update_freq} | Episode: {episode} | Avg. Rewards {np.mean(rewards)}")
        
        final_rewards = np.load(f"{PATH}/test_rewards.npy")
        print(f"Batch Size: {batch_size} | Target Update Freq: {target_update_freq} | Final Avg. Rewards {np.mean(final_rewards)}")