In [1]:
import torch
import numpy as np
from models import DQN_square, DQN_dueling, NoisyNet_Dueling, NoisyNet, Categorical_DQN
import gymnasium as gym
import random
import matplotlib.pyplot as plt
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

## Compare Models

In [2]:
# Build one network per trained agent. The plain DQN checkpoint
# ('data/DQN.pth') is not loaded in this cell.
# NOTE(review): the constructor argument 3 is presumably the number of
# actions -- confirm against models.py.
DQN_fixed = DQN_square(3)
DDQN = DQN_square(3)
Dueling_DDQN = DQN_dueling(3)
Noisy_Dueling_DDQN = NoisyNet_Dueling(3)
Noisy_DDQN = NoisyNet(3)

# Restore the saved weights into each network. The final call is left as the
# cell's last expression so the notebook still displays its result.
DQN_fixed.load_state_dict(torch.load('data/DQN_paper_fixed_final.pth'))
DDQN.load_state_dict(torch.load('data/DDQN.pth'))
Dueling_DDQN.load_state_dict(torch.load('data/Dueling_DDQN.pth'))
Noisy_Dueling_DDQN.load_state_dict(torch.load('data/Noisy_Dueling_DDQN.pth'))
Noisy_DDQN.load_state_dict(torch.load('data/Noisy_DDQN.pth'))

<All keys matched successfully>

In [3]:
# Training logs for the comparison plots: for every agent one text file with
# the average-Q series and one with the steps series, read with np.loadtxt.
# The plain DQN logs are not loaded here:
# q_values_DQN = np.loadtxt('data/q_values_DQN.txt')
# steps_DQN = np.loadtxt('data/steps_DQN.txt')
q_values_DQN_fixed = np.loadtxt('data/q_values_fixed.txt')
steps_DQN_fixed = np.loadtxt('data/steps_fixed.txt')

q_values_DDQN = np.loadtxt('data/q_values_DDQN.txt')
steps_DDQN = np.loadtxt('data/steps_DDQN.txt')

q_values_Dueling_DDQN = np.loadtxt('data/q_values_Dueling_DDQN.txt')
steps_Dueling_DDQN = np.loadtxt('data/steps_Dueling_DDQN.txt')

# Fixed: these two agents used to be read from the Dueling DDQN files, so the
# comparison plots showed the same curve under three different labels.
# NOTE(review): the Noisy_Dueling_DDQN file names follow the naming of the
# other logs and of 'data/Noisy_Dueling_DDQN.pth' -- confirm they exist.
q_values_Noisy_Dueling_DDQN = np.loadtxt('data/q_values_Noisy_Dueling_DDQN.txt')
steps_Noisy_Dueling_DDQN = np.loadtxt('data/steps_Noisy_Dueling_DDQN.txt')

# Same files as the ones read in the dedicated "Noisy DDQN" section.
q_values_Noisy_DDQN = np.loadtxt('data/q_values_Noisy_DDQN.txt')
steps_Noisy_DDQN = np.loadtxt('data/steps_Noisy_DDQN.txt')

q_values_Prioritized_DDQN = np.loadtxt('data/q_values_DDQN_Prioritized.txt')
steps_Prioritized_DDQN = np.loadtxt('data/steps_DDQN_Prioritized.txt')

q_values_Prioritized_Dueling_DDQN = np.loadtxt('data/q_values_Prioritized_Dueling_DDQN.txt')
steps_Prioritized_Dueling_DDQN = np.loadtxt('data/steps_Prioritized_Dueling_DDQN.txt')

q_values_Prioritized_Noisy_Dueling_DDQN = np.loadtxt('data/q_values_Prioritized_Noisy_Dueling_DDQN.txt')
steps_Prioritized_Noisy_Dueling_DDQN = np.loadtxt('data/steps_Prioritized_Noisy_Dueling_DDQN.txt')

In [3]:
def running_mean(x, N):
    """Return the simple moving average of ``x`` over windows of ``N`` samples.

    The average is computed from a cumulative sum, so every output element is
    the mean of ``N`` consecutive input values. Only full windows are
    produced: the result has ``len(x) - N + 1`` elements and is empty when
    ``N`` is larger than ``len(x)``.

    Args:
        x: Sequence or array of numbers (flattened by ``np.insert``).
        N: Window length, a positive integer.

    Returns:
        A float numpy array holding the mean of each full window.

    Raises:
        ValueError: If ``N`` is smaller than 1. Without this check a negative
            ``N`` silently produces meaningless values and ``N == 0`` fails
            with an unrelated broadcasting error.
    """
    if N < 1:
        raise ValueError("window length N must be at least 1, got %r" % (N,))
    # Prepend a zero so that cumsum[i + N] - cumsum[i] is the sum of x[i:i + N].
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[N:] - cumsum[:-N]) / float(N)

In [7]:
# Plot steps per episode, smoothed with a running mean, for all algorithms.
N = 10  # window length of the running mean

# (legend label, raw steps series) in drawing order. The raw series are left
# untouched and smoothed into a local below, so re-running this cell does not
# smooth already-smoothed data a second time.
steps_by_label = [
    ('DQN fixed', steps_DQN_fixed),
    ('Dueling DDQN', steps_Dueling_DDQN),
    ('DDQN', steps_DDQN),
    # ('Noisy_Dueling_DDQN', steps_Noisy_Dueling_DDQN),  # left out of this figure
    ('Noisy_DDQN', steps_Noisy_DDQN),
    ('Prioritized_DDQN', steps_Prioritized_DDQN),
    ('Prioritized_Dueling_DDQN', steps_Prioritized_Dueling_DDQN),
    ('Prioritized_Noisy_Dueling_DDQN', steps_Prioritized_Noisy_Dueling_DDQN),
]

# savefig does not create missing directories.
os.makedirs('plots', exist_ok=True)

fig, ax = plt.subplots()
for label, raw_steps in steps_by_label:
    smoothed = running_mean(raw_steps, N)
    ax.plot(np.arange(len(smoothed)) + 1, smoothed, label=label)
ax.set_xlabel('Episode')
ax.set_ylabel('Steps')
ax.set_ylim(0, 5000)
ax.legend()
ax.set_title('Steps per Episode for different Algorithms')
fig.savefig('plots/steps_DQN_compare.png')
# Close the figure so it is written to disk only and not rendered inline.
plt.close(fig)

# Average-Q comparison: one curve per algorithm, drawn in the order listed.
# The plain DQN, Dueling DDQN and Prioritized DDQN series are not part of
# this figure.
q_curves = [
    (q_values_DQN_fixed, 'DQN fixed'),
    (q_values_DDQN, 'DDQN'),
    (q_values_Noisy_Dueling_DDQN, 'Noisy_Dueling_DDQN'),
    (q_values_Noisy_DDQN, 'Noisy_DDQN'),
    (q_values_Prioritized_Dueling_DDQN, 'Prioritized_Dueling_DDQN'),
    (q_values_Prioritized_Noisy_Dueling_DDQN, 'Prioritized_Noisy_Dueling_DDQN'),
]

fig, ax = plt.subplots()
for q_series, curve_label in q_curves:
    # Episodes are numbered from 1.
    episodes = np.arange(1, len(q_series) + 1)
    ax.plot(episodes, q_series, label=curve_label)
ax.set_xlabel('Episode')
ax.set_ylabel('Average Q')
ax.set_ylim(-100, 0)
ax.legend()
ax.set_title('Average Q measure over sampled states')
fig.savefig('plots/q_measures_DQN_compare.png')
plt.close(fig)


## Visualize missing steps and q values from data

### Prioritized DDQN

In [23]:
# Evaluation results and training logs of the Prioritized DDQN run.
eval_prioritized = np.loadtxt("data/eval_Prioritized_DDQN.txt")
# File names use a capital "Prioritized", exactly as where the same logs are
# loaded for the comparison plots; the lowercase spelling used here before
# only resolves on case-insensitive file systems if the files are capitalised.
q_values = np.loadtxt("data/q_values_DDQN_Prioritized.txt")
steps = np.loadtxt("data/steps_DDQN_Prioritized.txt")

In [25]:
# --- Steps figure -----------------------------------------------------------
# The ten evaluation results are drawn at episodes 50, 100, ..., 500.
eval_episodes = np.arange(50, 501, 50)
# Evaluation values are sign-flipped before plotting (presumably negative
# returns turned into step counts -- confirm against the training script).
neg_evals = -eval_prioritized

fig, ax = plt.subplots()
ax.scatter(eval_episodes, neg_evals, color='r', marker='x', zorder=1, label='evaluations')

# Degree-1 polynomial fit through the evaluation points, drawn over 0..499.
m, b = np.polyfit(eval_episodes, neg_evals, 1)
x = np.arange(0, 500)
ax.plot(x, m*x+b, '--k', label='eval_regression')

# Raw steps at the back, their running mean on top of them.
ax.plot(np.arange(1, len(steps) + 1), steps, zorder=-1, label='steps')
N = 10
steps_mean = running_mean(steps, N)
ax.plot(np.arange(1, len(steps_mean) + 1), steps_mean, zorder=0, label='running average')

ax.legend()
ax.set_xlabel('Episode')
ax.set_ylabel('Steps')
ax.set_title('Steps per Episode - Prioritized_DDQN')
fig.savefig('plots/steps_Prioritized_DDQN.png')
plt.close(fig)

# --- Average-Q figure -------------------------------------------------------
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(q_values) + 1), q_values)
ax.set_xlabel('Episode')
ax.set_ylabel('Average Q')
ax.set_title('Average Q measure over sampled states')
fig.savefig('plots/q_measures_Prioritized_DDQN.png')
plt.close(fig)

### Prioritized Noisy Dueling DDQN

In [4]:
# Evaluation results and training logs of the Prioritized Noisy Dueling DDQN run.
eval_prioritized = np.loadtxt("data/eval_Prioritized_Noisy_Dueling_DDQN.txt")
q_values = np.loadtxt("data/q_values_Prioritized_Noisy_Dueling_DDQN.txt")
steps = np.loadtxt("data/steps_Prioritized_Noisy_Dueling_DDQN.txt")

# --- Steps figure -----------------------------------------------------------
# The ten evaluation results are drawn at episodes 50, 100, ..., 500.
eval_episodes = np.arange(50, 501, 50)
# Evaluation values are sign-flipped and multiplied by 4 before plotting.
# NOTE(review): the reason for the factor 4 is not visible in this notebook
# (possibly a frame-skip of 4 during evaluation) -- confirm.
scaled_evals = -eval_prioritized * 4

fig, ax = plt.subplots()
ax.scatter(eval_episodes, scaled_evals, color='r', marker='x', zorder=1, label='evaluations')

# Degree-1 polynomial fit through the evaluation points, drawn over 0..499.
m, b = np.polyfit(eval_episodes, scaled_evals, 1)
x = np.arange(0, 500)
ax.plot(x, m*x+b, '--k', label='eval_regression')

# Raw steps at the back, their running mean on top of them.
ax.plot(np.arange(1, len(steps) + 1), steps, zorder=-1, label='steps')
N = 10
steps_mean = running_mean(steps, N)
ax.plot(np.arange(1, len(steps_mean) + 1), steps_mean, zorder=0, label='running average')

ax.legend()
ax.set_xlabel('Episode')
ax.set_ylabel('Steps')
ax.set_title('Steps per Episode - Prioritized Noisy Dueling DDQN')
fig.savefig('plots/steps_Prioritized_Noisy_Dueling_DDQN.png')
plt.close(fig)

# --- Average-Q figure -------------------------------------------------------
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(q_values) + 1), q_values)
ax.set_xlabel('Episode')
ax.set_ylabel('Average Q')
ax.set_title('Average Q measure over sampled states')
fig.savefig('plots/q_measures_Prioritized_Noisy_Dueling_DDQN.png')
plt.close(fig)

### Noisy DDQN

In [5]:
# Evaluation results and training logs of the Noisy DDQN run.
# The variable name `eval_prioritized` is kept from the other sections even
# though the file read here is the Noisy DDQN evaluation log.
eval_prioritized = np.loadtxt("data/eval_Noisy_DDQN.txt")
q_values = np.loadtxt("data/q_values_Noisy_DDQN.txt")
steps = np.loadtxt("data/steps_Noisy_DDQN.txt")

# --- Steps figure -----------------------------------------------------------
# The ten evaluation results are drawn at episodes 50, 100, ..., 500.
eval_episodes = np.arange(50, 501, 50)
# Evaluation values are sign-flipped before plotting (presumably negative
# returns turned into step counts -- confirm against the training script).
neg_evals = -eval_prioritized

fig, ax = plt.subplots()
ax.scatter(eval_episodes, neg_evals, color='r', marker='x', zorder=1, label='evaluations')

# Degree-1 polynomial fit through the evaluation points, drawn over 0..499.
m, b = np.polyfit(eval_episodes, neg_evals, 1)
x = np.arange(0, 500)
ax.plot(x, m*x+b, '--k', label='eval_regression')

# Raw steps at the back, their running mean on top of them.
ax.plot(np.arange(1, len(steps) + 1), steps, zorder=-1, label='steps')
N = 10
steps_mean = running_mean(steps, N)
ax.plot(np.arange(1, len(steps_mean) + 1), steps_mean, zorder=0, label='running average')

ax.legend()
ax.set_xlabel('Episode')
ax.set_ylabel('Steps')
ax.set_title('Steps per Episode - Noisy DDQN')
fig.savefig('plots/steps_Noisy_DDQN.png')
plt.close(fig)

# --- Average-Q figure -------------------------------------------------------
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(q_values) + 1), q_values)
ax.set_xlabel('Episode')
ax.set_ylabel('Average Q')
ax.set_title('Average Q measure over sampled states')
fig.savefig('plots/q_measures_Noisy_DDQN.png')
plt.close(fig)

### Prioritized Dueling DDQN

In [6]:
# Evaluation results and training logs of the Prioritized Dueling DDQN run.
eval_prioritized = np.loadtxt("data/eval_Prioritized_Dueling_DDQN.txt")
q_values = np.loadtxt("data/q_values_Prioritized_Dueling_DDQN.txt")
steps = np.loadtxt("data/steps_Prioritized_Dueling_DDQN.txt")

# --- Steps figure -----------------------------------------------------------
# The ten evaluation results are drawn at episodes 50, 100, ..., 500.
eval_episodes = np.arange(50, 501, 50)
# Evaluation values are sign-flipped and multiplied by 4 before plotting.
# NOTE(review): the reason for the factor 4 is not visible in this notebook
# (possibly a frame-skip of 4 during evaluation) -- confirm.
scaled_evals = -eval_prioritized * 4

fig, ax = plt.subplots()
ax.scatter(eval_episodes, scaled_evals, color='r', marker='x', zorder=1, label='evaluations')

# Degree-1 polynomial fit through the evaluation points, drawn over 0..499.
m, b = np.polyfit(eval_episodes, scaled_evals, 1)
x = np.arange(0, 500)
ax.plot(x, m*x+b, '--k', label='eval_regression')

# Raw steps at the back, their running mean on top of them.
ax.plot(np.arange(1, len(steps) + 1), steps, zorder=-1, label='steps')
N = 10
steps_mean = running_mean(steps, N)
ax.plot(np.arange(1, len(steps_mean) + 1), steps_mean, zorder=0, label='running average')

ax.legend()
ax.set_xlabel('Episode')
ax.set_ylabel('Steps')
ax.set_title('Steps per Episode - Prioritized Dueling DDQN')
fig.savefig('plots/steps_Prioritized_Dueling_DDQN.png')
plt.close(fig)

# --- Average-Q figure -------------------------------------------------------
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(q_values) + 1), q_values)
ax.set_xlabel('Episode')
ax.set_ylabel('Average Q')
ax.set_title('Average Q measure over sampled states')
fig.savefig('plots/q_measures_Prioritized_Dueling_DDQN.png')
plt.close(fig)

### DQN

In [6]:
# Evaluation results and training logs of the plain DQN run.
# The variable name `eval_prioritized` is kept from the other sections even
# though the file read here is the DQN evaluation log.
eval_prioritized = np.loadtxt("data/eval_DQN.txt")
q_values = np.loadtxt("data/q_values_DQN.txt")
steps = np.loadtxt("data/steps_DQN.txt")

# --- Steps figure -----------------------------------------------------------
# Here the evaluation results are drawn at episodes 50, 100, ..., 1000.
eval_episodes = np.arange(50, 1001, 50)
# Evaluation values are sign-flipped before plotting (presumably negative
# returns turned into step counts -- confirm against the training script).
neg_evals = -eval_prioritized

fig, ax = plt.subplots()
ax.scatter(eval_episodes, neg_evals, color='r', marker='x', zorder=1, label='evaluations')

# Degree-1 polynomial fit through the evaluation points, drawn over 0..999.
m, b = np.polyfit(eval_episodes, neg_evals, 1)
x = np.arange(0, 1000)
ax.plot(x, m*x+b, '--k', label='eval_regression')

# Raw steps at the back, their running mean on top of them.
ax.plot(np.arange(1, len(steps) + 1), steps, zorder=-1, label='steps')
N = 10
steps_mean = running_mean(steps, N)
ax.plot(np.arange(1, len(steps_mean) + 1), steps_mean, zorder=0, label='running average')

ax.legend()
ax.set_xlabel('Episode')
ax.set_ylabel('Steps')
ax.set_title('Steps per Episode - DQN')
fig.savefig('plots/steps_DQN.png')
plt.close(fig)

# --- Average-Q figure -------------------------------------------------------
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(q_values) + 1), q_values)
ax.set_xlabel('Episode')
ax.set_ylabel('Average Q')
ax.set_title('Average Q measure over sampled states')
fig.savefig('plots/q_measures_DQN.png')
plt.close(fig)