In [1]:
import numpy as np
import tensorflow as tf
import gym
import matplotlib.pyplot as plt

In [2]:
def play(env, policy):
    """Run one episode, always taking the highest-scoring action, and return its total reward.

    Parameters
    ----------
    env
        Environment object. ``env.reset()`` must return the first observation
        and ``env.step(action)`` must return a 4-tuple whose first three
        items are the next observation, the step reward and a termination flag.
    policy
        Object with a ``predict(batch)`` method. The index of the largest
        value in its output is used as the action.

    Returns
    -------
    ``np.sum`` over the rewards collected at every step of the episode.
    """
    episode_rewards = []
    observation = env.reset()

    # At least one step is always taken; stop as soon as the env reports termination.
    while True:
        batch = observation.reshape([1, -1])  # single-row batch for predict()
        action = np.argmax(policy.predict(batch))
        observation, reward, done, _ = env.step(action)
        episode_rewards.append(reward)
        if done:
            break

    return np.sum(episode_rewards)

In [3]:
# Create the CartPole-v0 environment; this one instance is passed to play() for every model evaluated below.
env = gym.make('CartPole-v0')

## DQN

In [None]:
# Load the saved DQN Q-network and write a diagram of its architecture to disk.
policy = tf.keras.models.load_model("../models/DQN_q_network.h5")
tf.keras.utils.plot_model(policy, to_file='q_network.png')

# Evaluate over 1000 episodes; each entry is the value returned by play() for one episode.
dqn = [play(env, policy) for _ in range(1000)]



## Double DQN

In [None]:
# Load the saved Double DQN Q-network.
policy = tf.keras.models.load_model("../models/DoubleDQN_q_network.h5")

# Evaluate over 1000 episodes; each entry is the value returned by play() for one episode.
doubledqn = [play(env, policy) for _ in range(1000)]



## Prioritized Experience Replay

In [None]:
# Load the saved Q-network trained with prioritized experience replay.
policy = tf.keras.models.load_model("../models/PrioritizedDQN_q_network.h5")

# Evaluate over 1000 episodes; each entry is the value returned by play() for one episode.
pridqn = [play(env, policy) for _ in range(1000)]

## Dueling DQN

In [None]:
# Load the saved dueling-architecture Q-network and write a diagram of it to disk.
policy = tf.keras.models.load_model("../models/DeulDQN_q_network.h5")
tf.keras.utils.plot_model(policy, to_file='deuling_q_network.png')

# Evaluate over 1000 episodes; each entry is the value returned by play() for one episode.
deuldqn = [play(env, policy) for _ in range(1000)]

In [None]:
import seaborn as sns

# Bar labels, per-episode reward lists and colours, all in left-to-right bar order.
labels = ["DQN", "Double DQN", "Prioritized Experience Replay", "Deuling DQN"]
episode_rewards = [dqn, doubledqn, pridqn, deuldqn]
bar_colors = ["tab:blue", "tab:orange", "tab:green", "tab:red"]

# Bar height is the mean episode reward; the error bar is its standard deviation.
means = [np.mean(r) for r in episode_rewards]
stds = [np.std(r) for r in episode_rewards]

fig, ax = plt.subplots(figsize=(9, 6))
ax.bar(labels, means, yerr=stds, color=bar_colors, capsize=20)
ax.set_ylabel("Average Rewards")

fig.tight_layout()
fig.savefig("comparision.png", dpi=300)

In [None]:
import pandas as pd

# Model label -> per-episode rewards; insertion order fixes the row order of the table.
rewards_by_model = {
    "DQN": dqn,
    "Double DQN": doubledqn,
    "Prioritized Experience Replay": pridqn,
    "Deuling DQN": deuldqn,
}

# One row per model: its label, mean episode reward and standard deviation.
df = pd.DataFrame(
    {
        "Model": list(rewards_by_model.keys()),
        "Mean": [np.mean(r) for r in rewards_by_model.values()],
        "Std": [np.std(r) for r in rewards_by_model.values()],
    }
)

In [None]:
# Standard deviation of episode rewards, in the order DQN, Double DQN, Prioritized, Dueling.
reward_stds = [np.std(r) for r in (dqn, doubledqn, pridqn, deuldqn)]
print(reward_stds)