In [3]:
import random
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from standard.helper import initialize_grids, initialize_q_table, initialize_state_dict, get_closest_in_grid
import gymnasium as gym
import torch
from helper_DQN import scale_and_resize
import torchvision.transforms as transforms
from models import DQN_square, NoisyNet
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [4]:
# --- Trained network -------------------------------------------------------
# Load the saved Noisy DDQN weights for inference only.
# map_location="cpu" lets a checkpoint that was written from a GPU run be
# deserialised on a machine without CUDA; the weights are then copied into the
# freshly constructed model by load_state_dict.
model = NoisyNet(3)  # presumably 3 = number of discrete actions — TODO confirm against models.NoisyNet
model.load_state_dict(torch.load("data/Noisy_DDQN.pth", map_location="cpu"))
model.eval()

# --- Environment -----------------------------------------------------------
# rgb_array rendering is required because the network consumes rendered frames.
env = gym.make("MountainCar-v0", render_mode='rgb_array')

# Gymnasium's reset() returns an (observation, info) pair; keep only the
# observation so `state` has the same form as the observations from env.step().
state, _ = env.reset()

# Learning rate; only referenced by the (disabled) tabular TD update.
lr = 0.01

# Frame preprocessing pipeline provided by helper_DQN.
transform = scale_and_resize()

# Initial random action, used as the starting action by the rollout cell.
action = env.action_space.sample()

# --- Tabular Q representation ----------------------------------------------
gamma = 0.99
state_space = 20 * 20  # number of discretised states (20 x 20 grid)
action_space = env.action_space.n
grid_x, grid_v = initialize_grids()
state_to_qtable = initialize_state_dict()
q = initialize_q_table(state_space, action_space)

In [13]:
# Roll out the trained network for a few episodes and record, for every
# discretised state that is visited, the Q-values the network predicts for the
# frame stack observed in that state. The result is written into the table `q`.
#
# The time-limit `truncated` flag is deliberately ignored: an episode ends only
# when the environment reports `terminated` or after MAX_STEPS environment steps.
N_EPISODES = 10
MAX_NOOP_STEPS = 26   # upper bound (inclusive) for the random no-op start
FRAME_STACK = 4       # frames per network input; also the action-repeat length
EPSILON = 0.05        # probability of taking a random action
MAX_STEPS = 10000     # hard cap on environment steps per episode

# Build the converter once instead of once per rendered frame.
to_tensor = transforms.ToTensor()


def _grab_frame():
    """Render the current environment frame and return it preprocessed."""
    return torch.squeeze(transform(to_tensor(env.render())))


for i in range(N_EPISODES):
    env.reset()

    # Always start the episode with a freshly sampled action, so the initial
    # frame stack never reuses an action left over from a previous episode
    # (which happened whenever the no-op count was 0).
    action = env.action_space.sample()

    noop = random.randint(0, MAX_NOOP_STEPS)  # no-op starts
    for _ in range(noop):
        action = env.action_space.sample()
        env.step(action)

    # Initialize the starting state as a frame stack.
    stacked_images = []
    for _ in range(FRAME_STACK):
        state, reward, terminated, _truncated, _info = env.step(action)
        stacked_images.append(_grab_frame())

    steps = 0
    X = torch.stack(stacked_images).unsqueeze(0)

    while True:
        # One forward pass per decision, without building an autograd graph.
        # The same values are used for action selection and for the table, so
        # the stored row always matches the action that was chosen from it.
        # NOTE(review): assumes the network is deterministic in eval mode — confirm.
        with torch.no_grad():
            q_values = model(X)

        if random.uniform(0, 1) < EPSILON:
            action = env.action_space.sample()
        else:
            action = q_values.max(1)[1].view(1, 1).item()

        # Repeat the action while collecting the next frame stack. Stop as soon
        # as the episode terminates: stepping a finished episode is undefined
        # in Gymnasium, and only looking at the last sub-step's flag could miss
        # a termination that happened earlier in the stack.
        stacked_images = []
        for _ in range(FRAME_STACK):
            new_state, reward, terminated, _truncated, _info = env.step(action)
            steps += 1
            stacked_images.append(_grab_frame())
            if terminated:
                break

        # Store the network's Q-values for the state the decision was made in.
        s = state_to_qtable[get_closest_in_grid(state, grid_x, grid_v)]
        q[s] = q_values.tolist()[0]

        state = new_state

        if terminated or steps > MAX_STEPS:
            print(f"episode: {i+1}\t mean q: {np.mean(q)}")
            break

        # Only a full stack from a live episode becomes the next network input.
        X = torch.stack(stacked_images).unsqueeze(0)

episode: 1	 mean q: -7.017046146392822
episode: 2	 mean q: -8.14283927599589
episode: 3	 mean q: -8.470414555867514
episode: 4	 mean q: -8.470371357599895
episode: 5	 mean q: -8.47051984945933
episode: 6	 mean q: -8.470500648816426
episode: 7	 mean q: -8.470466963450114
episode: 8	 mean q: -8.47058537642161
episode: 9	 mean q: -8.470427401860555
episode: 10	 mean q: -8.470477973620097


In [14]:
# Persist the Q-table as plain text so it can be reloaded without re-running
# the rollouts.
np.savetxt("data/q_Noisy_DDQN.txt", q)

# Despite its name, `actions` holds the maximum Q-value of each discretised
# state (not an action index), arranged on the 20 x 20 state grid.
# NOTE(review): which grid axis is position and which is velocity depends on
# how state_to_qtable enumerates states — confirm the axis labels below.
actions = np.max(q, axis=1).reshape((20, 20))

fig, ax = plt.subplots(figsize=(16, 12))
ax = sns.heatmap(actions, annot=True, ax=ax)

# Explicit limits 0..20 put row 0 at the bottom of the plot.
ax.set_ylim(0, 20)
ax.set_xlabel("Position", fontsize=20)
ax.set_ylabel("Velocity", fontsize=20)
ax.set_title("Q values for optimal action - Noisy DDQN", fontsize=25)

# Write the figure to disk and release it; nothing is shown inline.
fig.savefig('plots/q_values_plot_Noisy_DDQN.png')
plt.close(fig)