In [1]:
from vizdoom import *
import random
import time
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import cv2 as cv
from utility import *

from agent import DQN_agent
import torch
from torch.autograd import Variable
from tensorboardX import SummaryWriter

writer = SummaryWriter(log_dir = 'runs/defend_the_center')

game = DoomGame()
# change this for vizdoom defend_the_center path
game.load_config("./scenario/defend_the_center.cfg")
game.set_screen_format(ScreenFormat.GRAY8)
game.add_available_game_variable(GameVariable.KILLCOUNT)
game.init()

print(game.get_available_buttons())

agent = DQN_agent(len(actions), device, writer, is_dueling=True)


[Button.TURN_LEFT, Button.TURN_RIGHT, Button.ATTACK]
Creating dueling network


In [2]:
prepopulate_buffer(game, agent)

prepopulation start
0
prepopulation stop


In [3]:
from collections import deque

eps_start=1.0
eps_end=0.01
eps_decay=0.995

epsilon = eps_start

results = deque(maxlen=10)
cx = Variable(torch.zeros(64, 40, device=device)) # the cell states of the LSTM are reinitialized to zero
hx = Variable(torch.zeros(64, 40, device=device)) # the hidden states of the LSTM are reinitialized to 
episode = 0
max_reward =0
while True:
    episode+=1
    game.new_episode()
    last_total_health = 100
    last_ammo = game.get_game_variable(GameVariable.AMMO2)
    while True:
        next_state = state = game.get_state()
        starting_state = state.screen_buffer
        discounted_reward = 0
        for i in range(params.n_steps):
            state = next_state
            img = state.screen_buffer
            img = preprocess_frame(img)
            health_delta = game.get_game_variable(GameVariable.HEALTH) - last_total_health
            last_total_health = game.get_game_variable(GameVariable.HEALTH)
        
            ammo_delta = game.get_game_variable(GameVariable.AMMO2) - last_ammo
            last_ammo = game.get_game_variable(GameVariable.AMMO2)
                
            selected_action = agent.select_action(img, epsilon, (hx[0].view(1,-1), cx[0].view(1,-1)))
            if i == 0:
                first_action = selected_action
            action = actions[selected_action]
            print(type(action))
            print(action)
            reward = game.make_action(action)
            discounted_reward += (params.gamma**i)*(reward+healthReward(health_delta)+ammoReward(ammo_delta))
            done = game.is_episode_finished()
            if done:
                break
            next_state = game.get_state()
        if done:
            cx = Variable(torch.zeros(64, 40, device=device)) # the cell states of the LSTM are reinitialized to zero
            hx = Variable(torch.zeros(64, 40, device=device)) # the hidden states of the LSTM are reinitialized to zero
            next_img = np.zeros((84, 84), dtype='uint8')
            (hx, cx) = agent.step(episode, preprocess_frame(starting_state), first_action, discounted_reward , next_img, done, hx, cx)
            break
        else:
            cx = Variable(cx.data) # we keep the old cell states, making sure they are in a torch variable
            hx = Variable(hx.data) # we keep the old hidden states, making sure they are in a torch variable
        next_img = next_state.screen_buffer
        next_img = preprocess_frame(next_img)
        (hx, cx) = agent.step(episode, preprocess_frame(starting_state), first_action, discounted_reward , next_img, done, hx, cx)
        state = next_state
    # epsilon decay
    epsilon = epsilon*eps_decay
    if epsilon < eps_end:
        epsilon = eps_end
    # save each 500 episodes
    if episode%500==0:
        file_name = 'checkpoint_' + str(episode) + '.pth'
        torch.save(agent.local_network.state_dict(), file_name)
    if game.get_total_reward()> max_reward:
        max_reward = game.get_total_reward()
    results.append(game.get_total_reward())
    writer.add_scalar('Game variables/Kills', game.get_game_variable(GameVariable.KILLCOUNT), episode)
    if episode>=100:
        writer.add_scalar('Reward Loss/Reward', np.mean(results), episode)
    if episode>=10 and episode%100==0:
        print ("    ", episode, "mean result:", np.mean(results), " epsilon = ", epsilon, " reward: ", game.get_total_reward(), " max reward = " , max_reward)
    if np.mean(results)>= 20:
        print("Congratulations, your AI wins in episode ", episode)
        break

<class 'list'>
[0, 1, 0, 0, 0, 0, 0]
<class 'list'>
[0, 1, 0, 0, 0, 0, 0]
<class 'list'>
[0, 0, 1, 0, 0, 0, 0]
<class 'list'>
[1, 0, 0, 0, 0, 0, 0]
<class 'list'>
[0, 1, 0, 0, 0, 0, 0]
<class 'list'>
[1, 0, 0, 0, 0, 0, 0]
<class 'list'>
[0, 0, 1, 0, 0, 0, 0]
<class 'list'>
[1, 0, 0, 0, 0, 0, 0]
<class 'list'>
[1, 0, 0, 0, 0, 0, 0]
<class 'list'>
[0, 1, 0, 0, 0, 0, 0]
<class 'list'>
[0, 1, 0, 0, 0, 0, 0]
<class 'list'>
[1, 0, 0, 0, 0, 0, 0]
<class 'list'>
[0, 1, 0, 0, 0, 0, 0]
<class 'list'>
[0, 0, 1, 0, 0, 0, 0]
<class 'list'>
[0, 0, 1, 0, 0, 0, 0]




<class 'list'>
[0, 0, 1, 0, 0, 0, 0]
<class 'list'>
[1, 0, 0, 0, 0, 0, 0]
<class 'list'>
[1, 0, 0, 0, 0, 0, 0]
<class 'list'>
[0, 1, 0, 0, 0, 0, 0]
<class 'list'>
[0, 0, 1, 0, 0, 0, 0]
<class 'list'>
[0, 0, 1, 0, 0, 0, 0]
<class 'list'>
[0, 0, 1, 0, 0, 0, 0]
<class 'list'>
[0, 1, 0, 0, 0, 0, 0]
<class 'list'>
[1, 0, 0, 0, 0, 0, 0]
<class 'list'>
[0, 1, 0, 0, 0, 0, 0]
<class 'list'>
[0, 0, 1, 0, 0, 0, 0]
<class 'list'>
[0, 0, 1, 0, 0, 0, 0]
<class 'list'>
[0, 0, 1, 0, 0, 0, 0]
<class 'list'>
[1, 0, 0, 0, 0, 0, 0]
<class 'list'>
[1, 0, 0, 0, 0, 0, 0]


KeyboardInterrupt: 

In [None]:
file_name = 'checkpoint_final.pth'
torch.save(agent.local_network.state_dict(), file_name)

fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(results)), results)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.show()
fig.set_size_inches(18.5, 10.5)
fig.savefig('result.png', dpi=100)

In [None]:
agent = DQN_agent(len(actions), device, writer, is_dueling=True)
agent.predict_network.load_state_dict(torch.load('C:/Users/Marcin i Ewa/doom_DQL/Doom/DQN + priorityReplay/checkpoint_final.pth'))

cx = Variable(torch.zeros(1, 40, device=device)) # the cell states of the LSTM are reinitialized to zero
hx = Variable(torch.zeros(1, 40, device=device)) # the hidden states of the LSTM are reinitialized to 

for episode in range(10):
    game.new_episode()
    images = []
    while True:
        state = game.get_state()
        img = state.screen_buffer
        img = preprocess_frame(img)
        misc = state.game_variables
        selected_action = agent.select_action(img, 0., (hx, cx))
        action = actions[selected_action]
        reward = game.make_action(action)
        done = game.is_episode_finished()
        if done:
            break
        state = game.get_state()
        time.sleep(0.01)
    print('Episode: \t{} \tScore: \t{:.2f}'.format(episode, game.get_total_reward()))