In [1]:
from vizdoom import *
import random
import time
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import cv2 as cv
from skimage import transform

from agent import DQN_agent, params
import torch
from torch.autograd import Variable

def preprocess_frame(frame):
    """Turn a raw screen buffer into the 84x84 array fed to the agent.

    No greyscale conversion happens here: the game is configured to deliver
    single-channel (GRAY8) frames.

    Args:
        frame: 2-D screen buffer; assumed to hold 8-bit pixel values (0..255),
            which is why it is divided by 255 below.

    Returns:
        The cropped and rescaled frame resized to 84x84 by
        ``skimage.transform.resize``.
    """
    # Trim 30 rows off the top (the roof, which carries no information),
    # 10 rows off the bottom and 30 columns off each side.
    trimmed = frame[30:-10, 30:-30]

    # Bring pixel intensities into the 0..1 range.
    scaled = trimmed / 255.0

    # Resize to the fixed network input size.
    return transform.resize(scaled, [84, 84])



# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Create the ViZDoom game from the "basic" scenario config, switch the screen
# buffer to single-channel GRAY8, then start the game.
game = DoomGame()
game.load_config("../Udacity/doom/scenarios/basic.cfg")
game.set_screen_format(ScreenFormat.GRAY8)
game.init()

# Button vectors passed to game.make_action; the position of each vector in
# `actions` is the action index returned by the agent.
left, right, shoot = [1, 0, 0], [0, 1, 0], [0, 0, 1]
actions = [shoot, left, right]

# The first argument is presumably the number of available actions (three here).
agent = DQN_agent(3, device)

# LSTM hidden (hx) and cell (cx) states, both initialised to zeros with
# shape (64, 256).
hx = Variable(torch.zeros(64, 256, device=device))
cx = Variable(torch.zeros(64, 256, device=device))
def prepopulate_buffer(target_samples=10000, epsilon=0.95):
    """Fill the agent's replay buffer with transitions before training starts.

    Plays episodes using ``agent.select_action`` with the given ``epsilon``
    and stores every transition in ``agent.memoryBuffer`` until
    ``target_samples`` transitions have been collected.

    The step that ends an episode is stored too, with ``done=True`` and an
    all-zero next frame (no screen buffer is read once the episode is over).
    Breaking out of the loop before storing it would drop the reward returned
    by that final ``make_action`` call and leave the buffer without any
    terminal transition.

    Each transition is stored exactly once, so ``target_samples`` counts
    distinct transitions.

    Args:
        target_samples: number of transitions to collect (default 10000).
        epsilon: exploration rate passed to ``agent.select_action``
            (default 0.95).
    """
    print("prepopulation start")
    samples_amount = 0
    # The recurrent state is not advanced during prepopulation: every call to
    # select_action receives the first row of the module-level hx / cx.
    lstm_state = (hx[0].view(1, -1), cx[0].view(1, -1))

    # `<` rather than `==` so the loop terminates for any target value.
    while samples_amount < target_samples:
        game.new_episode()
        done = False
        while not done and samples_amount < target_samples:
            if samples_amount % 1000 == 0:
                print(samples_amount)

            img = preprocess_frame(game.get_state().screen_buffer)
            selected_action = agent.select_action(img, epsilon, lstm_state)
            reward = game.make_action(actions[selected_action])
            done = game.is_episode_finished()

            if done:
                # Placeholder for the missing next frame of a terminal step.
                next_img = np.zeros_like(img)
            else:
                next_img = preprocess_frame(game.get_state().screen_buffer)

            agent.memoryBuffer.add(img, selected_action, reward, next_img, done)
            samples_amount += 1

        if len(agent.memoryBuffer) % 100 == 0:
            print("len = ", len(agent.memoryBuffer))
    print("prepopulation stop")
prepopulate_buffer()

prepopulation start
0


  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


KeyboardInterrupt: 

In [None]:
from collections import deque

# --- Training hyper-parameters -------------------------------------------
episodes = 5000
eps_start = 1.0
eps_end = 0.01
eps_decay = 0.9995
max_steps = 500  # NOTE(review): not used by the loop below.

epsilon = eps_start

# Total reward of the most recent 100 episodes (older entries are dropped).
results = deque(maxlen=100)
cx = Variable(torch.zeros(64, 256, device=device)) # the cell states of the LSTM are reinitialized to zero
hx = Variable(torch.zeros(64, 256, device=device)) # the hidden states of the LSTM are reinitialized to zero

for i in range(episodes):
    game.new_episode()
    while True:
        state = game.get_state()
        img = preprocess_frame(state.screen_buffer)
        # Only the first row of hx / cx is used for action selection.
        selected_action = agent.select_action(img, epsilon, (hx[0].view(1,-1), cx[0].view(1,-1)))
        reward = game.make_action(actions[selected_action])
        done = game.is_episode_finished()

        if done:
            # No screen buffer is read after the episode has ended; use an
            # all-zero frame as the next observation of the terminal step.
            next_img = np.zeros_like(img)
        else:
            next_img = preprocess_frame(game.get_state().screen_buffer)

        # Re-wrap the raw data so the states passed on carry no history from
        # earlier steps.
        cx = Variable(cx.data)
        hx = Variable(hx.data)

        # The terminal step is passed to the agent as well (done=True).
        # Breaking out before this call would mean the reward of the action
        # that ends each episode never reaches agent.step.
        (hx, cx) = agent.step(img, selected_action, reward, next_img, done, hx, cx)

        if done:
            cx = Variable(torch.zeros(64, 256, device=device)) # the cell states of the LSTM are reinitialized to zero
            hx = Variable(torch.zeros(64, 256, device=device)) # the hidden states of the LSTM are reinitialized to zero
            break
        #time.sleep(0.02)

    # Multiplicative epsilon decay, floored at eps_end.
    epsilon = max(epsilon * eps_decay, eps_end)

    # Checkpoint every 250 episodes (including episode 0).
    if i%250==0:
        file_name = 'checkpoint_' + str(i) + '.pth'
        torch.save(agent.local_network.state_dict(), file_name)

    results.append(game.get_total_reward())
    if i>=100 and i%100==0:
        print ("    ", i, "mean result:", np.mean(results))

In [None]:
# Write a final checkpoint of the local network, named after the last value
# of the training loop counter `i`.
file_name = 'checkpoint_' + str(i) + '.pth'
final_weights = agent.local_network.state_dict()
torch.save(final_weights, file_name)

# Plot the stored episode scores. `results` is a deque with maxlen=100, so
# at most the 100 most recent episode totals are shown, and the x axis is
# the position inside that window.
fig = plt.figure()
ax = fig.add_subplot(111)
score_positions = np.arange(len(results))
ax.plot(score_positions, results)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

In [7]:
# Build a fresh agent and restore trained weights for evaluation.
agent = DQN_agent(3, device)

# The training cell writes 'checkpoint_<episode>.pth' into the working
# directory, so the checkpoint is read from there instead of from a
# machine-specific absolute path.
checkpoint_path = 'checkpoint_500.pth'

# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a CUDA machine also loads on a CPU-only one.
# NOTE(review): checkpoints are saved from `agent.local_network` but loaded
# into `agent.predict_network` here - confirm both refer to the same
# architecture in agent.py.
agent.predict_network.load_state_dict(torch.load(checkpoint_path, map_location=device))

cx = Variable(torch.zeros(1, 256, device=device)) # the cell states of the LSTM are reinitialized to zero
hx = Variable(torch.zeros(1, 256, device=device)) # the hidden states of the LSTM are reinitialized to zero

# Play 10 episodes with epsilon = 0 and report each episode's total reward.
# NOTE(review): hx / cx are never updated in this loop, so every call to
# select_action receives the same all-zero recurrent state.
for episode in range(10):
    game.new_episode()
    while True:
        img = preprocess_frame(game.get_state().screen_buffer)
        selected_action = agent.select_action(img, 0., (hx, cx))
        game.make_action(actions[selected_action])
        if game.is_episode_finished():
            break
        # Slow the loop down so the game window can be followed by eye.
        time.sleep(0.05)
    print('Episode: \t{} \tScore: \t{:.2f}'.format(episode, game.get_total_reward()))


Episode: 	0 	Score: 	58.00
Episode: 	1 	Score: 	58.00
Episode: 	2 	Score: 	92.00
Episode: 	3 	Score: 	85.00
Episode: 	4 	Score: 	62.00
Episode: 	5 	Score: 	58.00
Episode: 	6 	Score: 	58.00
Episode: 	7 	Score: 	74.00
Episode: 	8 	Score: 	58.00
Episode: 	9 	Score: 	58.00
