In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
# from vizdoom import *
from doom_src import utilities

ImportError: dlopen(/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/vizdoom/vizdoom.so, 2): Symbol not found: __ZN5boost6system15system_categoryEv
  Referenced from: /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/vizdoom/vizdoom.so
  Expected in: /usr/local/opt/boost/lib/libboost_system-mt.dylib
 in /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/vizdoom/vizdoom.so

In [None]:
def eGreedyActionSelection(q_curr, eps):
    '''
    Performs epsilon-greedy action selection based on the Q-values.
    
    Args:
        q_curr: A 1-D numpy array (or sequence) that contains the Q-value of
            each action for a state.
        eps: The probability to select a random action. Float between 0 and 1.
        
    Returns:
        The index of the selected action, as a Python int.

    Raises:
        ValueError: If q_curr contains no Q-values.
    '''
    q_values = np.asarray(q_curr)
    if q_values.size == 0:
        raise ValueError("q_curr must contain at least one Q-value")

    if np.random.random() < eps:
        # eps of the time: explore with a uniformly random action index.
        return int(np.random.randint(0, len(q_values)))

    # 1-eps of the time: exploit. Collect every index that attains the maximum
    # exactly once, so ties are broken uniformly (no index is double-counted).
    best_actions = np.flatnonzero(q_values == q_values.max())
    # Drawing from numpy's RNG keeps the whole function reproducible with
    # np.random.seed and avoids depending on the un-imported `random` module.
    return int(np.random.choice(best_actions))
    

In [None]:
def sarsa(game, actions, num_episodes, gamma=1.0, alpha=0.1, 
                start_eps=0.2, final_eps=0.1, annealing_steps=1000,
                max_episode_steps=200):
    '''
    Tabular Sarsa (on-policy TD control) with epsilon-greedy exploration.
    
    Args:
        - game: The environment. It is driven through new_episode(),
          get_state().screen_buffer, make_action() and is_episode_finished()
          (presumably a ViZDoom DoomGame -- TODO confirm).
        - actions: Sequence of the available actions. Its length fixes the
          number of Q-values per state; actions[k] is what gets passed to
          game.make_action for action index k.
        - num_episodes: The number of episodes to train the agent for
        - gamma: The discount factor
        - alpha: The stepsize
        - start_eps: The initial epsilon value for e-greedy action selection
        - final_eps: The final epsilon value for the e-greedy action selection
        - annealing_steps: The length of the epsilon schedule. The schedule is
          queried with the episode index, so this is effectively measured in
          episodes.
        - max_episode_steps: The maximum number of steps an episode can take
        
    Returns: (Q, episode_rewards, episode_lengths)
        - Q: Dictionary mapping state -> numpy array of action values
        - episode_rewards: Numpy array containing the reward of each episode during training
        - episode_lengths: Numpy array containing the length of each episode during training
    '''
    # Imported locally: the notebook's import cell does not bring it into scope.
    from collections import defaultdict

    # Every state seen for the first time starts with all-zero action values.
    Q = defaultdict(lambda: np.zeros(len(actions)))
    episode_rewards = np.zeros(num_episodes)
    episode_lengths = np.zeros(num_episodes)

    exploration = utilities.LinearSchedule(annealing_steps, final_eps, start_eps)

    # Frame-stacking buffer threaded through stack_frames.
    # NOTE(review): assumes stack_frames rebuilds the buffer itself when its
    # new-episode flag is True, so None is an acceptable seed -- TODO confirm.
    stacked_frames = None
    
    for i in range(num_episodes):
        # Restart the environment; also recovers from an episode that was cut
        # short by max_episode_steps on the previous iteration.
        game.new_episode()
        eps = exploration.value(i)

        frame = game.get_state().screen_buffer
        # NOTE(review): stack_frames and config are resolved from the notebook's
        # global namespace; they must be defined before sarsa is called.
        # NOTE(review): the returned state is used as a dict key, so it must be
        # hashable (a raw numpy array is not) -- verify against stack_frames.
        state, stacked_frames = stack_frames(
              stacked_frames, frame, True, config["stack_size"]
        )
        
        action = eGreedyActionSelection(Q[state], eps)
        for t in range(max_episode_steps):
            reward = game.make_action(actions[action])
            done = game.is_episode_finished()
            episode_rewards[i] += reward
            episode_lengths[i] += 1

            if done:
                # Terminal transition: there is no successor state to bootstrap
                # from, so the TD target is the reward alone.
                Q[state][action] += alpha * (reward - Q[state][action])
                break

            frame = game.get_state().screen_buffer
            next_state, stacked_frames = stack_frames(
                  stacked_frames, frame, False, config["stack_size"]
            )
            next_action = eGreedyActionSelection(Q[next_state], eps)

            # Sarsa update: bootstrap from the action actually chosen next.
            td_target = reward + gamma * Q[next_state][next_action]
            Q[state][action] += alpha * (td_target - Q[state][action])

            state = next_state
            action = next_action 
    
    return Q, episode_rewards, episode_lengths

In [None]:
# Load the experiment configuration and sanity-check the environment with it.
config = utilities.get_config("configs/config.json")
utilities.test_environment(config)

# TODO: `game`, `actions` and `num_episodes` are not defined anywhere in the
# visible notebook; create them (e.g. from `config`) before running this cell.
# An incomplete `config[]` lookup that used to sit here was dropped because it
# is a syntax error -- fill in the intended key if it is still needed.
Q, episode_rewards, episode_lengths = sarsa(
    game, actions, num_episodes,
    gamma=1.0, alpha=0.1,
    start_eps=0.2, final_eps=0.1, annealing_steps=1000,
    max_episode_steps=200,
)