# Navigation

---

### 1. Start the Environment


The environment is already saved in the Workspace and can be accessed at the file path provided below.  Please run the next code cell without making any changes.

In [1]:
import gym
import numpy as np

# please do not modify the line below
# Creates the environment object that the training and playback cells below operate on.
# NOTE(review): 'crane-v0' does not appear to be a stock Gym id — presumably a custom
# environment that has to be registered before this call; confirm where that happens.
env = gym.make('crane-v0')


### 2. Create DQN Agent

In [2]:
import torch
import time
from collections import deque
from agent import Agent
import matplotlib.pyplot as plt
%matplotlib inline

# File the training cell writes the local Q-network weights to once the target score is reached.
model_weight_name = 'checkpoint_precise_2.pth'

# Agent configuration, defined once and passed through below so the values
# handed to Agent can never drift apart from these names.
state_size=4
action_size=3
seed=0

agent = Agent(state_size=state_size, action_size=action_size, seed=seed)

### 3. DQN Agent Training

In [None]:
def dqn(n_episodes=10000, max_t=2000, eps_start=1.0, eps_end=0.01, eps_decay=0.997, target_scores=105000.0):
    """Deep Q-Learning.

    Trains the notebook-level ``agent`` on the notebook-level ``env`` using
    epsilon-greedy action selection, with epsilon decayed once per episode.
    Training stops early as soon as the mean score over the most recent
    episodes (at most the last 100) reaches ``target_scores``; at that moment
    the local Q-network weights are saved to ``model_weight_name``. If the
    target is never reached, no checkpoint is written.

    Params
    ======
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
        target_scores (float): average score aimed for; training stops once the
            running average reaches this value

    Returns
    =======
        scores (list): total reward of every episode that was played, in order
    """
    start = time.time()                # Start time
    scores = []                        # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start                    # initialize epsilon

    for i_episode in range(1, n_episodes+1):
        # Reset env and score at the beginning of episode
        env.reset()                                        # reset the environment
        state = env.state                                  # initial state is read from the env attribute
        score = 0                                          # initialize the score

        for t in range(max_t):
            action = agent.act(state, eps)
            env_info = env.step(action)                    # send the action to the environment
            next_state = env_info[0]                       # get the next state
            reward = env_info[1]                           # get the reward
            done = env_info[2]                             # see if episode has finished

            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                print("Episode finished after {} timesteps".format(t+1))
                print("final state is :", state)
                print("Reward is : ", score)
                break

        scores_window.append(score)       # save most recent score
        scores.append(score)              # save most recent score
        eps = max(eps_end, eps_decay*eps) # decrease epsilon

        # The window does not change again this episode, so average it once
        # and reuse the value for all reporting and the stop condition.
        avg_score = np.mean(scores_window)

        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, avg_score), end="")

        if i_episode % 100 == 0:
            # every 100 episodes, end the line so the running average stays in the log
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, avg_score))

        if avg_score >= target_scores:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode, avg_score))
            torch.save(agent.qnetwork_local.state_dict(), model_weight_name)
            break

    time_elapsed = time.time() - start
    print("Time Elapse: {:.2f}".format(time_elapsed))

    return scores

# Settings for the full training run. A shorter trial run could instead use
# n_episodes=2000 and target_scores=1000.0.
training_params = dict(
    n_episodes=10000,
    max_t=2000,
    eps_start=1.0,
    eps_end=0.01,
    eps_decay=0.997,
    target_scores=105000.0,
)
scores = dqn(**training_params)


# plot the scores: one point per training episode, drawn on an explicit axes
# object rather than through pyplot's implicit "current axes"
fig, ax = plt.subplots()
ax.plot(np.arange(len(scores)), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

### 4. Watch the Agent Running (Using Saved Weights)

In [7]:
# load the weights from file
# map_location='cpu' lets a checkpoint that was saved from a GPU also load on a
# CPU-only machine; load_state_dict then copies the values into the network's
# existing parameters.
agent.qnetwork_local.load_state_dict(torch.load('checkpoint_precise.pth', map_location='cpu'))
# alternative checkpoint file: 'checkpoint.pth'



env.reset()                                        # reset the environment
state = env.state                                  # get the current state
score = 0                                          # initialize the score

t = 0
try:
    while True:
        env.render()
        time.sleep(0.008)                              # short pause per frame so playback can be followed
        action = agent.act(state)                      # select an action (no eps passed: Agent.act's default is used)
        env_info = env.step(action)                    # send the action to the environment
        next_state = env_info[0]                       # get the next state
        reward = env_info[1]                           # get the reward
        done = env_info[2]                             # see if episode has finished
        score += reward                                # update the score
        state = next_state  # roll over the state to next time step
        t += 1
        if done:                                       # exit loop if episode finished
            print("final state is :", state)
            print("Total steps : ", t)
            break

    print("Score: {}".format(score))
finally:
    # close the environment even if playback is interrupted or a step raises
    env.close()

final state is : [ 1.01166688 -0.01271586  3.16719652  0.02664652]
Total steps :  403
Score: 106467.5914105575


### 5. Calculate Mean Time and Percentage Rate of the Model

In [10]:
# Choose model
# map_location='cpu' lets a checkpoint that was saved from a GPU also load on a
# CPU-only machine; load_state_dict then copies the values into the network's
# existing parameters.
agent.qnetwork_local.load_state_dict(torch.load('checkpoint_precise.pth', map_location='cpu'))

env.reset()                                        # reset the environment
state = env.state                                  # get the current state
score = 0                                          # initialize the score

t = 0
try:
    while True:
        env.render()                                   # rendered without a per-frame pause in this cell
        action = agent.act(state)                      # select an action (no eps passed: Agent.act's default is used)
        env_info = env.step(action)                    # send the action to the environment
        next_state = env_info[0]                       # get the next state
        reward = env_info[1]                           # get the reward
        done = env_info[2]                             # see if episode has finished
        score += reward                                # update the score
        state = next_state  # roll over the state to next time step
        t += 1
        if done:                                       # exit loop if episode finished
            print("final state is :", state)
            print("Total steps : ", t)
            # NOTE(review): env.tau is presumably the simulated seconds per step — confirm in the env
            print("Total time is : ", env.tau * t)
            break

    print("Score: {}".format(score))
finally:
    # close the environment even if the run is interrupted or a step raises
    env.close()

final state is : [0.99486255 0.00434272 3.16497831 0.02340768]
Total steps :  398
Total time is :  7.96
Score: 105674.22841709381


In [None]:
# Diagnostic: show the CUDA version reported by the installed PyTorch build.
torch.version.cuda

In [None]:
# Diagnostic: show whether PyTorch can use a CUDA device in this kernel.
torch.cuda.is_available()