In [1]:
import gym
import random
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

from ddpg_agent import Agent

In [2]:
env = gym.make('LunarLanderContinuous-v2')
env.seed(10)
print('State shape: ', env.observation_space.shape)
print('Number of actions: ', env.action_space)

agent = Agent(state_size=env.observation_space.shape[0], action_size=env.action_space.shape[0] , random_seed=10)

State shape:  (8,)
Number of actions:  Box(-1.0, 1.0, (2,), float32)


In [None]:
def ddpg(n_episodes=4000, max_t=700):
    scores_deque = deque(maxlen=100)
    scores = []
    max_score = -np.Inf
    for i_episode in range(1, n_episodes+1):
        state = env.reset()
        agent.reset()
        score = 0
        
        for t in range(max_t):
            action = agent.act(state)
            #print (action)
            next_state, reward, done, _ = env.step(action)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break 
        scores_deque.append(score)
        scores.append(score)
        print('\r Episode {} \t Average Score: {:.2f} \t Score: {:.2f}'.format(i_episode, np.mean(scores_deque), score), end="")
        if i_episode % 100 == 0:
            torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
            torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))   
    return scores

scores = ddpg()

fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(1, len(scores)+1), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.show()

Episode 100	Average Score: -609.85.85 	 Score: -465.91
Episode 200	Average Score: -321.70.70 	 Score: -406.40
 Episode 229 	 Average Score: -265.50 	 Score: -609.01