In [1]:
import numpy as np
import gym
import random
import copy
from collections import deque

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from matplotlib import pyplot as plt

In [2]:
class Actor(nn.Module):
    """Deterministic policy network: maps a state to an action in [-scaling, scaling].

    Three fully connected layers with ReLU activations on the two hidden layers and a
    tanh on the output, multiplied by ``scaling``.

    Parameters
    ----------
    scaling : factor applied to the tanh output (the magnitude of the largest action).
    state_size : number of input features.
    hidden_size : width of both hidden layers.
    action_size : number of action components produced.
    """

    def __init__(self, scaling, state_size, hidden_size, action_size):
        super(Actor, self).__init__()
        # Keep the factor on the instance: forward() must not depend on a notebook
        # global called `scaling` happening to exist (and to hold the same value).
        self.scaling = scaling
        self.fc1 = nn.Linear(state_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, action_size)

    def forward(self, state):
        """Return the action tensor for ``state`` (last dimension = ``state_size``)."""
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        # tanh bounds the output to [-1, 1]; the factor stretches it to the action range.
        return torch.tanh(self.fc3(x)) * self.scaling

In [3]:
class Critic(nn.Module):
    """Action-value network: scores a (state, action) pair with a single Q-value.

    The state and action are concatenated along dimension 1 and passed through two
    ReLU hidden layers of width ``hidden_size`` followed by a linear output of size 1.
    """

    def __init__(self, state_size, hidden_size, action_size):
        super().__init__()
        joint_size = state_size + action_size  # the network sees state and action side by side
        self.fc1 = nn.Linear(joint_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 1)

    def forward(self, state, action):
        """Return the Q-values for a batch of states and a matching batch of actions."""
        joint = torch.cat((state, action), dim = 1)
        hidden = joint
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        q_value = self.fc3(hidden)
        return q_value

In [4]:
class DDPG_Agent():
    """Deep Deterministic Policy Gradient agent with a list-backed replay buffer.

    Holds an actor and a critic network, a target copy of each (moved towards the
    online networks by a soft update after every learning step), one Adam optimiser
    per online network, and five parallel lists that store the transitions.

    The environment is expected to follow the classic Gym interface used throughout
    this class: ``reset()`` returns an observation and ``step(action)`` returns
    ``(next_state, reward, terminal, info)``.

    Parameters
    ----------
    scaling : factor handed to ``Actor``; also used as the symmetric bound that actions
        are clipped to before they are sent to the environment.
    render : if true, ``train`` renders every step and closes the viewer afterwards.
    buffer_size : maximum number of stored transitions; older entries are overwritten.
    batch_size : number of transitions sampled for each learning step.
    gamma : discount factor of the critic target.
    tau : interpolation factor of the soft target-network update.
    noise : standard deviation of the Gaussian exploration noise.
    state_size, hidden_size, action_size : network dimensions.
    actor_lr, critic_lr : learning rates of the two Adam optimisers.
    env : optional environment to act in. When left as None the module-level ``env``
        is looked up each time a method runs, which is what the notebook relies on.
    """

    def __init__(self, scaling, render, buffer_size, batch_size, gamma, tau, noise,
                state_size, hidden_size, action_size, actor_lr, critic_lr, env = None):

        self.scaling = scaling
        self.render = render
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.gamma = gamma
        self.tau = tau
        self.noise = noise
        self.env = env

        self.state_size = state_size
        self.hidden_size = hidden_size
        self.action_size = action_size
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr

        self.actor = Actor(scaling, state_size, hidden_size, action_size)
        self.critic = Critic(state_size, hidden_size, action_size)
        # Target networks start as exact copies and are only ever changed by _soft_update
        self.target_actor = copy.deepcopy(self.actor)
        self.target_critic = copy.deepcopy(self.critic)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr = actor_lr)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr = critic_lr)

        # Replay buffer: parallel lists, index i of each list describes one transition
        self.state_memory = []
        self.next_state_memory = []
        self.action_memory = []
        self.reward_memory = []
        self.terminal_memory = [] # Holds 0 for a terminal transition, 1 otherwise
        self.memory_counter = 0 # Total number of transitions ever stored

    def _get_env(self):
        """Return the environment passed to the constructor, else the module-level ``env``."""
        if self.env is not None:
            return self.env
        return env

    def _select_action(self, state, explore):
        """Return the actor's action for ``state`` as a float32 numpy array.

        With ``explore`` true, Gaussian noise with standard deviation ``self.noise`` is
        added. The result is always clipped to [-scaling, scaling] so that the action
        stored in the replay buffer lies in the same range the actor can output.
        """
        tensor_state = torch.as_tensor(state, dtype = torch.float32)
        with torch.no_grad(): # Acting never needs gradients
            action = self.actor(tensor_state).numpy()
        if explore:
            action = action + np.random.normal(0, self.noise, self.action_size)
        return np.clip(action, -self.scaling, self.scaling).astype(np.float32)

    def _store_transition(self, state, action, reward, next_state, not_terminal):
        """Append one transition, overwriting the oldest slot once the buffer is full."""
        if self.memory_counter < self.buffer_size:
            self.state_memory.append(state)
            self.next_state_memory.append(next_state)
            self.action_memory.append(action)
            self.reward_memory.append(reward)
            self.terminal_memory.append(not_terminal)
        else:
            index = self.memory_counter % self.buffer_size
            self.state_memory[index] = state
            self.next_state_memory[index] = next_state
            self.action_memory[index] = action
            self.reward_memory[index] = reward
            self.terminal_memory[index] = not_terminal
        self.memory_counter += 1

    def _soft_update(self, target_network, network):
        """Move every target parameter a fraction ``tau`` towards its online counterpart."""
        with torch.no_grad():
            for target_parameter, parameter in zip(target_network.parameters(), network.parameters()):
                target_parameter.copy_((target_parameter * (1 - self.tau)) + (parameter * self.tau))

    def _learn(self):
        """Sample one batch and update the critic, the actor and both target networks."""
        batch = random.sample(range(len(self.state_memory)), self.batch_size)

        def gather(memory):
            # Stack the sampled entries into one float32 tensor
            return torch.as_tensor(np.array([memory[item] for item in batch]), dtype = torch.float32)

        states = gather(self.state_memory)
        next_states = gather(self.next_state_memory)
        actions = gather(self.action_memory)
        rewards = gather(self.reward_memory).view(-1, 1)
        not_terminals = gather(self.terminal_memory).view(-1, 1)

        # Critic target: the targets are constants, so no graph is built for them
        with torch.no_grad():
            target_actions = self.target_actor(next_states)
            next_Qtargets = self.target_critic(next_states, target_actions)
            Qtargets = rewards + self.gamma * next_Qtargets * not_terminals # Bootstrap term is 0 if terminal

        # Gradient descent on critic
        Qexpected = self.critic(states, actions)
        critic_loss = nn.MSELoss()(Qexpected, Qtargets)
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        # Gradient ascent on actor (descent on the negated mean Q-value)
        actions_predicted = self.actor(states)
        actor_loss = -self.critic(states, actions_predicted).mean()
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # Update target networks
        self._soft_update(self.target_critic, self.critic)
        self._soft_update(self.target_actor, self.actor)

    def _run_episode(self, active_env, render):
        """Play one episode with the noise-free policy and return its total reward."""
        state = active_env.reset()
        terminal = False
        ep_reward = 0

        while not terminal:
            if render:
                active_env.render()
            action = self._select_action(state, explore = False)
            state, reward, terminal, _ = active_env.step(action)
            ep_reward += reward

        return ep_reward

    def train(self):
        """Run one training episode with exploration noise and return its total reward.

        Every step is stored in the replay buffer; as soon as at least ``batch_size``
        transitions have been stored, each step is followed by one learning update.
        """
        active_env = self._get_env()
        state = active_env.reset()
        ep_reward = 0
        terminal = False

        while not terminal:

            if self.render:
                active_env.render()

            action = self._select_action(state, explore = True)
            next_state, reward, terminal, info = active_env.step(action)
            ep_reward += reward

            # Gym's TimeLimit wrapper flags episodes it cut short; those are not real
            # terminal states, so the critic should still bootstrap from next_state.
            truncated = isinstance(info, dict) and bool(info.get('TimeLimit.truncated', False))
            not_terminal = 0 if (terminal and not truncated) else 1
            self._store_transition(state, action, reward, next_state, not_terminal)

            # Once enough transitions have been stored, learn from a sampled batch
            if self.memory_counter >= self.batch_size:
                self._learn()

            state = next_state

        if self.render:
            active_env.close()

        return ep_reward

    def test(self, episodes = 100):
        """Play ``episodes`` noise-free episodes, print the mean score and plot all scores."""
        active_env = self._get_env()
        scores = [self._run_episode(active_env, render = False) for _ in range(episodes)]

        print('Scores during testing')
        print('Mean score:', np.mean(scores))
        x = list(range(1, len(scores) + 1))
        plt.plot(x, scores)
        plt.ylabel('Score')
        plt.xlabel('Episodes')
        plt.show()

    def visualise(self, episodes = 5):
        """Render ``episodes`` noise-free episodes, printing the reward of each one."""
        active_env = self._get_env()
        for _ in range(episodes):
            ep_reward = self._run_episode(active_env, render = True)
            print('Episode reward:', ep_reward)
        # Close the viewer once, after the last episode, not between episodes
        active_env.close()

In [5]:
environment = 'BipedalWalker-v3' # Change environment name here
env = gym.make(environment)

# `scaling` is the magnitude of the largest action; the actor multiplies its tanh output by it.
# Note: `x == 'a' or 'b'` is always true because a non-empty string is truthy, so the
# environment names have to be checked with a membership test.
if environment == 'Pendulum-v1':
    scaling = 2
elif environment in ('LunarLanderContinuous-v2', 'BipedalWalker-v3', 'BipedalWalkerHardcore-v3'):
    scaling = 1
else:
    # Unlisted environment: fall back to the bound reported by the action space
    # (assumes a Box action space with symmetric bounds - confirm for new environments)
    scaling = float(env.action_space.high[0])

agents = 2
episodes = 3000
all_scores = []
all_moving_means = [] # Mean score of the last X episodes
previous = 100 # Set value of X
print_every = 1 # How often to print (in episodes)

for i in range(1, agents + 1):

    # Every agent starts from freshly initialised networks and an empty replay buffer
    agent = DDPG_Agent(scaling = scaling, render = False, buffer_size = 1000000, batch_size = 100, gamma = 0.99, tau = 0.001, noise = 0.05,
                state_size = env.observation_space.shape[0], hidden_size = 300, action_size = env.action_space.shape[0], actor_lr = 0.0001, critic_lr = 0.001)
    agent_scores = []
    agent_moving_means = []

    for j in range(1, episodes + 1):

        ep_reward = agent.train()
        agent_scores.append(ep_reward)
        agent_moving_means.append(np.mean(agent_scores[-previous:]))

        if j % print_every == 0:
            print('Episode: {}/{} Score: {:.3f} \t Mean score over last {} episodes: {:.3f}'.format(j, episodes, ep_reward, previous, agent_moving_means[-1]))

    print('*************************************************************')
    print('Agent {} complete \t Mean score of agent: {:.2f}'.format(i, np.mean(agent_scores)))
    print('*************************************************************')
    all_scores.append(agent_scores)
    all_moving_means.append(agent_moving_means)

all_scores = np.array(all_scores)
average_score = all_scores.mean(0) # Average the scores across agents
x = list(range(1, len(average_score) + 1))
print('Scores during training')
plt.plot(x, average_score)
plt.ylabel('Score averaged across agents')
plt.xlabel('Episodes')
plt.show()

all_moving_means = np.array(all_moving_means)
average_moving_mean = all_moving_means.mean(0) # Average the means across agents
x = list(range(1, len(average_moving_mean) + 1))
print('Mean score of last {} episodes during training'.format(previous))
plt.plot(x, average_moving_mean)
plt.ylabel('Mean score of last {} episodes averaged across agents'.format(previous))
plt.xlabel('Episodes')
plt.show()

Episode: 1/3000 Score: -110.655 	 Mean score over last 100 episodes: -110.655
Episode: 2/3000 Score: -110.150 	 Mean score over last 100 episodes: -110.403
Episode: 3/3000 Score: -145.471 	 Mean score over last 100 episodes: -122.092
Episode: 4/3000 Score: -104.811 	 Mean score over last 100 episodes: -117.772
Episode: 5/3000 Score: -110.400 	 Mean score over last 100 episodes: -116.298
Episode: 6/3000 Score: -177.752 	 Mean score over last 100 episodes: -126.540
Episode: 7/3000 Score: -102.666 	 Mean score over last 100 episodes: -123.129
Episode: 8/3000 Score: -158.329 	 Mean score over last 100 episodes: -127.529
Episode: 9/3000 Score: -102.445 	 Mean score over last 100 episodes: -124.742
Episode: 10/3000 Score: -101.349 	 Mean score over last 100 episodes: -122.403
Episode: 11/3000 Score: -130.119 	 Mean score over last 100 episodes: -123.104
Episode: 12/3000 Score: -127.579 	 Mean score over last 100 episodes: -123.477
Episode: 13/3000 Score: -110.913 	 Mean score over last 100 e

Episode: 105/3000 Score: -105.329 	 Mean score over last 100 episodes: -113.142
Episode: 106/3000 Score: -103.034 	 Mean score over last 100 episodes: -112.395
Episode: 107/3000 Score: -101.714 	 Mean score over last 100 episodes: -112.385
Episode: 108/3000 Score: -113.483 	 Mean score over last 100 episodes: -111.937
Episode: 109/3000 Score: -102.352 	 Mean score over last 100 episodes: -111.936
Episode: 110/3000 Score: -105.566 	 Mean score over last 100 episodes: -111.978
Episode: 111/3000 Score: -103.748 	 Mean score over last 100 episodes: -111.714
Episode: 112/3000 Score: -102.283 	 Mean score over last 100 episodes: -111.461
Episode: 113/3000 Score: -102.379 	 Mean score over last 100 episodes: -111.376
Episode: 114/3000 Score: -113.067 	 Mean score over last 100 episodes: -111.622
Episode: 115/3000 Score: -102.043 	 Mean score over last 100 episodes: -111.586
Episode: 116/3000 Score: -103.034 	 Mean score over last 100 episodes: -111.407
Episode: 117/3000 Score: -104.051 	 Mean

Episode: 208/3000 Score: -109.683 	 Mean score over last 100 episodes: -109.071
Episode: 209/3000 Score: -111.533 	 Mean score over last 100 episodes: -109.163
Episode: 210/3000 Score: -110.042 	 Mean score over last 100 episodes: -109.207
Episode: 211/3000 Score: -110.751 	 Mean score over last 100 episodes: -109.278
Episode: 212/3000 Score: -109.637 	 Mean score over last 100 episodes: -109.351
Episode: 213/3000 Score: -110.473 	 Mean score over last 100 episodes: -109.432
Episode: 214/3000 Score: -111.551 	 Mean score over last 100 episodes: -109.417
Episode: 215/3000 Score: -109.343 	 Mean score over last 100 episodes: -109.490
Episode: 216/3000 Score: -109.827 	 Mean score over last 100 episodes: -109.558
Episode: 217/3000 Score: -110.245 	 Mean score over last 100 episodes: -109.620
Episode: 218/3000 Score: -109.808 	 Mean score over last 100 episodes: -109.696
Episode: 219/3000 Score: -110.804 	 Mean score over last 100 episodes: -109.739
Episode: 220/3000 Score: -109.995 	 Mean

Episode: 311/3000 Score: -112.442 	 Mean score over last 100 episodes: -118.743
Episode: 312/3000 Score: -107.316 	 Mean score over last 100 episodes: -118.720
Episode: 313/3000 Score: -106.220 	 Mean score over last 100 episodes: -118.677
Episode: 314/3000 Score: -131.380 	 Mean score over last 100 episodes: -118.875
Episode: 315/3000 Score: -140.186 	 Mean score over last 100 episodes: -119.184
Episode: 316/3000 Score: -141.548 	 Mean score over last 100 episodes: -119.501
Episode: 317/3000 Score: -132.753 	 Mean score over last 100 episodes: -119.726
Episode: 318/3000 Score: -113.632 	 Mean score over last 100 episodes: -119.764
Episode: 319/3000 Score: -125.641 	 Mean score over last 100 episodes: -119.913
Episode: 320/3000 Score: -134.804 	 Mean score over last 100 episodes: -120.161
Episode: 321/3000 Score: -111.616 	 Mean score over last 100 episodes: -120.169
Episode: 322/3000 Score: -107.030 	 Mean score over last 100 episodes: -120.146
Episode: 323/3000 Score: -133.681 	 Mean

Episode: 414/3000 Score: -104.973 	 Mean score over last 100 episodes: -114.725
Episode: 415/3000 Score: -95.538 	 Mean score over last 100 episodes: -114.278
Episode: 416/3000 Score: -99.615 	 Mean score over last 100 episodes: -113.859
Episode: 417/3000 Score: -101.308 	 Mean score over last 100 episodes: -113.545
Episode: 418/3000 Score: -110.146 	 Mean score over last 100 episodes: -113.510
Episode: 419/3000 Score: -104.798 	 Mean score over last 100 episodes: -113.301
Episode: 420/3000 Score: -129.620 	 Mean score over last 100 episodes: -113.249
Episode: 421/3000 Score: -108.492 	 Mean score over last 100 episodes: -113.218
Episode: 422/3000 Score: -100.441 	 Mean score over last 100 episodes: -113.152
Episode: 423/3000 Score: -99.756 	 Mean score over last 100 episodes: -112.813
Episode: 424/3000 Score: -100.360 	 Mean score over last 100 episodes: -112.530
Episode: 425/3000 Score: -102.895 	 Mean score over last 100 episodes: -111.976
Episode: 426/3000 Score: -100.916 	 Mean sc

Episode: 517/3000 Score: -108.018 	 Mean score over last 100 episodes: -106.523
Episode: 518/3000 Score: -190.310 	 Mean score over last 100 episodes: -107.325
Episode: 519/3000 Score: -146.216 	 Mean score over last 100 episodes: -107.739
Episode: 520/3000 Score: -135.271 	 Mean score over last 100 episodes: -107.796
Episode: 521/3000 Score: -104.917 	 Mean score over last 100 episodes: -107.760
Episode: 522/3000 Score: -100.442 	 Mean score over last 100 episodes: -107.760
Episode: 523/3000 Score: -110.701 	 Mean score over last 100 episodes: -107.870
Episode: 524/3000 Score: -99.250 	 Mean score over last 100 episodes: -107.858
Episode: 525/3000 Score: -124.384 	 Mean score over last 100 episodes: -108.073
Episode: 526/3000 Score: -100.458 	 Mean score over last 100 episodes: -108.069
Episode: 527/3000 Score: -121.554 	 Mean score over last 100 episodes: -108.289
Episode: 528/3000 Score: -95.873 	 Mean score over last 100 episodes: -108.193
Episode: 529/3000 Score: -132.365 	 Mean s

Episode: 620/3000 Score: -136.563 	 Mean score over last 100 episodes: -134.518
Episode: 621/3000 Score: -155.701 	 Mean score over last 100 episodes: -135.025
Episode: 622/3000 Score: -119.252 	 Mean score over last 100 episodes: -135.213
Episode: 623/3000 Score: -150.476 	 Mean score over last 100 episodes: -135.611
Episode: 624/3000 Score: -116.184 	 Mean score over last 100 episodes: -135.781
Episode: 625/3000 Score: -116.153 	 Mean score over last 100 episodes: -135.698
Episode: 626/3000 Score: -112.696 	 Mean score over last 100 episodes: -135.821
Episode: 627/3000 Score: -108.975 	 Mean score over last 100 episodes: -135.695
Episode: 628/3000 Score: -106.506 	 Mean score over last 100 episodes: -135.801
Episode: 629/3000 Score: -112.338 	 Mean score over last 100 episodes: -135.601
Episode: 630/3000 Score: -129.654 	 Mean score over last 100 episodes: -135.615
Episode: 631/3000 Score: -145.600 	 Mean score over last 100 episodes: -135.955
Episode: 632/3000 Score: -111.816 	 Mean

Episode: 723/3000 Score: -109.956 	 Mean score over last 100 episodes: -115.671
Episode: 724/3000 Score: -110.856 	 Mean score over last 100 episodes: -115.618
Episode: 725/3000 Score: -110.245 	 Mean score over last 100 episodes: -115.559
Episode: 726/3000 Score: -109.423 	 Mean score over last 100 episodes: -115.526
Episode: 727/3000 Score: -109.177 	 Mean score over last 100 episodes: -115.528
Episode: 728/3000 Score: -108.812 	 Mean score over last 100 episodes: -115.551
Episode: 729/3000 Score: -115.475 	 Mean score over last 100 episodes: -115.583
Episode: 730/3000 Score: -112.571 	 Mean score over last 100 episodes: -115.412
Episode: 731/3000 Score: -111.529 	 Mean score over last 100 episodes: -115.071
Episode: 732/3000 Score: -114.308 	 Mean score over last 100 episodes: -115.096
Episode: 733/3000 Score: -117.501 	 Mean score over last 100 episodes: -115.096
Episode: 734/3000 Score: -112.909 	 Mean score over last 100 episodes: -114.955
Episode: 735/3000 Score: -113.630 	 Mean

Episode: 826/3000 Score: -102.600 	 Mean score over last 100 episodes: -112.102
Episode: 827/3000 Score: -114.094 	 Mean score over last 100 episodes: -112.151
Episode: 828/3000 Score: -109.428 	 Mean score over last 100 episodes: -112.158
Episode: 829/3000 Score: -106.745 	 Mean score over last 100 episodes: -112.070
Episode: 830/3000 Score: -132.198 	 Mean score over last 100 episodes: -112.267
Episode: 831/3000 Score: -112.494 	 Mean score over last 100 episodes: -112.276
Episode: 832/3000 Score: -112.461 	 Mean score over last 100 episodes: -112.258
Episode: 833/3000 Score: -116.792 	 Mean score over last 100 episodes: -112.251
Episode: 834/3000 Score: -110.365 	 Mean score over last 100 episodes: -112.225
Episode: 835/3000 Score: -109.865 	 Mean score over last 100 episodes: -112.188
Episode: 836/3000 Score: -108.952 	 Mean score over last 100 episodes: -112.181
Episode: 837/3000 Score: -100.992 	 Mean score over last 100 episodes: -112.028
Episode: 838/3000 Score: -119.135 	 Mean

Episode: 929/3000 Score: -110.391 	 Mean score over last 100 episodes: -129.890
Episode: 930/3000 Score: -136.405 	 Mean score over last 100 episodes: -129.932
Episode: 931/3000 Score: -150.163 	 Mean score over last 100 episodes: -130.308
Episode: 932/3000 Score: -112.437 	 Mean score over last 100 episodes: -130.308
Episode: 933/3000 Score: -115.180 	 Mean score over last 100 episodes: -130.292
Episode: 934/3000 Score: -112.360 	 Mean score over last 100 episodes: -130.312
Episode: 935/3000 Score: -116.614 	 Mean score over last 100 episodes: -130.379
Episode: 936/3000 Score: -150.056 	 Mean score over last 100 episodes: -130.790
Episode: 937/3000 Score: -112.001 	 Mean score over last 100 episodes: -130.901
Episode: 938/3000 Score: -110.213 	 Mean score over last 100 episodes: -130.811
Episode: 939/3000 Score: -131.447 	 Mean score over last 100 episodes: -131.052
Episode: 940/3000 Score: -104.736 	 Mean score over last 100 episodes: -131.014
Episode: 941/3000 Score: -108.253 	 Mean

Episode: 1032/3000 Score: -146.040 	 Mean score over last 100 episodes: -143.354
Episode: 1033/3000 Score: -129.483 	 Mean score over last 100 episodes: -143.497
Episode: 1034/3000 Score: -128.808 	 Mean score over last 100 episodes: -143.661
Episode: 1035/3000 Score: -116.369 	 Mean score over last 100 episodes: -143.659
Episode: 1036/3000 Score: -119.416 	 Mean score over last 100 episodes: -143.353
Episode: 1037/3000 Score: -105.325 	 Mean score over last 100 episodes: -143.286
Episode: 1038/3000 Score: -144.794 	 Mean score over last 100 episodes: -143.632
Episode: 1039/3000 Score: -166.319 	 Mean score over last 100 episodes: -143.980
Episode: 1040/3000 Score: -144.288 	 Mean score over last 100 episodes: -144.376
Episode: 1041/3000 Score: -122.568 	 Mean score over last 100 episodes: -144.519
Episode: 1042/3000 Score: -120.454 	 Mean score over last 100 episodes: -144.476
Episode: 1043/3000 Score: -126.951 	 Mean score over last 100 episodes: -144.487
Episode: 1044/3000 Score: -1

Episode: 1134/3000 Score: -108.049 	 Mean score over last 100 episodes: -125.977
Episode: 1135/3000 Score: -105.051 	 Mean score over last 100 episodes: -125.864
Episode: 1136/3000 Score: -99.359 	 Mean score over last 100 episodes: -125.663
Episode: 1137/3000 Score: -101.950 	 Mean score over last 100 episodes: -125.630
Episode: 1138/3000 Score: -106.080 	 Mean score over last 100 episodes: -125.243
Episode: 1139/3000 Score: -103.666 	 Mean score over last 100 episodes: -124.616
Episode: 1140/3000 Score: -112.454 	 Mean score over last 100 episodes: -124.298
Episode: 1141/3000 Score: -113.945 	 Mean score over last 100 episodes: -124.211
Episode: 1142/3000 Score: -144.665 	 Mean score over last 100 episodes: -124.454
Episode: 1143/3000 Score: -115.800 	 Mean score over last 100 episodes: -124.342
Episode: 1144/3000 Score: -123.239 	 Mean score over last 100 episodes: -124.176
Episode: 1145/3000 Score: -114.021 	 Mean score over last 100 episodes: -124.200
Episode: 1146/3000 Score: -81

Episode: 1236/3000 Score: -107.263 	 Mean score over last 100 episodes: -102.460
Episode: 1237/3000 Score: -125.583 	 Mean score over last 100 episodes: -102.696
Episode: 1238/3000 Score: 19.246 	 Mean score over last 100 episodes: -101.443
Episode: 1239/3000 Score: -144.556 	 Mean score over last 100 episodes: -101.852
Episode: 1240/3000 Score: -108.759 	 Mean score over last 100 episodes: -101.815
Episode: 1241/3000 Score: -134.616 	 Mean score over last 100 episodes: -102.021
Episode: 1242/3000 Score: -139.164 	 Mean score over last 100 episodes: -101.966
Episode: 1243/3000 Score: -142.525 	 Mean score over last 100 episodes: -102.234
Episode: 1244/3000 Score: -6.664 	 Mean score over last 100 episodes: -101.068
Episode: 1245/3000 Score: -32.409 	 Mean score over last 100 episodes: -100.252
Episode: 1246/3000 Score: -129.063 	 Mean score over last 100 episodes: -100.728
Episode: 1247/3000 Score: -138.393 	 Mean score over last 100 episodes: -101.593
Episode: 1248/3000 Score: -144.66

Episode: 1338/3000 Score: -82.881 	 Mean score over last 100 episodes: -104.033
Episode: 1339/3000 Score: -69.548 	 Mean score over last 100 episodes: -103.283
Episode: 1340/3000 Score: -109.721 	 Mean score over last 100 episodes: -103.293
Episode: 1341/3000 Score: -93.871 	 Mean score over last 100 episodes: -102.885
Episode: 1342/3000 Score: -76.032 	 Mean score over last 100 episodes: -102.254
Episode: 1343/3000 Score: -63.404 	 Mean score over last 100 episodes: -101.463
Episode: 1344/3000 Score: -93.416 	 Mean score over last 100 episodes: -102.330
Episode: 1345/3000 Score: -152.633 	 Mean score over last 100 episodes: -103.532
Episode: 1346/3000 Score: -134.779 	 Mean score over last 100 episodes: -103.590
Episode: 1347/3000 Score: -146.325 	 Mean score over last 100 episodes: -103.669
Episode: 1348/3000 Score: -103.663 	 Mean score over last 100 episodes: -103.259
Episode: 1349/3000 Score: -147.498 	 Mean score over last 100 episodes: -103.264
Episode: 1350/3000 Score: -140.783

KeyboardInterrupt: 

In [1]:
# Evaluate the trained policy. `agent` is the instance created by the training-loop cell,
# so that cell has to be run in the same kernel first; in a fresh kernel this line raises
# the NameError recorded in the output below.
agent.test()

NameError: name 'agent' is not defined

In [2]:
# Render a few episodes of the trained policy. Like the evaluation cell, this needs the
# `agent` (and `env`) created by the training-loop cell to exist in the current kernel.
agent.visualise()

NameError: name 'agent' is not defined