# Continuous Control

---

### 1. Start the Environment

In [1]:
from unityagents import UnityEnvironment
import numpy as np

env = UnityEnvironment(file_name='apps/Tennis.app')

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


Environments contain **_brains_** which are responsible for deciding the actions of their associated agents. Here we check for the first brain available, and set it as the default brain we will be controlling from Python.

In [2]:
# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

In [3]:
# reset the environment
env_info = env.reset(train_mode=True)[brain_name]

# number of agents
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)

# size of each action
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)

# examine the state space 
states = env_info.vector_observations
state_size = states.shape[1]
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

Number of agents: 2
Size of each action: 2
There are 2 agents. Each observes a state with length: 24
The state for the first agent looks like: [ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.         -6.65278625 -1.5
 -0.          0.          6.83172083  6.         -0.          0.        ]


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
import random
import copy
from collections import deque, namedtuple

In [5]:
# Models
def hidden_init(layer):
    fan_in = layer.weight.data.size()[0]
    lim = 1. / np.sqrt(fan_in)
    return (-lim, lim)

class Critic(nn.Module):
    def __init__(self, seed):
        super(Critic, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(48, 400)
        self.fc2 = nn.Linear(404, 300)
        self.fc3 = nn.Linear(300, 1)
        self.reset_parameters()

    def reset_parameters(self):
        self.fc1.weight.data.uniform_(*hidden_init(self.fc1))
        self.fc2.weight.data.uniform_(*hidden_init(self.fc2))
        self.fc3.weight.data.uniform_(-3e-3, 3e-3)
        
    def forward(self, states, actions):
        states = states.reshape(-1, 48)
        actions = actions.reshape(-1, 4)
        x = F.relu(self.fc1(states))
        x = torch.cat((x, actions), -1)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
class Actor(nn.Module):
    def __init__(self, seed):
        super(Actor, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(24, 400)
        self.fc2 = nn.Linear(400, 300)
        self.fc3 = nn.Linear(300, 2)
        self.reset_parameters()

    def reset_parameters(self):
        self.fc1.weight.data.uniform_(*hidden_init(self.fc1))
        self.fc2.weight.data.uniform_(*hidden_init(self.fc2))
        self.fc3.weight.data.uniform_(-3e-3, 3e-3)
        
    def forward(self, states):
        x = F.relu(self.fc1(states))
        x = F.relu(self.fc2(x))
        x = F.tanh(self.fc3(x))
        return x

In [6]:
class Agent(nn.Module):
    def __init__(self, seed):
        super(Agent, self).__init__()
        self.actor = Actor(seed)
        self.critic = Critic(seed)
        self.critic_target = Critic(seed)
        self.optimizer_actor = optim.Adam(self.actor.parameters(), lr = 1e-4)
        self.optimizer_critic = optim.Adam(self.critic.parameters(), lr = 1e-4)
        self.scale = 2
    
    def act(self, states):
        self.actor.eval()
        with torch.no_grad():
            actions = self.actor(states)
        self.actor.train()
        self.scale = max(0.9999 * self.scale, 0.1)   
        actions += np.random.standard_normal(2) * self.scale
        return torch.clamp(actions, -1, 1)
    
    def step(self, states, actions, rewards, next_states, dones, next_actions, actions_pred):
        next_q = self.critic_target(next_states, next_actions).detach()
        q = self.critic(states, actions)
        td_error = torch.mean((rewards + 0.98 * next_q * (1 - dones) - q)**2)

        self.optimizer_critic.zero_grad()
        td_error.backward()
        torch.nn.utils.clip_grad_norm_(self.critic.parameters(), 10)
        self.optimizer_critic.step()
        for target_param, local_param in zip(self.critic_target.parameters(), self.critic.parameters()):
            target_param.data.copy_(0.003*local_param.data + (1.0-0.003)*target_param.data)
        
        rewards_pred = -torch.mean(self.critic(states, actions_pred))

        self.optimizer_actor.zero_grad()
        rewards_pred.backward()
        self.optimizer_actor.step()

In [7]:
class MADDPG(nn.Module):
    def __init__(self):
        super(MADDPG, self).__init__()
        self.maddpg = [Agent(3), Agent(4)]
        self.states = torch.empty(100000, num_agents, 24)
        self.actions = torch.empty(100000, num_agents, 2)
        self.rewards = torch.empty(100000, num_agents, 1)
        self.next_states = torch.empty(100000, num_agents, 24)
        self.dones = torch.empty(100000, num_agents, 1)
        self.idx = 0
        self.count = 0
    
    def act(self, states):
        return torch.stack([ddpg.act(states[i]) for i, ddpg in enumerate(self.maddpg)])
    
    def push(self, states, actions, rewards, next_states, dones):
        self.states[self.idx] = states
        self.actions[self.idx] = actions
        self.rewards[self.idx] = torch.tensor(rewards, dtype = torch.float).unsqueeze(1)
        self.next_states[self.idx] = next_states
        self.dones[self.idx] = torch.tensor(dones, dtype = torch.float).unsqueeze(1)
        
        self.count += 1
        self.idx = self.count % 100000
        
    def sample(self):
        sample_range = min(self.count, 99999)
        batch_idxs = random.sample(range(sample_range), 128)
        return self.states[batch_idxs], self.actions[batch_idxs], self.rewards[batch_idxs], self.next_states[batch_idxs], self.dones[batch_idxs]

    def step(self):
        if self.count >= 128:
            states, actions, rewards, next_states, dones = self.sample()
            rewards = (rewards[:,0,:]+rewards[:,1,:])
            for i, ddpg in enumerate(self.maddpg):
                next_actions = torch.cat(([self.maddpg[j].actor(next_states[:,j,:]).detach() for j in range(len(self.maddpg))]), dim = -1).reshape(128, 2, 2)
                actions_pred = torch.cat(([self.maddpg[j].actor(states[:,j,:]) if j == i else self.maddpg[j].actor(states[:,j,:].detach()) for j in range(len(self.maddpg))]), dim = -1).reshape(128, 2, 2)
                ddpg.step(states, actions, rewards, next_states, dones[:,i,:], next_actions, actions_pred)
                    
maddpg = MADDPG()

In [None]:
scores = []
scores_window = deque(maxlen=100)
for i in range(3000):                                         
    env_info = env.reset(train_mode=True)[brain_name]     
    states = torch.tensor(env_info.vector_observations, dtype = torch.float) 
    score = np.zeros(num_agents) 
    while True:
        actions = maddpg.act(states)
        env_info = env.step(actions.cpu().data.numpy())[brain_name]           
        next_states = torch.tensor(env_info.vector_observations, dtype = torch.float)  
        rewards = env_info.rewards
        dones = env_info.local_done                        
        score += env_info.rewards
        maddpg.push(states, actions, rewards, next_states, dones)
        maddpg.step()
        states = next_states                               
        if np.any(dones):                                  
            break
    scores_window.append(score)       
    scores.append(score)
    
    print('\rEpisode {}\tReward: {:.5f}\tAverage Reward: {:.5f}'.format(i, np.mean(score), np.mean(scores_window)))



Episode 0	Reward: -0.00500	Average Reward: -0.00500
Episode 1	Reward: -0.00500	Average Reward: -0.00500
Episode 2	Reward: -0.00500	Average Reward: -0.00500
Episode 3	Reward: -0.00500	Average Reward: -0.00500
Episode 4	Reward: -0.00500	Average Reward: -0.00500
Episode 5	Reward: 0.04500	Average Reward: 0.00333
Episode 6	Reward: 0.04500	Average Reward: 0.00929
Episode 7	Reward: -0.00500	Average Reward: 0.00750
Episode 8	Reward: -0.00500	Average Reward: 0.00611
Episode 9	Reward: -0.00500	Average Reward: 0.00500
Episode 10	Reward: -0.00500	Average Reward: 0.00409
Episode 11	Reward: -0.00500	Average Reward: 0.00333
Episode 12	Reward: -0.00500	Average Reward: 0.00269
Episode 13	Reward: -0.00500	Average Reward: 0.00214
Episode 14	Reward: 0.04500	Average Reward: 0.00500
Episode 15	Reward: -0.00500	Average Reward: 0.00438
Episode 16	Reward: -0.00500	Average Reward: 0.00382
Episode 17	Reward: -0.00500	Average Reward: 0.00333
Episode 18	Reward: -0.00500	Average Reward: 0.00289
Episode 19	Reward: -

Episode 157	Reward: -0.00500	Average Reward: 0.00100
Episode 158	Reward: -0.00500	Average Reward: 0.00050
Episode 159	Reward: -0.00500	Average Reward: 0.00050
Episode 160	Reward: -0.00500	Average Reward: -0.00050
Episode 161	Reward: -0.00500	Average Reward: -0.00050
Episode 162	Reward: -0.00500	Average Reward: -0.00050
Episode 163	Reward: -0.00500	Average Reward: -0.00050
Episode 164	Reward: -0.00500	Average Reward: -0.00050
Episode 165	Reward: -0.00500	Average Reward: -0.00050
Episode 166	Reward: -0.00500	Average Reward: -0.00050
Episode 167	Reward: -0.00500	Average Reward: -0.00050
Episode 168	Reward: -0.00500	Average Reward: -0.00050
Episode 169	Reward: -0.00500	Average Reward: -0.00050
Episode 170	Reward: -0.00500	Average Reward: -0.00050
Episode 171	Reward: -0.00500	Average Reward: -0.00050
Episode 172	Reward: -0.00500	Average Reward: -0.00050
Episode 173	Reward: -0.00500	Average Reward: -0.00050
Episode 174	Reward: -0.00500	Average Reward: -0.00050
Episode 175	Reward: -0.00500	Av

Episode 310	Reward: -0.00500	Average Reward: -0.00250
Episode 311	Reward: -0.00500	Average Reward: -0.00250
Episode 312	Reward: -0.00500	Average Reward: -0.00250
Episode 313	Reward: -0.00500	Average Reward: -0.00300
Episode 314	Reward: -0.00500	Average Reward: -0.00300
Episode 315	Reward: -0.00500	Average Reward: -0.00300
Episode 316	Reward: -0.00500	Average Reward: -0.00300
Episode 317	Reward: -0.00500	Average Reward: -0.00300
Episode 318	Reward: -0.00500	Average Reward: -0.00300
Episode 319	Reward: 0.04500	Average Reward: -0.00250
Episode 320	Reward: -0.00500	Average Reward: -0.00250
Episode 321	Reward: -0.00500	Average Reward: -0.00250
Episode 322	Reward: -0.00500	Average Reward: -0.00250
Episode 323	Reward: -0.00500	Average Reward: -0.00250
Episode 324	Reward: 0.04500	Average Reward: -0.00200
Episode 325	Reward: -0.00500	Average Reward: -0.00200
Episode 326	Reward: -0.00500	Average Reward: -0.00200
Episode 327	Reward: -0.00500	Average Reward: -0.00200
Episode 328	Reward: -0.00500	A

Episode 465	Reward: 0.04500	Average Reward: 0.01350
Episode 466	Reward: 0.04500	Average Reward: 0.01400
Episode 467	Reward: -0.00500	Average Reward: 0.01400
Episode 468	Reward: -0.00500	Average Reward: 0.01350
Episode 469	Reward: 0.04500	Average Reward: 0.01400
Episode 470	Reward: 0.04500	Average Reward: 0.01450
Episode 471	Reward: 0.04500	Average Reward: 0.01500
Episode 472	Reward: -0.00500	Average Reward: 0.01450
Episode 473	Reward: -0.00500	Average Reward: 0.01450
Episode 474	Reward: -0.00500	Average Reward: 0.01450
Episode 475	Reward: 0.04500	Average Reward: 0.01500
Episode 476	Reward: -0.00500	Average Reward: 0.01500
Episode 477	Reward: 0.04500	Average Reward: 0.01550
Episode 478	Reward: 0.04500	Average Reward: 0.01600
Episode 479	Reward: 0.04500	Average Reward: 0.01600
Episode 480	Reward: 0.04500	Average Reward: 0.01650
Episode 481	Reward: 0.04500	Average Reward: 0.01650
Episode 482	Reward: 0.04500	Average Reward: 0.01700
Episode 483	Reward: 0.04500	Average Reward: 0.01700
Episod

Episode 622	Reward: 0.04500	Average Reward: 0.02800
Episode 623	Reward: 0.04500	Average Reward: 0.02800
Episode 624	Reward: -0.00500	Average Reward: 0.02800
Episode 625	Reward: 0.04500	Average Reward: 0.02850
Episode 626	Reward: 0.04500	Average Reward: 0.02900
Episode 627	Reward: 0.04500	Average Reward: 0.02850
Episode 628	Reward: -0.00500	Average Reward: 0.02800
Episode 629	Reward: -0.00500	Average Reward: 0.02750
Episode 630	Reward: 0.04500	Average Reward: 0.02800
Episode 631	Reward: -0.00500	Average Reward: 0.02750
Episode 632	Reward: 0.04500	Average Reward: 0.02700
Episode 633	Reward: 0.04500	Average Reward: 0.02750
Episode 634	Reward: -0.00500	Average Reward: 0.02600
Episode 635	Reward: 0.04500	Average Reward: 0.02600
Episode 636	Reward: 0.04500	Average Reward: 0.02600
Episode 637	Reward: 0.04500	Average Reward: 0.02650
Episode 638	Reward: -0.00500	Average Reward: 0.02600
Episode 639	Reward: 0.04500	Average Reward: 0.02600
Episode 640	Reward: 0.04500	Average Reward: 0.02600
Episod

Episode 779	Reward: 0.04500	Average Reward: 0.03650
Episode 780	Reward: 0.04500	Average Reward: 0.03650
Episode 781	Reward: 0.04500	Average Reward: 0.03650
Episode 782	Reward: 0.04500	Average Reward: 0.03700
Episode 783	Reward: -0.00500	Average Reward: 0.03650
Episode 784	Reward: 0.04500	Average Reward: 0.03650
Episode 785	Reward: 0.04500	Average Reward: 0.03700
Episode 786	Reward: 0.04500	Average Reward: 0.03700
Episode 787	Reward: 0.04500	Average Reward: 0.03700
Episode 788	Reward: 0.04500	Average Reward: 0.03750
Episode 789	Reward: -0.00500	Average Reward: 0.03750
Episode 790	Reward: 0.04500	Average Reward: 0.03800
Episode 791	Reward: 0.04500	Average Reward: 0.03700
Episode 792	Reward: 0.04500	Average Reward: 0.03700
Episode 793	Reward: 0.04500	Average Reward: 0.03700
Episode 794	Reward: 0.04500	Average Reward: 0.03700
Episode 795	Reward: 0.04500	Average Reward: 0.03750
Episode 796	Reward: 0.04500	Average Reward: 0.03750
Episode 797	Reward: 0.04500	Average Reward: 0.03750
Episode 79

Episode 937	Reward: 0.04500	Average Reward: 0.04350
Episode 938	Reward: 0.04500	Average Reward: 0.04350
Episode 939	Reward: 0.04500	Average Reward: 0.04350
Episode 940	Reward: 0.04500	Average Reward: 0.04350
Episode 941	Reward: 0.04500	Average Reward: 0.04350
Episode 942	Reward: 0.04500	Average Reward: 0.04350
Episode 943	Reward: 0.04500	Average Reward: 0.04350
Episode 944	Reward: 0.04500	Average Reward: 0.04350
Episode 945	Reward: 0.04500	Average Reward: 0.04350
Episode 946	Reward: 0.04500	Average Reward: 0.04350
Episode 947	Reward: 0.04500	Average Reward: 0.04350
Episode 948	Reward: 0.04500	Average Reward: 0.04350
Episode 949	Reward: 0.04500	Average Reward: 0.04350
Episode 950	Reward: 0.04500	Average Reward: 0.04350
Episode 951	Reward: 0.04500	Average Reward: 0.04350
Episode 952	Reward: 0.24500	Average Reward: 0.04550
Episode 953	Reward: 0.04500	Average Reward: 0.04550
Episode 954	Reward: 0.04500	Average Reward: 0.04500
Episode 955	Reward: 0.04500	Average Reward: 0.04500
Episode 956	

Episode 1093	Reward: 0.09500	Average Reward: 0.08590
Episode 1094	Reward: 0.14500	Average Reward: 0.08690
Episode 1095	Reward: 0.09500	Average Reward: 0.08740
Episode 1096	Reward: 0.04500	Average Reward: 0.08690
Episode 1097	Reward: 0.04500	Average Reward: 0.08640
Episode 1098	Reward: 0.09500	Average Reward: 0.08640
Episode 1099	Reward: 0.04500	Average Reward: 0.08640
Episode 1100	Reward: 0.04500	Average Reward: 0.08640
Episode 1101	Reward: 0.09500	Average Reward: 0.08690
Episode 1102	Reward: 0.09500	Average Reward: 0.08740
Episode 1103	Reward: 0.09500	Average Reward: 0.08790
Episode 1104	Reward: 0.09500	Average Reward: 0.08740
Episode 1105	Reward: 0.14500	Average Reward: 0.08840
Episode 1106	Reward: 0.04500	Average Reward: 0.08840
Episode 1107	Reward: 0.04500	Average Reward: 0.08840
Episode 1108	Reward: 0.14500	Average Reward: 0.08890
Episode 1109	Reward: 0.09500	Average Reward: 0.08940
Episode 1110	Reward: 0.09500	Average Reward: 0.08990
Episode 1111	Reward: 0.09500	Average Reward: 0

Episode 1248	Reward: 0.09500	Average Reward: 0.07995
Episode 1249	Reward: 0.19500	Average Reward: 0.08045
Episode 1250	Reward: 0.04500	Average Reward: 0.08045
Episode 1251	Reward: 0.04500	Average Reward: 0.07995
Episode 1252	Reward: 0.04500	Average Reward: 0.07945
Episode 1253	Reward: 0.04500	Average Reward: 0.07895
Episode 1254	Reward: 0.04500	Average Reward: 0.07895
Episode 1255	Reward: 0.04500	Average Reward: 0.07745
Episode 1256	Reward: 0.14500	Average Reward: 0.07795
Episode 1257	Reward: 0.04500	Average Reward: 0.07745
Episode 1258	Reward: 0.04500	Average Reward: 0.07695
Episode 1259	Reward: 0.04500	Average Reward: 0.07595
Episode 1260	Reward: 0.04500	Average Reward: 0.07545
Episode 1261	Reward: 0.14500	Average Reward: 0.07550
Episode 1262	Reward: 0.09500	Average Reward: 0.07600
Episode 1263	Reward: 0.09500	Average Reward: 0.07650
Episode 1264	Reward: 0.14500	Average Reward: 0.07700
Episode 1265	Reward: 0.04500	Average Reward: 0.07700
Episode 1266	Reward: 0.04500	Average Reward: 0

Episode 1403	Reward: -0.00500	Average Reward: 0.09500
Episode 1404	Reward: 0.14500	Average Reward: 0.09600
Episode 1405	Reward: 0.04500	Average Reward: 0.09600
Episode 1406	Reward: 0.24500	Average Reward: 0.09800
Episode 1407	Reward: 0.04500	Average Reward: 0.09800
Episode 1408	Reward: 0.14500	Average Reward: 0.09850
Episode 1409	Reward: 0.04500	Average Reward: 0.09850
Episode 1410	Reward: 0.24500	Average Reward: 0.09900
Episode 1411	Reward: 0.19500	Average Reward: 0.10050
Episode 1412	Reward: 0.14500	Average Reward: 0.10150
Episode 1413	Reward: 0.14500	Average Reward: 0.10250
Episode 1414	Reward: 0.14500	Average Reward: 0.10350
Episode 1415	Reward: 0.14500	Average Reward: 0.10350
Episode 1416	Reward: 0.09500	Average Reward: 0.10300
Episode 1417	Reward: 0.04500	Average Reward: 0.10250
Episode 1418	Reward: 0.19500	Average Reward: 0.10350
Episode 1419	Reward: 0.14500	Average Reward: 0.10450
Episode 1420	Reward: 0.44500	Average Reward: 0.10800
Episode 1421	Reward: 0.14500	Average Reward: 

Episode 1558	Reward: 0.54500	Average Reward: 0.42405
Episode 1559	Reward: 0.04500	Average Reward: 0.42205
Episode 1560	Reward: 1.09500	Average Reward: 0.42705
Episode 1561	Reward: 0.04500	Average Reward: 0.42655
Episode 1562	Reward: 0.09500	Average Reward: 0.42705
Episode 1563	Reward: 0.59500	Average Reward: 0.43255
Episode 1564	Reward: 0.29500	Average Reward: 0.43155
Episode 1565	Reward: 0.79500	Average Reward: 0.43505
Episode 1566	Reward: 1.64500	Average Reward: 0.44655
Episode 1567	Reward: 0.09500	Average Reward: 0.44705
Episode 1568	Reward: 0.09500	Average Reward: 0.44705
Episode 1569	Reward: 0.94500	Average Reward: 0.45305
Episode 1570	Reward: 0.89500	Average Reward: 0.46105
Episode 1571	Reward: 0.49500	Average Reward: 0.46105
Episode 1572	Reward: 0.14500	Average Reward: 0.45855
Episode 1573	Reward: 0.04500	Average Reward: 0.45705
Episode 1574	Reward: 0.09500	Average Reward: 0.45705
Episode 1575	Reward: 0.59500	Average Reward: 0.46105
Episode 1576	Reward: 0.84500	Average Reward: 0

Episode 1713	Reward: 2.60000	Average Reward: 0.75050
Episode 1714	Reward: 2.09500	Average Reward: 0.77100
Episode 1715	Reward: 0.04500	Average Reward: 0.74595
Episode 1716	Reward: 0.04500	Average Reward: 0.73995
Episode 1717	Reward: 0.04500	Average Reward: 0.73995
Episode 1718	Reward: 2.04500	Average Reward: 0.75395
Episode 1719	Reward: 0.04500	Average Reward: 0.75395
Episode 1720	Reward: 0.04500	Average Reward: 0.75145
Episode 1721	Reward: 2.24500	Average Reward: 0.77345
Episode 1722	Reward: 0.29500	Average Reward: 0.76945
Episode 1723	Reward: 2.60000	Average Reward: 0.79500
Episode 1724	Reward: 2.65000	Average Reward: 0.80455
Episode 1725	Reward: 0.04500	Average Reward: 0.79605
Episode 1726	Reward: 0.04500	Average Reward: 0.77905
Episode 1727	Reward: 0.04500	Average Reward: 0.77905
Episode 1728	Reward: 0.04500	Average Reward: 0.75350
Episode 1729	Reward: 2.60000	Average Reward: 0.77255
Episode 1730	Reward: 0.09500	Average Reward: 0.77305
Episode 1731	Reward: 2.04500	Average Reward: 0

Episode 1868	Reward: 0.09500	Average Reward: 1.34150
Episode 1869	Reward: 0.04500	Average Reward: 1.31645
Episode 1870	Reward: 0.04500	Average Reward: 1.29040
Episode 1871	Reward: 2.60000	Average Reward: 1.30745
Episode 1872	Reward: 2.65000	Average Reward: 1.32400
Episode 1873	Reward: 0.04500	Average Reward: 1.32200
Episode 1874	Reward: 0.04500	Average Reward: 1.32100
Episode 1875	Reward: 0.29500	Average Reward: 1.32350
Episode 1876	Reward: 0.04500	Average Reward: 1.32350
Episode 1877	Reward: 2.60000	Average Reward: 1.34905
Episode 1878	Reward: 0.94500	Average Reward: 1.35405
Episode 1879	Reward: 0.04500	Average Reward: 1.35055
Episode 1880	Reward: 0.04500	Average Reward: 1.32500
Episode 1881	Reward: 0.94500	Average Reward: 1.30795
Episode 1882	Reward: 0.14500	Average Reward: 1.28390
Episode 1883	Reward: 0.04500	Average Reward: 1.27940
Episode 1884	Reward: 2.60000	Average Reward: 1.30495
Episode 1885	Reward: 0.04500	Average Reward: 1.29945
Episode 1886	Reward: 0.74500	Average Reward: 1

Episode 2023	Reward: 2.60000	Average Reward: 1.59115
Episode 2024	Reward: 0.54500	Average Reward: 1.57010
Episode 2025	Reward: 0.59500	Average Reward: 1.55310
Episode 2026	Reward: 2.60000	Average Reward: 1.57765
Episode 2027	Reward: 0.04500	Average Reward: 1.57765
Episode 2028	Reward: 2.60000	Average Reward: 1.60320
Episode 2029	Reward: 0.14500	Average Reward: 1.58820
Episode 2030	Reward: 2.44500	Average Reward: 1.58665
Episode 2031	Reward: 2.60000	Average Reward: 1.58615
Episode 2032	Reward: 2.14500	Average Reward: 1.58160
Episode 2033	Reward: 2.60000	Average Reward: 1.58160
Episode 2034	Reward: 2.14500	Average Reward: 1.60210
Episode 2035	Reward: 0.19500	Average Reward: 1.58610
Episode 2036	Reward: 2.60000	Average Reward: 1.58610
Episode 2037	Reward: 1.44500	Average Reward: 1.59815
Episode 2038	Reward: 2.60000	Average Reward: 1.59815
