## Libraries

In [1]:
import numpy as np
import gym
import matplotlib.pyplot as plt
import time

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

## Hyperparameters

In [2]:
ENV = 'LunarLander-v2'

EPISODES = 50000 # Number of times the enviroment is ran

LOSS_CLIPPING = 0.2 # Approximated values stated in the original paper
EPOCHS = 20 # Epochs to train the network (recommended between 3 and 30)
NOISE = 1.0 # Standard deviation

GAMMA = 0.99 # Used for the estimated reward

BUFFER_SIZE = 1024 # Buffer of experiences
BATCH_SIZE = 512 # Batch size for the neural nets
NUM_ACTIONS = 4 # Number of possible action in the environment
NUM_STATES = 8 # Number of possible states in the exvironment
ENTROPY_LOSS = 0.001 # Constant value for the entropy (entropy used for exploration)
LR = 0.00025  # Learning rate

# Used to feed the actor when predicting
DUMMY_ACTION, DUMMY_VALUE = np.zeros((1, NUM_ACTIONS)), np.zeros((1, 1))

## Class memory


In [3]:
# Memory of the experiences
class Memory:
    def __init__(self):
        self.episode_batch = [[], [], []]
        self.global_batch = [[], [], [], []]
        self.episode_rewards = []
        self.historical_rewards = []

    def reset_espisode_batch(self):
        self.global_batch = [[], [], [], []]
        self.episode_batch = [[], [], []]
        self.episode_rewards = []

## Custom loss function for PPO

In [4]:
# Custom loss functions for the PPO
def proximal_policy_optimization_loss(advantages, prediction_picks):
    def loss(actions, y_pred):
        #advantages, prediction_picks, actions = y_true[:, :1], y_true[:, 1:1+self.action_space], y_true[:, 1+self.action_space:]
        LOSS_CLIPPING = 0.2
        ENTROPY_LOSS = 0.001
        
        prob = actions * y_pred
        old_prob = actions * prediction_picks

        prob = K.clip(prob, 1e-10, 1.0)
        old_prob = K.clip(old_prob, 1e-10, 1.0)

        ratio = K.exp(K.log(prob) - K.log(old_prob))
        
        p1 = ratio * advantages
        p2 = K.clip(ratio, min_value=1 - LOSS_CLIPPING, max_value=1 + LOSS_CLIPPING) * advantages

        actor_loss = -K.mean(K.minimum(p1, p2))

        entropy = -(y_pred * K.log(y_pred + 1e-10))
        entropy = ENTROPY_LOSS * K.mean(entropy)
        
        total_loss = actor_loss - entropy

        return total_loss
    return loss

## PPO Agent

In [5]:
class Agent:
    # Constructor of the class
    def __init__(self):
        self.actor = self.create_actor()
        self.critic = self.create_critic()
        self.memory = Memory()

    # Create the actor used to select the action given an state
    def create_actor(self):
        # Define three inputs as the advantage and old prediction is used for the custom loss
        input_state = Input(shape=(NUM_STATES,))
        input_advantage = Input(shape=(1,))
        imput_old_prediction = Input(shape=(NUM_ACTIONS,))

        x = Dense(512, activation='relu', kernel_initializer=tf.random_normal_initializer(stddev=0.01))(input_state)
        x = Dense(256, activation='relu', kernel_initializer=tf.random_normal_initializer(stddev=0.01))(x)
        x = Dense(64, activation='relu', kernel_initializer=tf.random_normal_initializer(stddev=0.01))(x)

        # Softmax as there are different probabilities depending on the action
        output_layer = Dense(NUM_ACTIONS, activation='softmax', name='output')(x)

        model = Model(inputs=[input_state, input_advantage, imput_old_prediction], outputs=[output_layer])

        # Compile the model with the custom loss
        model.compile(optimizer=Adam(lr=LR),
                      loss=[proximal_policy_optimization_loss(
                          advantages=input_advantage,
                          prediction_picks=imput_old_prediction)])
        model.summary()

        return model

    # Create the critic which will criticise how the actor is performing
    def create_critic(self):
        # Define the architectire of the network
        input_layer = Input(shape=(NUM_STATES,))
        
        x = Dense(512, activation='relu', kernel_initializer=tf.random_normal_initializer(stddev=0.01))(input_layer)
        x = Dense(256, activation='relu', kernel_initializer=tf.random_normal_initializer(stddev=0.01))(x)
        x = Dense(64, activation='relu', kernel_initializer=tf.random_normal_initializer(stddev=0.01))(x)


        # Linear output to know how good the action is
        ouput_layer = Dense(1)(x)

        model = Model(inputs=[input_layer], outputs=[ouput_layer])

        # Compile it with mse loss and gradient descent
        model.compile(optimizer=Adam(lr=LR), 
                      loss='mse')

        return model

    # Get the action given the current state
    def get_action(self, state):
        probabilities = self.actor.predict([state.reshape(1, NUM_STATES), DUMMY_VALUE, DUMMY_ACTION])
        action = np.random.choice(NUM_ACTIONS, p=np.nan_to_num(probabilities[0]))
        action_matrix = np.zeros(NUM_ACTIONS)
        action_matrix[action] = 1
        return action, action_matrix, probabilities

    # Transform rewards of the episode as the discount has to be applied in the first ones regarding the last ones
    def transform_reward(self):
        self.memory.historical_rewards.append(sum(self.memory.episode_rewards))

        for j in range(len(self.memory.episode_rewards) - 2, -1, -1):
            self.memory.episode_rewards[j] += self.memory.episode_rewards[j + 1] * GAMMA

## Environment

In [6]:
# Create simulated environment
def create_environment():
    environment = gym.make(ENV)
    return environment

In [7]:
if __name__ == '__main__':
    start = time.time()
    env = create_environment() 
    ag = Agent()
    episode = 1
    goal_reached = False

    # Iterate until the number of episodes is reached
    while episode < EPISODES:
        # Reset episode and global batch and the environment to start a new one
        ag.memory.reset_espisode_batch()
        state = env.reset()
        states = []
        next_states = []
        dones = []
        # Iterate until the global batch is bigger than the buffer
        while len(ag.memory.global_batch[0]) < BUFFER_SIZE:
            # Get the action given the current state
            action, action_matrix, predicted_action = ag.get_action(state)
            # Step in the enviroment and gather all parameters
            next_state, reward, done, info = env.step(action)
            ag.memory.episode_rewards.append(reward)
            last_reward = sum(ag.memory.episode_rewards)

            # Add to the episode batch the informetions gathered
            ag.memory.episode_batch[0].append(state)
            ag.memory.episode_batch[1].append(action_matrix)
            ag.memory.episode_batch[2].append(predicted_action)
            states.append(state)
            next_states.append(next_state)
            state = next_state
            dones.append(done)

            # If the episode has finished
            if done:
                # Transform rewards with discount rate
                ag.transform_reward()
                print('Last reward: ', ag.memory.episode_rewards[-1])
                # Iterate through all instances of the episode batch
                for i in range(len(ag.memory.episode_batch[0])):
                    obs, action, pred = ag.memory.episode_batch[0][i], ag.memory.episode_batch[1][i], ag.memory.episode_batch[2][i]
                    r = ag.memory.episode_rewards[i]

                    # Add all experienes to global batch
                    ag.memory.global_batch[0].append(obs)
                    ag.memory.global_batch[1].append(action)
                    ag.memory.global_batch[2].append(pred)
                    ag.memory.global_batch[3].append(r)
                
                # Reset the episode batch, episode reward and environment
                ag.memory.episode_batch = [[], [], []]
                ag.memory.episode_rewards = []
                state = env.reset()

                if episode >= 100:
                    print('EPISODE: ', episode, ' - SCORE: ', ag.memory.historical_rewards[-1], ' - MEAN SCORE: ', sum(ag.memory.historical_rewards[-100:])/100)

                    if sum(ag.memory.historical_rewards[-100:])/100 >= 200:
                        goal_reached = True
                        break
                else:
                    print('EPISODE: ', episode, ' - SCORE: ', ag.memory.historical_rewards[-1], ' - MEAN SCORE: ', sum(ag.memory.historical_rewards)/len(ag.memory.historical_rewards))
                
                episode += 1

        if goal_reached == True:
            break

        # Get the arrays to feed te neural nets
        obs, action, pred, reward = np.array(ag.memory.global_batch[0]), np.array(ag.memory.global_batch[1]), np.array(ag.memory.global_batch[2]), np.reshape(np.array(ag.memory.global_batch[3]), (len(ag.memory.global_batch[3]), 1))
        pred = np.reshape(pred, (pred.shape[0], pred.shape[2]))

        # Ensure that they all have the same size
        obs, action, pred, reward = obs[:BUFFER_SIZE], action[:BUFFER_SIZE], pred[:BUFFER_SIZE], reward[:BUFFER_SIZE]
        states, next_states = np.array(states[:BUFFER_SIZE]), np.array(next_states[:BUFFER_SIZE])
        old_prediction = pred

        
        
        
        values = ag.critic.predict(states)
        next_values = ag.critic.predict(next_states)
        gamma = 0.99
        lamda = 0.9
        deltas = [r + gamma * (1 - d) * nv - v for r, d, nv, v in zip(reward, dones, next_values, values)]
        
        for t in reversed(range(len(deltas) - 1)):
            deltas[t] = deltas[t] + (1 - dones[t]) * gamma * lamda * deltas[t + 1]
        #print(np.shape(deltas))
        deltas = np.array(deltas)
        
        deltas = (deltas - deltas.mean()) / (deltas.std() + 1e-8)
        
        
        # Get advantage with the rewards taken from the environment predictions of the critic given the states
        #advantage = reward - pred_values
        print('ACTUALIZA')
        # Update weights of the actor with the states, advanges obtained from the critic and predictions made. Target values are actions
        actor_loss = ag.actor.fit([obs, deltas, old_prediction], [action], batch_size=BATCH_SIZE, shuffle=True, epochs=EPOCHS, verbose=0)
        # Update weights of the critic with the states. Targe values are actual rewards
        critic_loss = ag.critic.fit([obs], [reward], batch_size=BATCH_SIZE, shuffle=True, epochs=EPOCHS, verbose=0)

    # Plot a graph with the rewards over the episodes
    plt.plot(ag.memory.historical_rewards)
    plt.xlabel("Episode")
    plt.ylabel("Score")
    
    end = time.time()
    print((end-start)/60, 'MINUTES')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 8)]          0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 512)          4608        input_1[0][0]                    
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 256)          131328      dense[0][0]                      
__________________________________________________________________________________________________
dense_2 (Dense)                 (None, 64)           16448       dense_1[0][0]                    
_____________

Last reward:  -100
EPISODE:  62  - SCORE:  -244.83831116307974  - MEAN SCORE:  -151.33635020312946
Last reward:  -100
EPISODE:  63  - SCORE:  -93.56846569654215  - MEAN SCORE:  -150.41939965540584
Last reward:  -100
EPISODE:  64  - SCORE:  -110.493256088692  - MEAN SCORE:  -149.79555366217593
Last reward:  -100
EPISODE:  65  - SCORE:  -136.15373792059026  - MEAN SCORE:  -149.58567957384386
Last reward:  -100
EPISODE:  66  - SCORE:  -254.02221814685174  - MEAN SCORE:  -151.16805137040458
Last reward:  -100
EPISODE:  67  - SCORE:  -161.67726496291357  - MEAN SCORE:  -151.32490530462113
Last reward:  -100
EPISODE:  68  - SCORE:  -124.75863661113293  - MEAN SCORE:  -150.9342248826581
Last reward:  -100
EPISODE:  69  - SCORE:  -135.95922057150355  - MEAN SCORE:  -150.71719583467035
Last reward:  -100
EPISODE:  70  - SCORE:  -134.82254408288264  - MEAN SCORE:  -150.4901293810734
Last reward:  -100
EPISODE:  71  - SCORE:  -209.06617676520614  - MEAN SCORE:  -151.31514413296256
Last reward:  -

Last reward:  -100
EPISODE:  145  - SCORE:  -250.6326173010915  - MEAN SCORE:  -158.7886545351671
Last reward:  -100
EPISODE:  146  - SCORE:  -112.90293888865398  - MEAN SCORE:  -158.7956017572812
Last reward:  -100
EPISODE:  147  - SCORE:  -114.69385603229117  - MEAN SCORE:  -157.6999980635805
Last reward:  -100
EPISODE:  148  - SCORE:  -193.79165466356534  - MEAN SCORE:  -158.61170015416562
Last reward:  -100
EPISODE:  149  - SCORE:  -189.0107400082078  - MEAN SCORE:  -159.32645869020504
Last reward:  -100
EPISODE:  150  - SCORE:  -158.69560443526592  - MEAN SCORE:  -159.61221393033065
ACTUALIZA
Last reward:  -100
EPISODE:  151  - SCORE:  -103.7049517338339  - MEAN SCORE:  -160.95220770313372
Last reward:  -100
EPISODE:  152  - SCORE:  -161.9045966933516  - MEAN SCORE:  -161.52177648489157
Last reward:  -100
EPISODE:  153  - SCORE:  -216.56246962801762  - MEAN SCORE:  -162.31967207186307
Last reward:  -100
EPISODE:  154  - SCORE:  -165.0332501901438  - MEAN SCORE:  -161.8738815298521

Last reward:  -100
EPISODE:  227  - SCORE:  -141.07405104260684  - MEAN SCORE:  -138.54900776700472
Last reward:  -100
EPISODE:  228  - SCORE:  -177.2278068106695  - MEAN SCORE:  -138.23567333375112
Last reward:  -100
EPISODE:  229  - SCORE:  -107.46443056291423  - MEAN SCORE:  -138.1400024986598
Last reward:  -100
EPISODE:  230  - SCORE:  -201.85925104469186  - MEAN SCORE:  -139.33420526517176
Last reward:  -100
EPISODE:  231  - SCORE:  -79.60076552525611  - MEAN SCORE:  -138.76464800936049
Last reward:  -100
EPISODE:  232  - SCORE:  -74.04169082368523  - MEAN SCORE:  -138.35004723149382
Last reward:  -100
EPISODE:  233  - SCORE:  -145.66062424295944  - MEAN SCORE:  -137.94544490597266
Last reward:  -100
EPISODE:  234  - SCORE:  -107.95475760867842  - MEAN SCORE:  -136.20340078849418
ACTUALIZA
Last reward:  -100
EPISODE:  235  - SCORE:  -165.83593919025625  - MEAN SCORE:  -134.6772904244385
Last reward:  -100
EPISODE:  236  - SCORE:  -63.794768830566284  - MEAN SCORE:  -133.5701617546

Last reward:  -100
EPISODE:  310  - SCORE:  -117.50641187086761  - MEAN SCORE:  -128.19531921502187
Last reward:  -100
EPISODE:  311  - SCORE:  -86.5698356581359  - MEAN SCORE:  -128.10936814494164
Last reward:  -100
EPISODE:  312  - SCORE:  -132.9592213466031  - MEAN SCORE:  -128.13862033621785
ACTUALIZA
Last reward:  -100
EPISODE:  313  - SCORE:  -85.65615166612919  - MEAN SCORE:  -128.10061131122922
Last reward:  -100
EPISODE:  314  - SCORE:  -106.2720301367716  - MEAN SCORE:  -128.03678491010177
Last reward:  -100
EPISODE:  315  - SCORE:  -135.38381743403363  - MEAN SCORE:  -127.89506773465686
Last reward:  -100
EPISODE:  316  - SCORE:  -134.8442892656309  - MEAN SCORE:  -128.15869516117314
Last reward:  -100
EPISODE:  317  - SCORE:  -131.4941074938049  - MEAN SCORE:  -127.56501807701305
Last reward:  -100
EPISODE:  318  - SCORE:  -110.34526313535244  - MEAN SCORE:  -127.74966162744948
Last reward:  -100
EPISODE:  319  - SCORE:  -97.6153952236985  - MEAN SCORE:  -127.97992569037831

Last reward:  -100
EPISODE:  393  - SCORE:  -83.71208893368512  - MEAN SCORE:  -119.19014082143259
Last reward:  -100
EPISODE:  394  - SCORE:  -251.49920940910033  - MEAN SCORE:  -120.48022157967952
Last reward:  -100
EPISODE:  395  - SCORE:  -106.4848445076856  - MEAN SCORE:  -120.16273147525473
Last reward:  -100
EPISODE:  396  - SCORE:  -92.25025400241468  - MEAN SCORE:  -119.83018174747227
Last reward:  -100
EPISODE:  397  - SCORE:  -57.67696270004441  - MEAN SCORE:  -119.6438478323725
Last reward:  -100
EPISODE:  398  - SCORE:  -123.5856387735241  - MEAN SCORE:  -118.47989650380397
Last reward:  -100
EPISODE:  399  - SCORE:  -116.29080070707646  - MEAN SCORE:  -118.61315581075425
Last reward:  -100
EPISODE:  400  - SCORE:  -91.21994535000204  - MEAN SCORE:  -118.07064863881888
Last reward:  -100
EPISODE:  401  - SCORE:  -79.5309447895464  - MEAN SCORE:  -117.94102178197292
Last reward:  -100
EPISODE:  402  - SCORE:  -100.2926832394847  - MEAN SCORE:  -117.82237657036181
ACTUALIZA


Last reward:  -100
EPISODE:  475  - SCORE:  -105.78332176416394  - MEAN SCORE:  -117.4057492108153
Last reward:  -100
EPISODE:  476  - SCORE:  -113.63650308792491  - MEAN SCORE:  -117.1260534224043
Last reward:  -100
EPISODE:  477  - SCORE:  -78.40018791240414  - MEAN SCORE:  -116.485916093728
Last reward:  -100
EPISODE:  478  - SCORE:  -106.43434365360773  - MEAN SCORE:  -116.58142248694418
Last reward:  -100
EPISODE:  479  - SCORE:  -127.75708047485116  - MEAN SCORE:  -116.45093121096039
Last reward:  -100
EPISODE:  480  - SCORE:  -100.5291037762849  - MEAN SCORE:  -116.69026663580584
Last reward:  -100
EPISODE:  481  - SCORE:  -122.61469920893106  - MEAN SCORE:  -116.64381024949475
Last reward:  -100
EPISODE:  482  - SCORE:  -121.06795295872078  - MEAN SCORE:  -116.76909859771318
Last reward:  -100
EPISODE:  483  - SCORE:  -95.83891923903204  - MEAN SCORE:  -115.7872346594724
Last reward:  -100
EPISODE:  484  - SCORE:  -73.52618713262132  - MEAN SCORE:  -114.98447735957131
Last rewa

Last reward:  -100
EPISODE:  557  - SCORE:  -121.11247600957718  - MEAN SCORE:  -115.30647023229682
Last reward:  -100
EPISODE:  558  - SCORE:  -145.63231817124228  - MEAN SCORE:  -115.44594377738429
Last reward:  -100
EPISODE:  559  - SCORE:  -112.5064678549247  - MEAN SCORE:  -115.46700923494042
Last reward:  -100
EPISODE:  560  - SCORE:  -102.03649636737639  - MEAN SCORE:  -115.18717625850856
Last reward:  -100
EPISODE:  561  - SCORE:  -121.70616716009017  - MEAN SCORE:  -115.5986868063705
Last reward:  -100
EPISODE:  562  - SCORE:  -97.33901078480349  - MEAN SCORE:  -114.75368814790488
Last reward:  -100
EPISODE:  563  - SCORE:  -154.62519955099532  - MEAN SCORE:  -115.22129371501569
Last reward:  -100
EPISODE:  564  - SCORE:  -85.11988903951456  - MEAN SCORE:  -115.26014692137419
Last reward:  -100
EPISODE:  565  - SCORE:  -18.573820015042514  - MEAN SCORE:  -114.44277037521607
ACTUALIZA
Last reward:  -100
EPISODE:  566  - SCORE:  -121.36698360866795  - MEAN SCORE:  -114.539905210

EPISODE:  639  - SCORE:  -175.88777109957493  - MEAN SCORE:  -115.19334181362956
Last reward:  -100
EPISODE:  640  - SCORE:  -116.60701515464295  - MEAN SCORE:  -115.13494553980358
ACTUALIZA
Last reward:  -100
EPISODE:  641  - SCORE:  -143.2975475052822  - MEAN SCORE:  -115.53286515812007
Last reward:  -100
EPISODE:  642  - SCORE:  -259.9608326884438  - MEAN SCORE:  -117.14980849501335
Last reward:  -100
EPISODE:  643  - SCORE:  -106.64616731961485  - MEAN SCORE:  -116.70597444195234
Last reward:  -100
EPISODE:  644  - SCORE:  -126.03531333378342  - MEAN SCORE:  -117.01747813903029
Last reward:  -100
EPISODE:  645  - SCORE:  -76.69382093825043  - MEAN SCORE:  -116.93637491224544
Last reward:  -100
EPISODE:  646  - SCORE:  -101.62559592698497  - MEAN SCORE:  -116.85410246844944
Last reward:  -100
EPISODE:  647  - SCORE:  -120.33841802465412  - MEAN SCORE:  -116.77719763666532
Last reward:  -100
EPISODE:  648  - SCORE:  -52.95866947121593  - MEAN SCORE:  -116.18345036569454
Last reward: 

Last reward:  -100
EPISODE:  722  - SCORE:  -117.23277532614995  - MEAN SCORE:  -113.93911396964955
Last reward:  -100
EPISODE:  723  - SCORE:  -120.12481930696163  - MEAN SCORE:  -114.36832128660495
Last reward:  -100
EPISODE:  724  - SCORE:  -184.01069231438404  - MEAN SCORE:  -114.7322246482552
Last reward:  -100
EPISODE:  725  - SCORE:  -87.8696570546687  - MEAN SCORE:  -114.119444265856
Last reward:  -100
EPISODE:  726  - SCORE:  -109.046628287029  - MEAN SCORE:  -114.32795932675859
Last reward:  -100
EPISODE:  727  - SCORE:  -156.97416150738187  - MEAN SCORE:  -114.49021180552977
Last reward:  -100
EPISODE:  728  - SCORE:  -125.55661839036026  - MEAN SCORE:  -114.94490876038476
Last reward:  -100
EPISODE:  729  - SCORE:  -113.47784853442624  - MEAN SCORE:  -114.80595589259357
Last reward:  -100
EPISODE:  730  - SCORE:  -103.57850810178766  - MEAN SCORE:  -114.89992726913123
Last reward:  -100
EPISODE:  731  - SCORE:  -173.04492332124772  - MEAN SCORE:  -115.61398345814914
Last re

Last reward:  -100
EPISODE:  805  - SCORE:  -113.82072531631931  - MEAN SCORE:  -107.65722001129465
Last reward:  -100
EPISODE:  806  - SCORE:  -100.78195382496517  - MEAN SCORE:  -107.65664781416463
Last reward:  -100
EPISODE:  807  - SCORE:  -88.05586694711894  - MEAN SCORE:  -107.60997825233218
Last reward:  -100
EPISODE:  808  - SCORE:  -117.11244307450806  - MEAN SCORE:  -107.48445659717328
Last reward:  -100
EPISODE:  809  - SCORE:  -115.20456088693791  - MEAN SCORE:  -107.56163007281168
Last reward:  -100
EPISODE:  810  - SCORE:  -93.9446268417904  - MEAN SCORE:  -107.579323092204
Last reward:  -100
EPISODE:  811  - SCORE:  -92.89199406864205  - MEAN SCORE:  -107.39054422448845
Last reward:  -100
EPISODE:  812  - SCORE:  -86.50063548269856  - MEAN SCORE:  -107.36689802429044
Last reward:  -100
EPISODE:  813  - SCORE:  -95.52668164354428  - MEAN SCORE:  -107.37301127104416
Last reward:  -100
EPISODE:  814  - SCORE:  -104.63769736612099  - MEAN SCORE:  -107.14630050241672
ACTUALIZ

Last reward:  -100
EPISODE:  887  - SCORE:  -101.26846389578023  - MEAN SCORE:  -113.71958110515287
Last reward:  -100
EPISODE:  888  - SCORE:  -102.50900215247262  - MEAN SCORE:  -113.844382606704
Last reward:  -100
EPISODE:  889  - SCORE:  -92.06328356177015  - MEAN SCORE:  -114.07432406477838
Last reward:  -100
EPISODE:  890  - SCORE:  -139.1880780879228  - MEAN SCORE:  -114.68949329172366
Last reward:  -100
EPISODE:  891  - SCORE:  -144.04740366949127  - MEAN SCORE:  -115.00416661917163
Last reward:  -100
EPISODE:  892  - SCORE:  -97.39203415429003  - MEAN SCORE:  -115.07530975675108
Last reward:  -100
EPISODE:  893  - SCORE:  -114.08577253849428  - MEAN SCORE:  -115.21151392101133
Last reward:  -100
EPISODE:  894  - SCORE:  -112.91243035837306  - MEAN SCORE:  -115.59687738393295
Last reward:  -100
EPISODE:  895  - SCORE:  -118.38899879709152  - MEAN SCORE:  -115.51118865420685
Last reward:  -100
EPISODE:  896  - SCORE:  -123.29249501932884  - MEAN SCORE:  -115.59050763728197
Last 

Last reward:  -100
EPISODE:  970  - SCORE:  -176.90122797947924  - MEAN SCORE:  -120.906055917082
Last reward:  -100
EPISODE:  971  - SCORE:  -134.125383598078  - MEAN SCORE:  -120.53035697943649
Last reward:  -100
EPISODE:  972  - SCORE:  -108.88352508769944  - MEAN SCORE:  -120.2976979525988
Last reward:  -100
EPISODE:  973  - SCORE:  -105.33427951504238  - MEAN SCORE:  -120.09940143082542
Last reward:  -100
EPISODE:  974  - SCORE:  -98.86532775102076  - MEAN SCORE:  -119.94991976389247
Last reward:  -100
EPISODE:  975  - SCORE:  -144.33163360103282  - MEAN SCORE:  -120.17365268170943
Last reward:  -100
EPISODE:  976  - SCORE:  -130.4921254657864  - MEAN SCORE:  -120.49644676321518
Last reward:  -100
EPISODE:  977  - SCORE:  -101.22394973236362  - MEAN SCORE:  -119.47390801876995
Last reward:  -100
EPISODE:  978  - SCORE:  -152.63349922817196  - MEAN SCORE:  -119.90318669096204
Last reward:  -100
EPISODE:  979  - SCORE:  -163.46180395330057  - MEAN SCORE:  -120.3916457472329
Last rew

Last reward:  -100
EPISODE:  1052  - SCORE:  -108.66658896306306  - MEAN SCORE:  -124.06430609575969
Last reward:  -100
EPISODE:  1053  - SCORE:  -107.13387709382279  - MEAN SCORE:  -124.10886244911332
Last reward:  -100
EPISODE:  1054  - SCORE:  -91.28286529842794  - MEAN SCORE:  -123.8579587469663
ACTUALIZA
Last reward:  -100
EPISODE:  1055  - SCORE:  -111.453636414087  - MEAN SCORE:  -124.4277779206824
Last reward:  -100
EPISODE:  1056  - SCORE:  -101.98436388347446  - MEAN SCORE:  -123.94167320903478
Last reward:  -100
EPISODE:  1057  - SCORE:  -111.86483671821897  - MEAN SCORE:  -123.7093615214058
Last reward:  -100
EPISODE:  1058  - SCORE:  -131.57622286607136  - MEAN SCORE:  -123.21626661001682
Last reward:  -100
EPISODE:  1059  - SCORE:  -100.28495842564132  - MEAN SCORE:  -122.9388009217973
Last reward:  -100
EPISODE:  1060  - SCORE:  -100.21513105257236  - MEAN SCORE:  -122.4016656048374
Last reward:  -100
EPISODE:  1061  - SCORE:  -109.3864435915008  - MEAN SCORE:  -122.3193

Last reward:  -100
EPISODE:  1133  - SCORE:  -118.19762637577085  - MEAN SCORE:  -122.70482561899307
Last reward:  -100
EPISODE:  1134  - SCORE:  -95.30745830984544  - MEAN SCORE:  -122.4433023404834
Last reward:  -100
EPISODE:  1135  - SCORE:  -126.94328091441635  - MEAN SCORE:  -122.80280669234176
Last reward:  -100
EPISODE:  1136  - SCORE:  -136.98095760231246  - MEAN SCORE:  -123.14942793469052
Last reward:  -100
EPISODE:  1137  - SCORE:  -128.93084083556099  - MEAN SCORE:  -122.64275759631101
ACTUALIZA
Last reward:  -100
EPISODE:  1138  - SCORE:  -108.01713167065505  - MEAN SCORE:  -122.36122921171753
Last reward:  -100
EPISODE:  1139  - SCORE:  -107.7145529331149  - MEAN SCORE:  -122.53122104763787
Last reward:  -100
EPISODE:  1140  - SCORE:  -129.93832622222527  - MEAN SCORE:  -122.78323590724193
Last reward:  -100
EPISODE:  1141  - SCORE:  -122.52916182336784  - MEAN SCORE:  -122.6258389055647
Last reward:  -100
EPISODE:  1142  - SCORE:  -134.34904704929417  - MEAN SCORE:  -122

EPISODE:  1216  - SCORE:  -99.99427797232747  - MEAN SCORE:  -113.25712360321478
Last reward:  -100
EPISODE:  1217  - SCORE:  -100.46091795824195  - MEAN SCORE:  -113.20579916843548
Last reward:  -100
EPISODE:  1218  - SCORE:  -80.07132952837766  - MEAN SCORE:  -112.6712278276101
Last reward:  -100
EPISODE:  1219  - SCORE:  -69.10298750310346  - MEAN SCORE:  -112.26873038282947
Last reward:  -100
EPISODE:  1220  - SCORE:  -89.59005760805918  - MEAN SCORE:  -112.01533329842049
Last reward:  -100
EPISODE:  1221  - SCORE:  -107.6968805609475  - MEAN SCORE:  -111.82450127423525
ACTUALIZA
Last reward:  -100
EPISODE:  1222  - SCORE:  -112.60398118139273  - MEAN SCORE:  -112.12074527334036
Last reward:  -100
EPISODE:  1223  - SCORE:  -97.2932085799635  - MEAN SCORE:  -111.69070346630826
Last reward:  -100
EPISODE:  1224  - SCORE:  -89.8761415163151  - MEAN SCORE:  -111.34761739530387
Last reward:  -100
EPISODE:  1225  - SCORE:  -105.31362091315928  - MEAN SCORE:  -111.30948489375731
Last rewa

Last reward:  -100
EPISODE:  1298  - SCORE:  -120.98059482621531  - MEAN SCORE:  -107.95240812593205
Last reward:  -100
EPISODE:  1299  - SCORE:  -82.73782259305298  - MEAN SCORE:  -107.66196540706564
Last reward:  -100
EPISODE:  1300  - SCORE:  -76.63256515908319  - MEAN SCORE:  -107.48103268047407
Last reward:  -100
EPISODE:  1301  - SCORE:  -92.44540499700389  - MEAN SCORE:  -107.56020096523054
Last reward:  -100
EPISODE:  1302  - SCORE:  -126.56779270230668  - MEAN SCORE:  -107.72871211665617
Last reward:  -100
EPISODE:  1303  - SCORE:  -117.2387834677629  - MEAN SCORE:  -107.80762593555204
Last reward:  -100
EPISODE:  1304  - SCORE:  -115.13467431080736  - MEAN SCORE:  -107.9208434205878
Last reward:  -100
EPISODE:  1305  - SCORE:  -119.11388863099286  - MEAN SCORE:  -107.89747022672495
Last reward:  -100
EPISODE:  1306  - SCORE:  -170.34892479182543  - MEAN SCORE:  -108.43310060016161
Last reward:  -100
EPISODE:  1307  - SCORE:  -103.9420880269338  - MEAN SCORE:  -108.49501393860

Last reward:  -100
EPISODE:  1380  - SCORE:  -106.09436241499392  - MEAN SCORE:  -115.31860354000615
ACTUALIZA
Last reward:  -100
EPISODE:  1381  - SCORE:  -131.44807184484432  - MEAN SCORE:  -115.37327857986229
Last reward:  -100
EPISODE:  1382  - SCORE:  -127.38850945745742  - MEAN SCORE:  -115.49363461148103
Last reward:  -100
EPISODE:  1383  - SCORE:  -112.08010464973661  - MEAN SCORE:  -115.39904230980974
Last reward:  -100
EPISODE:  1384  - SCORE:  -108.41984935093363  - MEAN SCORE:  -115.40941869531625
Last reward:  -100
EPISODE:  1385  - SCORE:  -112.005715776114  - MEAN SCORE:  -115.4808397111835
Last reward:  -100
EPISODE:  1386  - SCORE:  -86.12470687055452  - MEAN SCORE:  -115.45002477846161
Last reward:  -100
EPISODE:  1387  - SCORE:  -132.1829684474991  - MEAN SCORE:  -115.85337116150323
Last reward:  -100
EPISODE:  1388  - SCORE:  -100.42336145184882  - MEAN SCORE:  -115.7729188618917
Last reward:  -100
EPISODE:  1389  - SCORE:  -129.0734852321035  - MEAN SCORE:  -115.79

Last reward:  -100
EPISODE:  1461  - SCORE:  -104.79645319424183  - MEAN SCORE:  -116.99845802081448
Last reward:  -100
EPISODE:  1462  - SCORE:  -98.31826637810214  - MEAN SCORE:  -116.88584647194682
Last reward:  -100
EPISODE:  1463  - SCORE:  -119.93526198451661  - MEAN SCORE:  -117.46951574548693
Last reward:  -100
EPISODE:  1464  - SCORE:  -111.10000153618552  - MEAN SCORE:  -117.53429333748267
Last reward:  -100
EPISODE:  1465  - SCORE:  -120.03031571966241  - MEAN SCORE:  -117.91673842446349
Last reward:  -100
EPISODE:  1466  - SCORE:  -142.44409205237093  - MEAN SCORE:  -118.14854774432771
Last reward:  -100
EPISODE:  1467  - SCORE:  -125.76689537655494  - MEAN SCORE:  -118.4341067010142
Last reward:  -100
EPISODE:  1468  - SCORE:  -103.63507685228143  - MEAN SCORE:  -117.91823703487627
ACTUALIZA
Last reward:  -100
EPISODE:  1469  - SCORE:  -92.40448199895789  - MEAN SCORE:  -117.87909742939135
Last reward:  -100
EPISODE:  1470  - SCORE:  -99.58403150388864  - MEAN SCORE:  -117

Last reward:  -100
EPISODE:  1542  - SCORE:  -140.55616553218778  - MEAN SCORE:  -117.1469174821504
Last reward:  -100
EPISODE:  1543  - SCORE:  -131.88701426493833  - MEAN SCORE:  -116.87129115771262
Last reward:  -100
EPISODE:  1544  - SCORE:  -140.53467613523853  - MEAN SCORE:  -117.16004966541271
Last reward:  -100
EPISODE:  1545  - SCORE:  -115.78542862713643  - MEAN SCORE:  -117.0246231871156
Last reward:  -100
EPISODE:  1546  - SCORE:  -142.3957998329792  - MEAN SCORE:  -117.36130089079316
Last reward:  -100
EPISODE:  1547  - SCORE:  -112.62766888842222  - MEAN SCORE:  -117.31600999021649
Last reward:  -100
EPISODE:  1548  - SCORE:  -105.99691447339471  - MEAN SCORE:  -117.27014841996974
Last reward:  -100
EPISODE:  1549  - SCORE:  -130.63193484573674  - MEAN SCORE:  -117.1667243661399
Last reward:  -100
EPISODE:  1550  - SCORE:  -132.2416910384785  - MEAN SCORE:  -117.17965145124714
Last reward:  -100
EPISODE:  1551  - SCORE:  -105.517347148906  - MEAN SCORE:  -116.763378556265

Last reward:  -100
EPISODE:  1624  - SCORE:  -128.81939602252825  - MEAN SCORE:  -120.52629607858215
Last reward:  -100
EPISODE:  1625  - SCORE:  -137.01100565824365  - MEAN SCORE:  -120.70380723970082
ACTUALIZA
Last reward:  -100
EPISODE:  1626  - SCORE:  -122.23330096998282  - MEAN SCORE:  -120.58908627485368
Last reward:  -100
EPISODE:  1627  - SCORE:  -112.0505093959762  - MEAN SCORE:  -120.52846732009512
Last reward:  -100
EPISODE:  1628  - SCORE:  -111.98171862649644  - MEAN SCORE:  -120.26700320837966
Last reward:  -100
EPISODE:  1629  - SCORE:  -113.2025251745476  - MEAN SCORE:  -120.25184067317994
Last reward:  -100
EPISODE:  1630  - SCORE:  -145.36736986581636  - MEAN SCORE:  -120.55074598965278
Last reward:  -100
EPISODE:  1631  - SCORE:  -75.91615732885603  - MEAN SCORE:  -120.31333461349392
Last reward:  -100
EPISODE:  1632  - SCORE:  -142.39058315249414  - MEAN SCORE:  -120.14454514744861
Last reward:  -100
EPISODE:  1633  - SCORE:  -121.79469482734356  - MEAN SCORE:  -12

Last reward:  -100
EPISODE:  1706  - SCORE:  -113.94633378450894  - MEAN SCORE:  -120.01166542161563
Last reward:  -100
EPISODE:  1707  - SCORE:  -100.19157951338447  - MEAN SCORE:  -119.79930715701994
Last reward:  -100
EPISODE:  1708  - SCORE:  -124.57470867544859  - MEAN SCORE:  -119.83725290660188
Last reward:  -100
EPISODE:  1709  - SCORE:  -152.5669632882736  - MEAN SCORE:  -120.42734520235213
ACTUALIZA
Last reward:  -100
EPISODE:  1710  - SCORE:  -133.09444933241411  - MEAN SCORE:  -120.56930398581407
Last reward:  -100
EPISODE:  1711  - SCORE:  -131.9245373303633  - MEAN SCORE:  -120.83675718476516
Last reward:  -100
EPISODE:  1712  - SCORE:  -126.7939921158117  - MEAN SCORE:  -120.53280923729389
Last reward:  -100
EPISODE:  1713  - SCORE:  -151.4610313502844  - MEAN SCORE:  -120.73729422233093
Last reward:  -100
EPISODE:  1714  - SCORE:  -101.73750138932981  - MEAN SCORE:  -120.81556330758902
Last reward:  -100
EPISODE:  1715  - SCORE:  -129.85064421368173  - MEAN SCORE:  -120

Last reward:  -100
EPISODE:  1787  - SCORE:  -124.59447268634521  - MEAN SCORE:  -117.65092483561055
Last reward:  -100
EPISODE:  1788  - SCORE:  -141.15618834711768  - MEAN SCORE:  -117.90116253045886
Last reward:  -100
EPISODE:  1789  - SCORE:  -121.9323282741968  - MEAN SCORE:  -117.88211499890964
Last reward:  -100
EPISODE:  1790  - SCORE:  -105.4241330820433  - MEAN SCORE:  -117.53879213010045
Last reward:  -100
EPISODE:  1791  - SCORE:  -155.71556132137476  - MEAN SCORE:  -118.25913695528614
Last reward:  -100
EPISODE:  1792  - SCORE:  -173.8182758670394  - MEAN SCORE:  -118.76007986306813
Last reward:  -100
EPISODE:  1793  - SCORE:  -133.14117863824418  - MEAN SCORE:  -118.84291217926133
Last reward:  -100
EPISODE:  1794  - SCORE:  -108.63643586216847  - MEAN SCORE:  -118.68284608006644
Last reward:  -100
EPISODE:  1795  - SCORE:  -90.34878789803854  - MEAN SCORE:  -118.47596188789899
Last reward:  -100
EPISODE:  1796  - SCORE:  -140.53223394665426  - MEAN SCORE:  -118.379718647

Last reward:  -100
EPISODE:  1868  - SCORE:  -301.0016306929544  - MEAN SCORE:  -126.13669848906808
Last reward:  -100
EPISODE:  1869  - SCORE:  -115.88296467000244  - MEAN SCORE:  -125.9269941771137
Last reward:  -100
EPISODE:  1870  - SCORE:  -152.43696832740306  - MEAN SCORE:  -126.22314930789213
ACTUALIZA
Last reward:  -100
EPISODE:  1871  - SCORE:  -153.3783660123293  - MEAN SCORE:  -126.90865825800813
Last reward:  -100
EPISODE:  1872  - SCORE:  -108.18103105337934  - MEAN SCORE:  -126.87144844546168
Last reward:  -100
EPISODE:  1873  - SCORE:  -86.95556769170005  - MEAN SCORE:  -126.299331500494
Last reward:  -100
EPISODE:  1874  - SCORE:  -101.55091967360056  - MEAN SCORE:  -125.72770337780891
Last reward:  -100
EPISODE:  1875  - SCORE:  -120.5219854407442  - MEAN SCORE:  -125.88127341190317
Last reward:  -100
EPISODE:  1876  - SCORE:  -148.25094820570325  - MEAN SCORE:  -126.16877647476687
Last reward:  -100
EPISODE:  1877  - SCORE:  -133.6226652469989  - MEAN SCORE:  -126.365

Last reward:  -100
EPISODE:  1949  - SCORE:  -166.33850886433683  - MEAN SCORE:  -121.05104125417797
Last reward:  -100
EPISODE:  1950  - SCORE:  -152.81567474699813  - MEAN SCORE:  -121.45361516461033
Last reward:  -100
EPISODE:  1951  - SCORE:  -134.75534509269227  - MEAN SCORE:  -121.50522541916162
Last reward:  -100
EPISODE:  1952  - SCORE:  -108.28092925297108  - MEAN SCORE:  -121.31578641007083
Last reward:  -100
EPISODE:  1953  - SCORE:  -129.09940345945012  - MEAN SCORE:  -121.40479053452184
Last reward:  -100
EPISODE:  1954  - SCORE:  -148.9562598867281  - MEAN SCORE:  -121.92909247166395
Last reward:  -100
EPISODE:  1955  - SCORE:  -108.26163491400919  - MEAN SCORE:  -122.09068500320468
Last reward:  -100
EPISODE:  1956  - SCORE:  -110.5336303071319  - MEAN SCORE:  -121.73797063333618
ACTUALIZA
Last reward:  -100
EPISODE:  1957  - SCORE:  -166.90222111757322  - MEAN SCORE:  -122.46684634785002
Last reward:  -100
EPISODE:  1958  - SCORE:  -207.52368658792813  - MEAN SCORE:  -1

Last reward:  -100
EPISODE:  2032  - SCORE:  -99.44638353726279  - MEAN SCORE:  -121.35765032380064
Last reward:  -100
EPISODE:  2033  - SCORE:  -119.12034572278786  - MEAN SCORE:  -121.3215740928878
Last reward:  -100
EPISODE:  2034  - SCORE:  -110.33647833406827  - MEAN SCORE:  -121.28642553694867
Last reward:  -100
EPISODE:  2035  - SCORE:  -116.67305763014096  - MEAN SCORE:  -121.57464836779954
Last reward:  -100
EPISODE:  2036  - SCORE:  -120.67685587100266  - MEAN SCORE:  -121.33529701832127
Last reward:  -100
EPISODE:  2037  - SCORE:  -135.8942670312242  - MEAN SCORE:  -121.8060095674848
ACTUALIZA
Last reward:  -100
EPISODE:  2038  - SCORE:  -193.12094569250633  - MEAN SCORE:  -122.79921149346376
Last reward:  -100
EPISODE:  2039  - SCORE:  -84.05086231903559  - MEAN SCORE:  -122.56489396001928
Last reward:  -100
EPISODE:  2040  - SCORE:  -137.88313697206104  - MEAN SCORE:  -123.10095198300583
Last reward:  -100
EPISODE:  2041  - SCORE:  -107.04178424828697  - MEAN SCORE:  -123.

Last reward:  -100
EPISODE:  2113  - SCORE:  -143.26424315192358  - MEAN SCORE:  -126.72426421810783
Last reward:  -100
EPISODE:  2114  - SCORE:  -159.85309274304035  - MEAN SCORE:  -127.0757815899401
Last reward:  -100
EPISODE:  2115  - SCORE:  -181.79204438075206  - MEAN SCORE:  -127.2587719207801
Last reward:  -100
EPISODE:  2116  - SCORE:  -114.75254308209102  - MEAN SCORE:  -127.29203490551183
Last reward:  -100
EPISODE:  2117  - SCORE:  -106.13009334347085  - MEAN SCORE:  -127.13761960160109
Last reward:  -100
EPISODE:  2118  - SCORE:  -137.86114931679748  - MEAN SCORE:  -127.32431354738354
ACTUALIZA
Last reward:  -100
EPISODE:  2119  - SCORE:  -145.980104632666  - MEAN SCORE:  -127.40211683550902
Last reward:  -100
EPISODE:  2120  - SCORE:  -116.17988257220462  - MEAN SCORE:  -127.59959492252405
Last reward:  -100
EPISODE:  2121  - SCORE:  -118.68513914611712  - MEAN SCORE:  -127.67658119966626
Last reward:  -100
EPISODE:  2122  - SCORE:  -374.5136092771714  - MEAN SCORE:  -130.

Last reward:  -100
EPISODE:  2195  - SCORE:  -199.21955855320377  - MEAN SCORE:  -127.69102891310332
Last reward:  -100
EPISODE:  2196  - SCORE:  -165.80773058638468  - MEAN SCORE:  -128.55878851831622
Last reward:  -100
EPISODE:  2197  - SCORE:  -112.20500486169169  - MEAN SCORE:  -128.61010050045977
Last reward:  -100
EPISODE:  2198  - SCORE:  -108.16419136490703  - MEAN SCORE:  -128.46089159037814
Last reward:  -100
EPISODE:  2199  - SCORE:  -108.39975313026562  - MEAN SCORE:  -127.18288887991127
Last reward:  -100
EPISODE:  2200  - SCORE:  -87.32230154231225  - MEAN SCORE:  -126.84505919442054
Last reward:  -100
EPISODE:  2201  - SCORE:  -115.3290759881935  - MEAN SCORE:  -126.33623219447794
Last reward:  -100
EPISODE:  2202  - SCORE:  -124.98996254735748  - MEAN SCORE:  -126.2570220580943
Last reward:  -100
EPISODE:  2203  - SCORE:  -109.35195168507906  - MEAN SCORE:  -126.33521269128373
Last reward:  -100
EPISODE:  2204  - SCORE:  -143.67463522913408  - MEAN SCORE:  -126.60083340

Last reward:  -100
EPISODE:  2276  - SCORE:  -130.3767018905673  - MEAN SCORE:  -120.85989604476921
Last reward:  -100
EPISODE:  2277  - SCORE:  -155.2822532789826  - MEAN SCORE:  -121.02404244620148
Last reward:  -100
EPISODE:  2278  - SCORE:  -117.88935991656732  - MEAN SCORE:  -121.23620821769099
Last reward:  -100
EPISODE:  2279  - SCORE:  -131.15811205352725  - MEAN SCORE:  -121.32051319925542
ACTUALIZA
Last reward:  -100
EPISODE:  2280  - SCORE:  -116.74368118219162  - MEAN SCORE:  -121.43133301489607
Last reward:  -100
EPISODE:  2281  - SCORE:  -121.4639623349716  - MEAN SCORE:  -121.4772574286543
Last reward:  -100
EPISODE:  2282  - SCORE:  -154.107573636901  - MEAN SCORE:  -121.94918590350976
Last reward:  -100
EPISODE:  2283  - SCORE:  -126.02177352725002  - MEAN SCORE:  -122.08575565107363
Last reward:  -100
EPISODE:  2284  - SCORE:  -88.27319127814647  - MEAN SCORE:  -121.7543252941829
Last reward:  -100
EPISODE:  2285  - SCORE:  -93.36992401440105  - MEAN SCORE:  -121.3226

Last reward:  -100
EPISODE:  2358  - SCORE:  -118.99008616306284  - MEAN SCORE:  -122.3407513580467
Last reward:  -100
EPISODE:  2359  - SCORE:  -116.85596958106657  - MEAN SCORE:  -122.31308934424996
Last reward:  -100
EPISODE:  2360  - SCORE:  -108.8231787349056  - MEAN SCORE:  -122.48611898713935
Last reward:  -100
EPISODE:  2361  - SCORE:  -103.93259063614873  - MEAN SCORE:  -122.45659495959067
Last reward:  -100
EPISODE:  2362  - SCORE:  -108.83992778584731  - MEAN SCORE:  -122.16112905941777
Last reward:  -100
EPISODE:  2363  - SCORE:  -126.29300582942803  - MEAN SCORE:  -122.13554910677767
Last reward:  -100
EPISODE:  2364  - SCORE:  -93.42928242309952  - MEAN SCORE:  -121.49016488072363
Last reward:  -100
EPISODE:  2365  - SCORE:  -134.11223309956475  - MEAN SCORE:  -121.78519092426052
ACTUALIZA
Last reward:  -100
EPISODE:  2366  - SCORE:  -100.1041537988574  - MEAN SCORE:  -121.58912985517901
Last reward:  -100
EPISODE:  2367  - SCORE:  -100.24583115332996  - MEAN SCORE:  -121

Last reward:  -100
EPISODE:  2440  - SCORE:  -157.57976478325784  - MEAN SCORE:  -121.82315320953188
Last reward:  -100
EPISODE:  2441  - SCORE:  -90.23488832479731  - MEAN SCORE:  -121.47034995766485
Last reward:  -100
EPISODE:  2442  - SCORE:  -136.27969411478335  - MEAN SCORE:  -121.30232678952892
Last reward:  -100
EPISODE:  2443  - SCORE:  -122.50938069656821  - MEAN SCORE:  -121.45705576307687
Last reward:  -100
EPISODE:  2444  - SCORE:  -105.69465211856573  - MEAN SCORE:  -121.19456698246813
Last reward:  -100
EPISODE:  2445  - SCORE:  -135.7116387945718  - MEAN SCORE:  -121.27597745367775
Last reward:  -100
EPISODE:  2446  - SCORE:  -116.29961745282117  - MEAN SCORE:  -121.14931056533987
Last reward:  -100
EPISODE:  2447  - SCORE:  -115.69110090852377  - MEAN SCORE:  -121.33345150833921
Last reward:  -100
EPISODE:  2448  - SCORE:  -83.08103379321628  - MEAN SCORE:  -120.8840022858048
Last reward:  -100
EPISODE:  2449  - SCORE:  -138.79155172436424  - MEAN SCORE:  -121.055910270

Last reward:  -100
EPISODE:  2523  - SCORE:  -120.01257817081168  - MEAN SCORE:  -125.80247888615676
Last reward:  -100
EPISODE:  2524  - SCORE:  -128.52007815380432  - MEAN SCORE:  -125.86695417130008
Last reward:  -100
EPISODE:  2525  - SCORE:  -95.10293618833998  - MEAN SCORE:  -125.6210028230217
Last reward:  -100
EPISODE:  2526  - SCORE:  -108.31239924570448  - MEAN SCORE:  -125.23673896651322
Last reward:  -100
EPISODE:  2527  - SCORE:  -150.06052270516176  - MEAN SCORE:  -125.45996992937476
Last reward:  -100
EPISODE:  2528  - SCORE:  -123.4094478811036  - MEAN SCORE:  -125.40911507909637
Last reward:  -100
EPISODE:  2529  - SCORE:  -138.26240065506303  - MEAN SCORE:  -125.24880450698484
ACTUALIZA
Last reward:  -100
EPISODE:  2530  - SCORE:  -142.31508619263712  - MEAN SCORE:  -125.51704862553468
Last reward:  -100
EPISODE:  2531  - SCORE:  13.513706290491456  - MEAN SCORE:  -123.98264377854561
Last reward:  -100
EPISODE:  2532  - SCORE:  -128.8150531004909  - MEAN SCORE:  -124.

Last reward:  -100
EPISODE:  2605  - SCORE:  -115.58976741890807  - MEAN SCORE:  -127.98307618430498
Last reward:  -100
EPISODE:  2606  - SCORE:  -143.09082690275122  - MEAN SCORE:  -127.99633901336387
Last reward:  -100
EPISODE:  2607  - SCORE:  -183.09385119185833  - MEAN SCORE:  -127.94134754728475
Last reward:  -100
EPISODE:  2608  - SCORE:  -118.54812025384577  - MEAN SCORE:  -127.54065624958874
Last reward:  -100
EPISODE:  2609  - SCORE:  -130.85777454646424  - MEAN SCORE:  -127.14249646538156
Last reward:  -100
EPISODE:  2610  - SCORE:  -125.63412456738135  - MEAN SCORE:  -126.94561091861969
Last reward:  -100
EPISODE:  2611  - SCORE:  -117.9377807113748  - MEAN SCORE:  -126.73887794955512
Last reward:  -100
EPISODE:  2612  - SCORE:  -120.21163161532432  - MEAN SCORE:  -126.67122540269902
Last reward:  -100
EPISODE:  2613  - SCORE:  -117.03946017054282  - MEAN SCORE:  -126.71002558286658
Last reward:  -100
EPISODE:  2614  - SCORE:  -183.05886767594728  - MEAN SCORE:  -127.324425

KeyboardInterrupt: 