Cambiar LR

In [None]:
import os

import random
import gym
import pylab
import numpy as np
import tensorflow as tf
import time
import matplotlib.pyplot as plt

#tf.config.experimental_run_functions_eagerly(True)
tf.compat.v1.disable_eager_execution()
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras import backend as K
import copy

EPISODES = 100000 # Upper bound on the number of episodes played before training stops
LR = 0.0025 # Initial learning rate (decayed at runtime by the agent's training loop)
NUM_ACTIONS = 4 # Number of possible actions in the environment
NUM_STATES = 8 # Size of the state vector fed to the actor and critic networks
EPOCHS = 10 # Epochs to train the networks on each collected buffer (recommended between 3 and 30)
BATCH_SIZE = 64 # Batch size for the neural nets
BUFFER_SIZE = 2048 # Number of environment steps collected before each training update
SHUFFLE = True # Whether to shuffle data or not while training
OPTIMIZER = Adam # Optimizer class for both actor and critic
GAMMA = 0.99 # Discount factor applied to future rewards
NORMALIZE = True # Whether to normalize GAE or not (NOTE(review): not referenced in the visible code - confirm it is still needed)


# Create the actor used to select the action given a state
class Actor_Model:
    """Policy network mapping a state vector to one probability per action.

    The compiled Keras model is exposed as ``self.model``. It takes inputs of
    width ``NUM_STATES`` and outputs a softmax over ``NUM_ACTIONS``.
    """

    def __init__(self):
        # Pass the shape explicitly as a tuple keyword argument; a bare
        # positional int is not accepted by every Keras version.
        X_input = Input(shape=(NUM_STATES,))

        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X_input)
        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)
        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)

        # Softmax as there are different probabilities depending on the action
        output = Dense(NUM_ACTIONS, activation="softmax")(X)

        # Compile with categorical cross-entropy. `learning_rate` replaces the
        # deprecated `lr` keyword of the Keras optimizers.
        self.model = Model(inputs=X_input, outputs=output)
        self.model.compile(loss='categorical_crossentropy', optimizer=OPTIMIZER(learning_rate=LR))

# Create the critic which will criticise how the actor is performing
class Critic_Model:
    """Value network mapping a state vector to a single scalar estimate.

    The compiled Keras model is exposed as ``self.model``. It takes inputs of
    width ``NUM_STATES`` and produces one linear output.
    """

    def __init__(self):
        # Pass the shape explicitly as a tuple keyword argument; a bare
        # positional int is not accepted by every Keras version.
        X_input = Input(shape=(NUM_STATES,))

        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X_input)
        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)
        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)

        # Linear output holding the value estimate for the input state
        value = Dense(1)(X)

        # Compile with mean squared error. `learning_rate` replaces the
        # deprecated `lr` keyword of the Keras optimizers.
        self.model = Model(inputs=X_input, outputs=value)
        self.model.compile(loss='mse', optimizer=OPTIMIZER(learning_rate=LR))

# Combine both Actor and Critic to create the agent
class PPOAgent:
    """Actor-critic agent trained on fixed-size buffers of environment steps.

    The agent collects ``BUFFER_SIZE`` steps, then fits the actor with
    advantage-weighted categorical cross-entropy and the critic with MSE on
    the normalized discounted returns.

    NOTE(review): despite the class name, no clipped surrogate objective is
    applied in this code; ``predictions`` (the old action probabilities) are
    collected but not used by ``replay``.
    """

    def __init__(self, env_name):
        """Create the environment, the two networks and the bookkeeping state.

        Args:
            env_name: Environment id passed to ``gym.make``.
        """
        # Environment parameters
        self.env_name = env_name
        self.env = gym.make(env_name)
        self.episode = 0 # number of episodes finished since start
        # Best 100-episode average seen so far.
        # NOTE(review): starting at 0 means averages below 0 never count as an
        # improvement, so the LR decay in run_batch does not trigger until the
        # average becomes positive - confirm this is intended.
        self.max_average = 0

        # Used to plot a graph of the training process
        self.scores_, self.average_ = [], []

        # Create Actor-Critic network models
        self.Actor = Actor_Model()
        self.Critic = Critic_Model()

        # File names used by save() / load()
        self.Actor_name = f"{self.env_name}_PPO_Actor.h5"
        self.Critic_name = f"{self.env_name}_PPO_Critic.h5"

    def act(self, state):
        """Sample an action from the actor's output for ``state``.

        Args:
            state: Array of shape ``[1, NUM_STATES]``, as produced by run_batch.

        Returns:
            Tuple ``(action, action_onehot, prediction)``: the sampled action
            index, its one-hot encoding, and the actor's probability vector.
        """
        # Use the actor network to get the action probabilities
        prediction = self.Actor.model.predict(state)[0]

        # Sample the action according to those probabilities
        action = np.random.choice(NUM_ACTIONS, p=prediction)
        action_onehot = np.zeros([NUM_ACTIONS])
        action_onehot[action] = 1
        return action, action_onehot, prediction

    def discount_rewards(self, reward, dones=None, gamma=None):
        """Compute normalized discounted returns for a sequence of rewards.

        Args:
            reward: Sequence of per-step scalar rewards.
            dones: Optional sequence of per-step terminal flags, same length
                as ``reward``. When given, the running return is reset at each
                terminal step so returns do not leak across episodes. When
                omitted, the whole sequence is treated as one trajectory.
            gamma: Discount factor; defaults to the module-level ``GAMMA``.

        Returns:
            1-D float64 array of returns, shifted to zero mean and scaled by
            the standard deviation (plus 1e-8). Empty input gives an empty array.
        """
        if gamma is None:
            gamma = GAMMA

        # Always accumulate in float: np.zeros_like would inherit an integer
        # dtype from integer rewards and break the in-place normalization.
        discounted_r = np.zeros(len(reward), dtype=np.float64)
        if len(reward) == 0:
            return discounted_r

        running_add = 0.0
        for i in reversed(range(len(reward))):
            if dones is not None and dones[i]:
                # Step i ended an episode: nothing after it belongs to it.
                running_add = 0.0
            running_add = running_add * gamma + reward[i]
            discounted_r[i] = running_add

        discounted_r -= np.mean(discounted_r) # normalizing the result
        discounted_r /= (np.std(discounted_r) + 1e-8) # divide by standard deviation
        return discounted_r

    def replay(self, states, actions, rewards, predictions, dones, next_states):
        """Train actor and critic on one collected buffer of experience.

        Args:
            states: List of ``[1, NUM_STATES]`` state arrays.
            actions: List of one-hot action vectors.
            rewards: List of per-step rewards.
            predictions: Actor outputs at collection time (currently unused).
            dones: List of per-step terminal flags; marks episode boundaries
                for the discounted returns.
            next_states: Successor states (currently unused).
        """
        # Reshape memory to appropriate shape for training
        states = np.vstack(states)
        actions = np.vstack(actions)

        # Critic estimates for the visited states, flattened to 1-D
        values = self.Critic.model.predict(states).reshape(-1)

        # Discounted returns, reset at every episode boundary in the buffer
        discounted_rewards = self.discount_rewards(rewards, dones)

        # Advantage = return - baseline; used as per-sample weight for the actor
        advantages = discounted_rewards - values

        # Training Actor and Critic networks
        self.Actor.model.fit(states, actions, sample_weight=advantages, epochs=EPOCHS, verbose=0, shuffle=SHUFFLE, batch_size=BATCH_SIZE)
        self.Critic.model.fit(states, discounted_rewards, epochs=EPOCHS, verbose=0, shuffle=SHUFFLE, batch_size=BATCH_SIZE)

    def load(self):
        """Load actor and critic weights from the files written by save()."""
        # The Keras model lives in `.model` on both wrappers (see save()).
        self.Actor.model.load_weights(self.Actor_name)
        self.Critic.model.load_weights(self.Critic_name)

    def save(self):
        """Write actor and critic weights to their respective files."""
        self.Actor.model.save_weights(self.Actor_name)
        self.Critic.model.save_weights(self.Critic_name)

    def run_batch(self):
        """Collect BUFFER_SIZE steps at a time and train after each buffer.

        Stops when the 100-episode average score exceeds 200 or when at least
        ``EPISODES`` episodes have been played. Each time the 100-episode
        average beats ``self.max_average`` the module-level ``LR`` is
        multiplied by 0.99 and pushed to both optimizers; weights are saved
        when that best average is above 150.
        """
        global LR
        state = self.env.reset()
        state = np.reshape(state, [1, NUM_STATES])
        done, score = False, 0
        finished = False
        while not finished:
            # Instantiate or reset the buffer
            states, next_states, actions, rewards, predictions, dones = [], [], [], [], [], []
            for _ in range(BUFFER_SIZE):
                # Actor picks an action
                action, action_onehot, prediction = self.act(state)
                # Retrieve new state, reward, and whether the state is terminal
                next_state, reward, done, _info = self.env.step(action)
                next_state = np.reshape(next_state, [1, NUM_STATES])
                # Memorize the transition for training
                states.append(state)
                next_states.append(next_state)
                actions.append(action_onehot)
                rewards.append(reward)
                dones.append(done)
                predictions.append(prediction)
                # Update current state
                state = next_state
                score += reward
                if done:
                    self.episode += 1
                    self.scores_.append(score)
                    if self.episode >= 100:
                        average = sum(self.scores_[-100:])/100
                        print('Episode: {:>5}\t\tscore: {:>7.2f}\t\taverage: {:>7.2f}'.format(self.episode, score, average))
                        if average > self.max_average:
                            self.max_average = average
                            if self.max_average > 150:
                                self.save()
                            # Decay the learning rate on every new best average
                            LR *= 0.99
                            K.set_value(self.Actor.model.optimizer.learning_rate, LR)
                            K.set_value(self.Critic.model.optimizer.learning_rate, LR)

                        if average > 200:
                            # Solved: plot the score history and stop collecting
                            plt.plot(self.scores_)
                            plt.xlabel("Episode")
                            plt.ylabel("Score")
                            finished = True
                            break

                    else:
                        # Fewer than 100 episodes so far: average over all of them
                        print('Episode: {:>5}\t\tscore: {:>7.2f}\t\taverage: {:>7.2f}'.format(self.episode, score, sum(self.scores_)/len(self.scores_)))

                    state, done, score = self.env.reset(), False, 0
                    state = np.reshape(state, [1, NUM_STATES])

            # Train on whatever was collected (a partial buffer after `break`)
            self.replay(states, actions, rewards, predictions, dones, next_states)
            if self.episode >= EPISODES:
                break
        self.env.close()
                    
if __name__ == "__main__":
    # Script entry point: train an agent on LunarLander and report how long
    # the whole run took.
    env_name = 'LunarLander-v2'
    start = time.time()
    agent = PPOAgent(env_name)
    # Collect a buffer of steps, train on it, and repeat until the stop
    # condition inside run_batch is met.
    agent.run_batch()
    # Elapsed wall-clock time, in minutes
    print((time.time() - start)/60)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Episode:     1		score: -200.78		average: -200.78
Episode:     2		score: -149.42		average: -175.10
Episode:     3		score:  -96.43		average: -148.88
Episode:     4		score: -195.99		average: -160.66
Episode:     5		score:  -82.28		average: -144.98
Episode:     6		score: -164.03		average: -148.16
Episode:     7		score: -155.93		average: -149.27
Episode:     8		score:  -88.87		average: -141.72
Episode:     9		score:  -77.70		average: -134.60
Episode:    10		score: -123.55		average: -133.50
Episode:    11		score: -126.62		average: -132.87
Episode:    12		score: -132.57		average: -132.85
Episode:    13		score: -132.74		average: -132.84
Episode:    14		score: -122.05		average: -132.07
Episode:    15		score: -138.04		average: -132.47
Episode:    16		score:  -86.57		average: -129.60
Episode:    17		score:    6.76		average: -121.58
Episode:    18		score: -183.13		average: -125.00
Episode:    19		score: -224.71		avera

Episode:   163		score: -145.35		average: -139.04
Episode:   164		score: -141.13		average: -137.45
Episode:   165		score: -138.77		average: -137.21
Episode:   166		score: -172.82		average: -138.37
Episode:   167		score: -111.78		average: -137.18
Episode:   168		score: -100.62		average: -136.00
Episode:   169		score: -120.83		average: -136.78
Episode:   170		score: -107.73		average: -136.41
Episode:   171		score:  -95.80		average: -135.02
Episode:   172		score: -187.98		average: -135.43
Episode:   173		score:  -86.33		average: -132.69
Episode:   174		score:  -97.32		average: -130.89
Episode:   175		score: -120.05		average: -130.67
Episode:   176		score: -113.79		average: -130.16
Episode:   177		score: -157.90		average: -130.58
Episode:   178		score: -138.91		average: -130.73
Episode:   179		score: -121.33		average: -130.66
Episode:   180		score: -110.50		average: -130.70
Episode:   181		score: -161.10		average: -130.65
Episode:   182		score:  -90.48		average: -130.31
Episode:   183		scor

Episode:   331		score: -105.96		average: -129.33
Episode:   332		score: -157.47		average: -129.64
Episode:   333		score: -106.24		average: -129.45
Episode:   334		score: -201.73		average: -131.54
Episode:   335		score: -160.88		average: -132.20
Episode:   336		score: -179.96		average: -132.80
Episode:   337		score: -193.40		average: -133.79
Episode:   338		score: -120.74		average: -133.13
Episode:   339		score: -133.51		average: -133.39
Episode:   340		score: -145.69		average: -133.16
Episode:   341		score:   22.17		average: -131.37
Episode:   342		score: -151.41		average: -131.70
Episode:   343		score: -141.08		average: -131.33
Episode:   344		score: -122.43		average: -131.27
Episode:   345		score: -104.22		average: -131.23
Episode:   346		score: -159.66		average: -131.02
Episode:   347		score: -161.05		average: -131.58
Episode:   348		score: -154.07		average: -131.93
Episode:   349		score: -177.28		average: -132.59
Episode:   350		score: -127.66		average: -132.67
Episode:   351		scor

Episode:   499		score: -140.89		average: -134.68
Episode:   500		score: -105.07		average: -134.46
Episode:   501		score: -103.14		average: -134.07
Episode:   502		score:  -99.95		average: -134.05
Episode:   503		score:  -90.22		average: -133.71
Episode:   504		score: -105.92		average: -133.04
Episode:   505		score: -130.73		average: -132.71
Episode:   506		score: -137.20		average: -132.89
Episode:   507		score: -180.16		average: -133.36
Episode:   508		score: -131.59		average: -133.42
Episode:   509		score: -118.71		average: -133.22
Episode:   510		score:  -91.68		average: -132.54
Episode:   511		score: -127.55		average: -132.21
Episode:   512		score: -139.65		average: -132.53
Episode:   513		score:  -82.29		average: -131.53
Episode:   514		score: -108.99		average: -130.89
Episode:   515		score: -108.53		average: -130.06
Episode:   516		score: -108.18		average: -129.40
Episode:   517		score: -129.52		average: -129.17
Episode:   518		score: -139.21		average: -129.48
Episode:   519		scor

Episode:   667		score:  -10.94		average: -130.21
Episode:   668		score: -107.42		average: -129.67
Episode:   669		score: -100.27		average: -129.76
Episode:   670		score:  -96.69		average: -129.35
Episode:   671		score:   -5.55		average: -128.00
Episode:   672		score: -160.89		average: -128.61
Episode:   673		score: -134.66		average: -128.38
Episode:   674		score:  -36.86		average: -127.79
Episode:   675		score: -147.03		average: -127.54
Episode:   676		score: -122.28		average: -127.76
Episode:   677		score:  -11.77		average: -126.20
Episode:   678		score: -154.64		average: -126.33
Episode:   679		score: -133.11		average: -126.26
Episode:   680		score: -128.67		average: -125.93
Episode:   681		score:  -39.51		average: -125.07
Episode:   682		score: -142.39		average: -125.36
Episode:   683		score: -125.10		average: -125.43
Episode:   684		score: -125.47		average: -125.72
Episode:   685		score: -129.73		average: -125.65
Episode:   686		score: -150.66		average: -125.43
Episode:   687		scor

Episode:   835		score: -117.19		average: -132.21
Episode:   836		score: -119.25		average: -132.40
Episode:   837		score: -151.23		average: -132.80
Episode:   838		score:   -3.02		average: -131.16
Episode:   839		score: -146.13		average: -131.16
Episode:   840		score: -121.33		average: -131.21
Episode:   841		score: -108.26		average: -130.89
Episode:   842		score: -169.09		average: -130.74
Episode:   843		score:  -71.41		average: -129.57
Episode:   844		score: -151.71		average: -129.75
Episode:   845		score: -139.35		average: -129.86
Episode:   846		score: -109.54		average: -129.46
Episode:   847		score: -107.97		average: -129.42
Episode:   848		score: -103.30		average: -128.97
Episode:   849		score: -113.73		average: -129.19
Episode:   850		score: -111.02		average: -128.67
Episode:   851		score: -116.69		average: -128.66
Episode:   852		score: -155.32		average: -129.26
Episode:   853		score: -143.89		average: -129.50
Episode:   854		score: -128.68		average: -129.08
Episode:   855		scor

Episode:  1002		score: -137.95		average: -125.11
Episode:  1003		score: -129.06		average: -125.07
Episode:  1004		score: -194.54		average: -126.17
Episode:  1005		score: -108.20		average: -126.06
Episode:  1006		score: -145.49		average: -126.45
Episode:  1007		score: -125.51		average: -126.40
Episode:  1008		score: -118.76		average: -126.49
Episode:  1009		score:   22.66		average: -125.03
Episode:  1010		score: -148.43		average: -125.14
Episode:  1011		score: -132.00		average: -125.13
Episode:  1012		score: -123.67		average: -126.60
Episode:  1013		score: -159.81		average: -126.80
Episode:  1014		score: -143.43		average: -127.03
Episode:  1015		score: -122.89		average: -127.01
Episode:  1016		score: -174.29		average: -127.69
Episode:  1017		score:  -95.61		average: -127.39
Episode:  1018		score:    4.98		average: -125.86
Episode:  1019		score: -137.77		average: -126.08
Episode:  1020		score: -137.92		average: -125.89
Episode:  1021		score: -123.10		average: -125.51
Episode:  1022		scor

Episode:  1170		score: -146.34		average: -135.21
Episode:  1171		score: -110.61		average: -135.23
Episode:  1172		score: -100.03		average: -134.43
Episode:  1173		score: -129.99		average: -134.78
Episode:  1174		score: -142.20		average: -134.72
Episode:  1175		score: -116.81		average: -134.00
Episode:  1176		score: -116.24		average: -134.03
Episode:  1177		score: -150.54		average: -133.73
Episode:  1178		score: -127.53		average: -133.73
Episode:  1179		score: -136.93		average: -133.85
Episode:  1180		score:  -17.53		average: -132.80
Episode:  1181		score: -117.94		average: -132.10
Episode:  1182		score: -139.43		average: -131.96
Episode:  1183		score: -148.02		average: -132.20
Episode:  1184		score: -133.22		average: -131.74
Episode:  1185		score: -133.00		average: -131.93
Episode:  1186		score: -151.92		average: -132.53
Episode:  1187		score: -115.58		average: -132.12
Episode:  1188		score: -123.33		average: -132.12
Episode:  1189		score: -110.64		average: -131.67
Episode:  1190		scor

Episode:  1337		score: -171.69		average: -130.95
Episode:  1338		score: -116.77		average: -130.87
Episode:  1339		score: -123.92		average: -130.78
Episode:  1340		score: -125.77		average: -132.44
Episode:  1341		score: -140.85		average: -131.99
Episode:  1342		score: -118.59		average: -132.06
Episode:  1343		score: -144.57		average: -132.10
Episode:  1344		score: -142.33		average: -132.16
Episode:  1345		score: -149.31		average: -131.94
Episode:  1346		score: -125.10		average: -132.06
Episode:  1347		score: -179.83		average: -132.54
Episode:  1348		score: -131.80		average: -132.10
Episode:  1349		score: -108.58		average: -132.05
Episode:  1350		score: -141.71		average: -131.57
Episode:  1351		score: -175.19		average: -131.75
Episode:  1352		score: -104.64		average: -131.41
Episode:  1353		score: -133.82		average: -131.50
Episode:  1354		score:  -71.95		average: -131.15
Episode:  1355		score: -132.44		average: -130.83
Episode:  1356		score: -168.87		average: -131.15
Episode:  1357		scor

Episode:  1506		score: -101.46		average: -130.96
Episode:  1507		score: -136.38		average: -130.98
Episode:  1508		score:  -78.03		average: -130.68
Episode:  1509		score:  -91.49		average: -130.47
Episode:  1510		score: -124.88		average: -130.32
Episode:  1511		score:  -93.11		average: -129.74
Episode:  1512		score: -141.57		average: -131.18
Episode:  1513		score: -122.18		average: -131.19
Episode:  1514		score: -116.39		average: -130.87
Episode:  1515		score: -328.78		average: -132.33
Episode:  1516		score: -129.35		average: -132.37
Episode:  1517		score: -133.84		average: -132.12
Episode:  1518		score: -137.59		average: -132.11
Episode:  1519		score: -151.17		average: -131.42
Episode:  1520		score: -152.36		average: -131.89
Episode:  1521		score: -168.74		average: -132.24
Episode:  1522		score: -146.44		average: -132.23
Episode:  1523		score: -167.76		average: -132.65
Episode:  1524		score: -167.08		average: -132.92
Episode:  1525		score:   17.79		average: -131.39
Episode:  1526		scor

Episode:  1673		score: -136.36		average: -128.36
Episode:  1674		score: -144.71		average: -128.49
Episode:  1675		score: -140.00		average: -128.24
Episode:  1676		score: -151.94		average: -129.90
Episode:  1677		score: -135.93		average: -129.24
Episode:  1678		score:  -95.54		average: -128.87
Episode:  1679		score: -134.92		average: -129.30
Episode:  1680		score: -121.63		average: -129.12
Episode:  1681		score: -128.57		average: -129.67
Episode:  1682		score: -162.29		average: -129.94
Episode:  1683		score:  -43.79		average: -129.05
Episode:  1684		score: -192.19		average: -129.28
Episode:  1685		score: -137.61		average: -129.27
Episode:  1686		score:  -90.31		average: -129.05
Episode:  1687		score: -130.57		average: -129.14
Episode:  1688		score: -147.67		average: -129.45
Episode:  1689		score: -116.74		average: -128.91
Episode:  1690		score: -147.33		average: -128.79
Episode:  1691		score: -162.46		average: -129.31
Episode:  1692		score: -106.51		average: -128.69
Episode:  1693		scor

Episode:  1842		score: -143.37		average: -133.23
Episode:  1843		score: -125.29		average: -133.01
Episode:  1844		score: -137.57		average: -133.49
Episode:  1845		score: -114.84		average: -132.87
Episode:  1846		score: -120.25		average: -133.07
Episode:  1847		score: -117.17		average: -133.30
Episode:  1848		score: -109.43		average: -133.31
Episode:  1849		score: -108.55		average: -131.80
Episode:  1850		score: -184.11		average: -133.46
Episode:  1851		score: -115.85		average: -133.46
Episode:  1852		score: -156.32		average: -133.41
Episode:  1853		score: -113.36		average: -133.28
Episode:  1854		score: -120.16		average: -133.21
Episode:  1855		score: -126.45		average: -133.34
Episode:  1856		score: -122.53		average: -133.32
Episode:  1857		score: -118.31		average: -133.43
Episode:  1858		score: -261.44		average: -134.68
Episode:  1859		score: -170.87		average: -134.86
Episode:  1860		score: -100.69		average: -134.69
Episode:  1861		score: -146.33		average: -134.69
Episode:  1862		scor

Episode:  2011		score: -135.12		average: -128.73
Episode:  2012		score: -148.60		average: -128.91
Episode:  2013		score: -144.89		average: -128.80
Episode:  2014		score: -193.15		average: -130.75
Episode:  2015		score: -182.97		average: -130.64
Episode:  2016		score: -103.36		average: -130.03
Episode:  2017		score: -204.25		average: -131.04
Episode:  2018		score: -155.41		average: -131.62
Episode:  2019		score: -108.29		average: -131.18
Episode:  2020		score: -127.93		average: -131.37
Episode:  2021		score: -135.61		average: -131.36
Episode:  2022		score:  -23.58		average: -130.36
Episode:  2023		score: -141.50		average: -130.57
Episode:  2024		score: -122.79		average: -130.41
Episode:  2025		score: -151.35		average: -130.39
Episode:  2026		score:  -99.78		average: -130.11
Episode:  2027		score: -169.94		average: -130.24
Episode:  2028		score: -121.96		average: -130.48
Episode:  2029		score: -118.14		average: -130.34
Episode:  2030		score: -142.79		average: -130.26
Episode:  2031		scor

Episode:  2178		score: -156.80		average: -131.64
Episode:  2179		score: -180.76		average: -131.82
Episode:  2180		score:  -11.42		average: -130.19
Episode:  2181		score: -146.89		average: -130.27
Episode:  2182		score: -176.35		average: -131.18
Episode:  2183		score: -182.46		average: -131.88
Episode:  2184		score: -100.45		average: -131.96
Episode:  2185		score: -140.47		average: -131.92
Episode:  2186		score: -123.28		average: -131.78
Episode:  2187		score: -148.09		average: -132.07
Episode:  2188		score: -143.79		average: -132.24
Episode:  2189		score: -167.60		average: -132.70
Episode:  2190		score:  -98.77		average: -132.55
Episode:  2191		score: -149.13		average: -132.47
Episode:  2192		score: -157.03		average: -132.59
Episode:  2193		score: -160.39		average: -132.95
Episode:  2194		score: -100.72		average: -131.17
Episode:  2195		score: -115.41		average: -130.91
Episode:  2196		score: -199.24		average: -131.30
Episode:  2197		score: -181.71		average: -132.11
Episode:  2198		scor

Episode:  2346		score:  -25.49		average: -123.18
Episode:  2347		score: -155.10		average: -123.58
Episode:  2348		score:  -15.98		average: -122.49
Episode:  2349		score: -108.67		average: -121.92
Episode:  2350		score: -188.14		average: -122.43
Episode:  2351		score:    1.96		average: -120.50
Episode:  2352		score: -156.35		average: -120.64
Episode:  2353		score:  -95.99		average: -120.34
Episode:  2354		score: -157.47		average: -120.55
Episode:  2355		score: -211.73		average: -122.61
Episode:  2356		score: -191.13		average: -123.59
Episode:  2357		score: -123.41		average: -123.28
Episode:  2358		score: -113.83		average: -123.50
Episode:  2359		score:  -98.44		average: -123.16
Episode:  2360		score: -167.63		average: -123.35
Episode:  2361		score: -152.06		average: -123.69
Episode:  2362		score: -192.77		average: -124.27
Episode:  2363		score: -137.77		average: -124.51
Episode:  2364		score: -135.22		average: -125.08
Episode:  2365		score:   -5.89		average: -124.08
Episode:  2366		scor

Episode:  2514		score: -136.86		average: -139.32
Episode:  2515		score: -123.07		average: -138.52
Episode:  2516		score: -118.20		average: -136.62
Episode:  2517		score: -125.45		average: -136.68
Episode:  2518		score: -120.71		average: -134.32
Episode:  2519		score: -172.21		average: -134.16
Episode:  2520		score: -139.31		average: -134.27
Episode:  2521		score:   45.10		average: -132.45
Episode:  2522		score: -116.68		average: -132.51
Episode:  2523		score: -140.87		average: -132.52
Episode:  2524		score: -153.37		average: -132.15
Episode:  2525		score: -153.70		average: -132.31
Episode:  2526		score: -136.15		average: -132.49
Episode:  2527		score: -134.99		average: -132.36
Episode:  2528		score: -135.46		average: -132.36
Episode:  2529		score: -113.04		average: -132.36
Episode:  2530		score: -109.89		average: -132.30
Episode:  2531		score:  -96.34		average: -131.40
Episode:  2532		score: -355.94		average: -133.79
Episode:  2533		score: -123.48		average: -133.68
Episode:  2534		scor

Episode:  2682		score: -134.69		average: -131.14
Episode:  2683		score: -138.01		average: -131.45
Episode:  2684		score: -141.37		average: -131.62
Episode:  2685		score: -132.42		average: -131.69
Episode:  2686		score: -133.99		average: -131.59
Episode:  2687		score:  -21.02		average: -131.65
Episode:  2688		score: -173.29		average: -131.63
Episode:  2689		score: -116.62		average: -131.37
Episode:  2690		score: -143.49		average: -131.32
Episode:  2691		score: -155.53		average: -131.45
Episode:  2692		score: -153.55		average: -131.69
Episode:  2693		score: -135.93		average: -131.45
Episode:  2694		score: -126.09		average: -131.13
Episode:  2695		score: -129.09		average: -131.79
Episode:  2696		score: -116.78		average: -131.19
Episode:  2697		score: -125.75		average: -130.71
Episode:  2698		score:  -19.72		average: -129.48
Episode:  2699		score: -123.25		average: -129.58
Episode:  2700		score: -168.03		average: -130.37
Episode:  2701		score: -144.53		average: -130.13
Episode:  2702		scor

Episode:  2851		score: -181.49		average: -133.69
Episode:  2852		score:  -92.54		average: -133.39
Episode:  2853		score: -125.86		average: -132.82
Episode:  2854		score: -147.43		average: -132.81
Episode:  2855		score: -151.71		average: -133.14
Episode:  2856		score: -141.75		average: -133.41
Episode:  2857		score:  -93.87		average: -132.98
Episode:  2858		score:  -86.68		average: -132.62
Episode:  2859		score: -141.13		average: -132.56
Episode:  2860		score:  -92.32		average: -132.04
Episode:  2861		score: -193.44		average: -134.13
Episode:  2862		score: -143.30		average: -134.18
Episode:  2863		score: -133.75		average: -133.96
Episode:  2864		score:  -88.26		average: -133.86
Episode:  2865		score: -166.52		average: -134.46
Episode:  2866		score: -126.32		average: -134.12
Episode:  2867		score: -100.15		average: -134.24
Episode:  2868		score: -138.92		average: -134.67
Episode:  2869		score: -115.78		average: -135.79
Episode:  2870		score: -145.44		average: -135.36
Episode:  2871		scor

Episode:  3019		score: -119.46		average: -134.72
Episode:  3020		score: -124.97		average: -134.73
Episode:  3021		score: -141.22		average: -134.84
Episode:  3022		score: -148.24		average: -134.79
Episode:  3023		score: -149.45		average: -134.90
Episode:  3024		score: -137.98		average: -135.09
Episode:  3025		score: -141.87		average: -135.09
Episode:  3026		score: -138.21		average: -135.46
Episode:  3027		score: -334.89		average: -137.58
Episode:  3028		score: -139.89		average: -137.63
Episode:  3029		score: -151.36		average: -137.93
Episode:  3030		score: -203.39		average: -138.61
Episode:  3031		score: -138.19		average: -139.05
Episode:  3032		score: -128.00		average: -139.25
Episode:  3033		score: -122.90		average: -139.21
Episode:  3034		score: -177.11		average: -139.72
Episode:  3035		score: -148.93		average: -139.97
Episode:  3036		score: -104.44		average: -139.05
Episode:  3037		score: -145.44		average: -139.43
Episode:  3038		score: -133.24		average: -139.51
Episode:  3039		scor

Episode:  3186		score: -152.08		average: -139.65
Episode:  3187		score: -155.16		average: -140.22
Episode:  3188		score: -163.40		average: -140.11
Episode:  3189		score: -205.32		average: -140.36
Episode:  3190		score:  -93.31		average: -139.66
Episode:  3191		score: -194.21		average: -140.40
Episode:  3192		score: -167.30		average: -140.88
Episode:  3193		score: -128.90		average: -140.88
Episode:  3194		score: -174.46		average: -141.12
Episode:  3195		score: -160.24		average: -141.56
Episode:  3196		score:  -89.25		average: -140.89
Episode:  3197		score: -139.20		average: -141.17
Episode:  3198		score: -134.55		average: -141.26
Episode:  3199		score: -141.19		average: -141.32
Episode:  3200		score:  -34.47		average: -139.95
Episode:  3201		score: -138.37		average: -138.77
Episode:  3202		score: -163.92		average: -138.82
Episode:  3203		score: -154.10		average: -138.60
Episode:  3204		score:   19.06		average: -136.81
Episode:  3205		score: -116.86		average: -136.25
Episode:  3206		scor

Episode:  3354		score: -111.65		average: -130.17
Episode:  3355		score: -135.27		average: -130.08
Episode:  3356		score: -112.04		average: -130.96
Episode:  3357		score: -117.52		average: -130.75
Episode:  3358		score: -120.12		average: -130.59
Episode:  3359		score:  -89.69		average: -129.66
Episode:  3360		score: -173.81		average: -130.18
Episode:  3361		score: -146.08		average: -130.20
Episode:  3362		score:   -5.04		average: -129.11
Episode:  3363		score: -147.99		average: -130.78
Episode:  3364		score: -150.68		average: -130.98
Episode:  3365		score: -166.55		average: -131.12
Episode:  3366		score: -190.26		average: -131.24
Episode:  3367		score: -105.21		average: -130.89
Episode:  3368		score: -175.03		average: -131.07
Episode:  3369		score: -151.39		average: -131.31
Episode:  3370		score: -190.04		average: -131.57
Episode:  3371		score: -164.03		average: -131.59
Episode:  3372		score: -133.02		average: -131.66
Episode:  3373		score: -117.18		average: -131.76
Episode:  3374		scor