In [None]:
import os

import random
import gym
import pylab
import numpy as np
import tensorflow as tf
import time
import matplotlib.pyplot as plt

#tf.config.experimental_run_functions_eagerly(True)
tf.compat.v1.disable_eager_execution()
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras import backend as K
import copy

EPISODES = 100000 # Upper bound on the number of episodes played before the training loop stops
LOSS_CLIPPING = 0.2 # Clipping range suggested in the original PPO paper (not referenced by the code visible in this file)
ENTROPY_LOSS = 0.001 # Presumably the entropy-bonus coefficient (not referenced by the code visible in this file)
LR = 0.00025 # Initial learning rate for both optimizers; decayed at runtime in run_batch
NUM_ACTIONS = 4 # Number of discrete actions, i.e. the size of the actor's softmax output
NUM_STATES = 8 # Length of the observation vector fed to both networks
EPOCHS = 10 # Epochs to train the networks on each buffer (recommended between 3 and 30)
BATCH_SIZE = 64 # Mini-batch size used when fitting the networks
BUFFER_SIZE = 2048 # Number of environment steps collected before each training update
SHUFFLE = True # Whether to shuffle the buffer while fitting
OPTIMIZER = Adam # Optimizer class used for both actor and critic
GAMMA = 0.99 # Discount factor applied to the bootstrapped next-state value
LAMBDA = 0.95 # Smoothing factor of the Generalized Advantage Estimation (GAE)
NORMALIZE = True # Whether to standardize the GAE advantages (zero mean, unit std)



# Create the actor used to select the action given an state
class Actor_Model:
    """Policy network: maps a state vector to a probability for each action.

    The compiled Keras model is exposed as ``self.model``. It has three hidden
    layers of 64 ReLU units and a softmax output of size ``NUM_ACTIONS``.

    NOTE(review): the model is compiled with plain categorical cross-entropy,
    not a clipped PPO surrogate loss; LOSS_CLIPPING and ENTROPY_LOSS are not
    used here.
    """

    def __init__(self):
        # Pass the shape explicitly as a tuple so it does not depend on Keras
        # accepting a bare integer.
        X_input = Input(shape=(NUM_STATES,))

        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X_input)
        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)
        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)

        # Softmax, because the output is a probability distribution over actions
        output = Dense(NUM_ACTIONS, activation="softmax")(X)

        # `learning_rate` is the supported keyword (`lr` is a deprecated alias)
        # and matches the `optimizer.learning_rate` attribute updated later.
        self.model = Model(inputs=X_input, outputs=output)
        self.model.compile(loss='categorical_crossentropy', optimizer=OPTIMIZER(learning_rate=LR))

# Create the critic which will criticise how the actor is performing    
class Critic_Model:
    """Value network: maps a state vector to a single scalar value estimate.

    The compiled Keras model is exposed as ``self.model``. It has three hidden
    layers of 64 ReLU units and one linear output unit, trained with MSE.
    """

    def __init__(self):
        # Pass the shape explicitly as a tuple so it does not depend on Keras
        # accepting a bare integer.
        X_input = Input(shape=(NUM_STATES,))

        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X_input)
        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)
        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)

        # Linear output: the estimated value of the state
        value = Dense(1)(X)

        # MSE regression against the value targets, using the configured
        # optimizer; `learning_rate` replaces the deprecated `lr` keyword.
        self.model = Model(inputs=X_input, outputs=value)
        self.model.compile(loss='mse', optimizer=OPTIMIZER(learning_rate=LR))

# Combine both Actor and Critic to create the agent
class PPOAgent:
    """Actor-critic agent trained on fixed-size rollouts from a Gym environment.

    NOTE(review): the actor is fitted with advantage-weighted categorical
    cross-entropy. No clipped surrogate objective or entropy bonus is applied
    anywhere in this class, and the action probabilities recorded during the
    rollout are not used for training. Confirm whether that is intended.
    """

    def __init__(self, env_name):
        """Create the environment, both networks and the bookkeeping state.

        Args:
            env_name: Gym environment id passed to ``gym.make``.
        """
        # Environment parameters
        self.env_name = env_name
        self.env = gym.make(env_name)
        self.episode = 0  # episodes completed since start
        # Best 100-episode average seen so far. It starts at 0, so averages
        # below 0 never update it (and never trigger the LR decay).
        self.max_average = 0

        # Per-episode scores (plotted once training finishes)
        self.scores_, self.average_ = [], []

        # Create Actor-Critic network models
        self.Actor = Actor_Model()
        self.Critic = Critic_Model()

        # File names used by save() / load()
        self.Actor_name = f"{self.env_name}_PPO_Actor.h5"
        self.Critic_name = f"{self.env_name}_PPO_Critic.h5"

    def act(self, state):
        """Sample an action from the actor's output distribution.

        Args:
            state: Batch holding a single state (reshaped to
                ``[1, NUM_STATES]`` by the caller).

        Returns:
            Tuple ``(action, action_onehot, prediction)``: the sampled action
            index, its one-hot encoding, and the action probabilities.
        """
        # Action probabilities for the single state in the batch
        prediction = self.Actor.model.predict(state)[0]

        # Stochastic policy: sample according to the predicted probabilities
        action = np.random.choice(NUM_ACTIONS, p=prediction)
        action_onehot = np.zeros([NUM_ACTIONS])
        action_onehot[action] = 1
        return action, action_onehot, prediction

    def get_gaes(self, rewards, dones, values, next_values):
        """Compute Generalized Advantage Estimates and critic targets.

        Args:
            rewards: Per-step rewards.
            dones: Per-step terminal flags; a true flag stops bootstrapping
                and cuts the backward accumulation at that step.
            values: Critic estimates for each state (1-D).
            next_values: Critic estimates for each next state (1-D).

        Returns:
            Tuple ``(advantages, targets)`` of column arrays with one row per
            step. Advantages are standardized when ``NORMALIZE`` is set;
            targets are the un-normalized advantages plus ``values``.
        """
        # One-step TD errors; (1 - d) drops the bootstrap term on terminal steps
        deltas = [r + GAMMA * (1 - d) * nv - v for r, d, nv, v in zip(rewards, dones, next_values, values)]

        # Convert list to array as .mean() and .std() are used later
        deltas = np.stack(deltas)
        gaes = copy.deepcopy(deltas)

        # Backward recursion: discounted sum of the following TD errors
        for t in reversed(range(len(deltas) - 1)):
            gaes[t] = gaes[t] + (1 - dones[t]) * GAMMA * LAMBDA * gaes[t + 1]

        # Targets are built before normalization so they stay on the value scale
        target = gaes + values
        if NORMALIZE:
            gaes = (gaes - gaes.mean()) / (gaes.std() + 1e-8)
        return np.vstack(gaes), np.vstack(target)

    def replay(self, states, actions, rewards, predictions, dones, next_states):
        """Fit the actor and the critic on one buffer of transitions.

        Works for any buffer length, so a partially filled buffer (e.g. after
        an early stop) is handled as well.

        Args:
            states: List of ``[1, NUM_STATES]`` state arrays.
            actions: List of one-hot action arrays.
            rewards: List of per-step rewards.
            predictions: Action probabilities recorded during the rollout;
                accepted for interface compatibility but currently unused.
            dones: List of per-step terminal flags.
            next_states: List of ``[1, NUM_STATES]`` next-state arrays.
        """
        # Reshape memory to appropriate shape for training
        states = np.vstack(states)
        next_states = np.vstack(next_states)
        actions = np.vstack(actions)

        # Critic estimates, flattened to 1-D. reshape(-1) (unlike squeeze)
        # keeps a one-transition buffer iterable.
        values = np.reshape(self.Critic.model.predict(states), -1)
        next_values = np.reshape(self.Critic.model.predict(next_states), -1)

        # Get the advantage
        advantages, target = self.get_gaes(rewards, dones, values, next_values)

        # One weight per sample; derive the length from the data instead of
        # hard-coding the buffer size.
        advantages = np.reshape(advantages, -1)

        # Actor: cross-entropy on the taken actions, weighted by the advantage.
        # Critic: regression on the value targets.
        self.Actor.model.fit(states, actions, sample_weight=advantages, epochs=EPOCHS, verbose=0, shuffle=SHUFFLE, batch_size=BATCH_SIZE)
        self.Critic.model.fit(states, target, epochs=EPOCHS, verbose=0, shuffle=SHUFFLE, batch_size=BATCH_SIZE)

    def load(self):
        """Load actor and critic weights from the files written by save()."""
        self.Actor.model.load_weights(self.Actor_name)
        self.Critic.model.load_weights(self.Critic_name)

    def save(self):
        """Write actor and critic weights to their respective files."""
        self.Actor.model.save_weights(self.Actor_name)
        self.Critic.model.save_weights(self.Critic_name)

    def run_batch(self):
        """Alternate between collecting BUFFER_SIZE steps and training on them.

        Episodes may span several buffers. The loop stops when the average
        score over the last 100 episodes exceeds 200, or once EPISODES
        episodes have been played. The module-level LR is decayed by 5% each
        time a new best 100-episode average is reached.
        """
        global LR
        state = self.env.reset()
        state = np.reshape(state, [1, NUM_STATES])
        done, score = False, 0
        finished = False
        while not finished:
            # Instantiate or reset the rollout memory
            states, next_states, actions, rewards, predictions, dones = [], [], [], [], [], []
            for t in range(BUFFER_SIZE):
                # Actor picks an action
                action, action_onehot, prediction = self.act(state)
                # Retrieve new state, reward, and whether the state is terminal
                next_state, reward, done, _ = self.env.step(action)
                # Memorize the transition for training
                states.append(state)
                next_states.append(np.reshape(next_state, [1, NUM_STATES]))
                actions.append(action_onehot)
                rewards.append(reward)
                dones.append(done)
                predictions.append(prediction)
                # Update current state
                state = np.reshape(next_state, [1, NUM_STATES])
                score += reward
                if done:
                    self.episode += 1
                    self.scores_.append(score)
                    if self.episode >= 100:
                        average = sum(self.scores_[-100:])/100
                        print('Episode: {:>5}\t\tscore: {:>7.2f}\t\taverage: {:>7.2f}'.format(self.episode, score, average))
                        if average > self.max_average:
                            self.max_average = average
                            # Only checkpoint once the agent is reasonably good
                            if self.max_average > 150:
                                self.save()
                            # Decay the learning rate on every new best average
                            LR *= 0.95
                            K.set_value(self.Actor.model.optimizer.learning_rate, LR)
                            K.set_value(self.Critic.model.optimizer.learning_rate, LR)

                        if average > 200:
                            # Solved: plot the score history and leave the
                            # collection loop with a partially filled buffer
                            plt.plot(self.scores_)
                            plt.xlabel("Episode")
                            plt.ylabel("Score")
                            finished = True
                            break

                    else:
                        # Fewer than 100 episodes so far: average over all of them
                        print('Episode: {:>5}\t\tscore: {:>7.2f}\t\taverage: {:>7.2f}'.format(self.episode, score, sum(self.scores_)/len(self.scores_)))

                    state, done, score = self.env.reset(), False, 0
                    state = np.reshape(state, [1, NUM_STATES])

            # Train on whatever was collected (possibly a partial buffer)
            self.replay(states, actions, rewards, predictions, dones, next_states)
            if self.episode >= EPISODES:
                break
        self.env.close()
                    
if __name__ == "__main__":
    # Train on LunarLander and report the wall-clock duration in minutes.
    env_name = 'LunarLander-v2'
    start = time.time()
    agent = PPOAgent(env_name)
    # Collect a full buffer of steps, then train on it; repeat until done.
    agent.run_batch()
    print((time.time() - start) / 60)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Episode:     1		score: -410.91		average: -410.91
Episode:     2		score: -348.11		average: -379.51
Episode:     3		score:  -99.32		average: -286.11
Episode:     4		score:  -48.62		average: -226.74
Episode:     5		score:  -76.96		average: -196.78
Episode:     6		score: -158.77		average: -190.45
Episode:     7		score:  -84.17		average: -175.27
Episode:     8		score: -159.45		average: -173.29
Episode:     9		score: -193.51		average: -175.54
Episode:    10		score: -323.99		average: -190.38
Episode:    11		score: -220.21		average: -193.09
Episode:    12		score: -337.56		average: -205.13
Episode:    13		score: -524.00		average: -229.66
Episode:    14		score: -340.30		average: -237.56
Episode:    15		score: -203.28		average: -235.28
Episode:    16		score: -216.58		average: -234.11
Episode:    17		score: -260.36		average: -235.65
Episode:    18		score:  -47.18		average: -225.18
Episode:    19		score: -132.18		avera

Episode:   161		score: -190.06		average: -392.34
Episode:   162		score: -296.01		average: -393.94
Episode:   163		score: -328.93		average: -395.76
Episode:   164		score: -612.25		average: -400.49
Episode:   165		score: -449.33		average: -403.41
Episode:   166		score: -688.28		average: -408.90
Episode:   167		score: -545.23		average: -412.74
Episode:   168		score: -188.86		average: -411.30
Episode:   169		score: -555.74		average: -412.72
Episode:   170		score: -325.59		average: -411.03
Episode:   171		score: -527.36		average: -413.30
Episode:   172		score: -615.82		average: -415.72
Episode:   173		score: -175.28		average: -415.02
Episode:   174		score: -226.54		average: -411.25
Episode:   175		score: -610.27		average: -413.75
Episode:   176		score: -218.61		average: -413.95
Episode:   177		score: -626.90		average: -416.92
Episode:   178		score: -314.27		average: -416.28
Episode:   179		score: -704.66		average: -418.28
Episode:   180		score: -242.47		average: -414.75
Episode:   181		scor

Episode:   329		score: -780.10		average: -570.08
Episode:   330		score: -454.89		average: -570.48
Episode:   331		score: -488.97		average: -571.12
Episode:   332		score: -934.54		average: -575.45
Episode:   333		score: -481.60		average: -576.98
Episode:   334		score: -485.16		average: -574.92
Episode:   335		score: -904.02		average: -580.08
Episode:   336		score: -512.07		average: -579.88
Episode:   337		score: -448.17		average: -577.33
Episode:   338		score: -371.25		average: -576.48
Episode:   339		score: -505.37		average: -575.69
Episode:   340		score: -525.23		average: -577.48
Episode:   341		score: -505.85		average: -577.38
Episode:   342		score: -453.08		average: -578.85
Episode:   343		score: -447.13		average: -578.85
Episode:   344		score: -499.17		average: -579.99
Episode:   345		score: -782.42		average: -582.44
Episode:   346		score: -314.27		average: -581.13
Episode:   347		score: -557.62		average: -581.86
Episode:   348		score: -1017.20		average: -587.06
Episode:   349		sco

Episode:   497		score: -608.71		average: -569.52
Episode:   498		score: -343.69		average: -568.88
Episode:   499		score: -791.26		average: -571.47
Episode:   500		score: -694.62		average: -573.77
Episode:   501		score: -511.01		average: -573.59
Episode:   502		score: -494.77		average: -573.64
Episode:   503		score: -832.73		average: -575.34
Episode:   504		score: -693.77		average: -577.43
Episode:   505		score: -834.60		average: -581.71
Episode:   506		score: -738.52		average: -581.97
Episode:   507		score: -716.55		average: -583.39
Episode:   508		score: -904.09		average: -588.07
Episode:   509		score: -642.86		average: -589.84
Episode:   510		score: -723.42		average: -589.90
Episode:   511		score: -494.94		average: -589.91
Episode:   512		score: -346.23		average: -587.00
Episode:   513		score: -751.63		average: -587.44
Episode:   514		score: -458.74		average: -588.99
Episode:   515		score: -377.18		average: -586.63
Episode:   516		score: -453.94		average: -586.88
Episode:   517		scor

Episode:   665		score: -341.68		average: -576.71
Episode:   666		score: -929.27		average: -578.33
Episode:   667		score: -441.96		average: -577.29
Episode:   668		score: -723.49		average: -581.21
Episode:   669		score: -540.56		average: -582.43
Episode:   670		score: -486.94		average: -582.77
Episode:   671		score: -863.48		average: -586.16
Episode:   672		score: -384.21		average: -582.90
Episode:   673		score: -575.19		average: -584.61
Episode:   674		score: -754.68		average: -585.01
Episode:   675		score: -694.21		average: -586.42
Episode:   676		score: -626.51		average: -589.41
Episode:   677		score: -648.81		average: -588.25
Episode:   678		score: -662.87		average: -590.48
Episode:   679		score: -334.69		average: -584.97
Episode:   680		score: -495.89		average: -585.80
Episode:   681		score: -336.51		average: -581.65
Episode:   682		score: -574.95		average: -582.41
Episode:   683		score: -541.29		average: -580.03
Episode:   684		score: -817.13		average: -583.25
Episode:   685		scor

Episode:   833		score: -641.98		average: -576.15
Episode:   834		score: -383.11		average: -573.97
Episode:   835		score: -454.31		average: -572.90
Episode:   836		score: -498.30		average: -573.94
Episode:   837		score: -329.16		average: -572.78
Episode:   838		score: -507.09		average: -569.22
Episode:   839		score: -419.79		average: -566.34
Episode:   840		score: -380.85		average: -565.30
Episode:   841		score: -774.77		average: -566.13
Episode:   842		score: -481.64		average: -566.12
Episode:   843		score: -439.04		average: -566.42
Episode:   844		score: -730.24		average: -569.45
Episode:   845		score: -715.47		average: -569.40
Episode:   846		score: -530.91		average: -568.84
Episode:   847		score: -759.46		average: -572.04
Episode:   848		score: -1164.68		average: -577.40
Episode:   849		score: -770.68		average: -580.14
Episode:   850		score: -492.34		average: -580.00
Episode:   851		score: -434.94		average: -579.18
Episode:   852		score: -471.20		average: -576.07
Episode:   853		sco

Episode:  1001		score: -668.58		average: -603.85
Episode:  1002		score: -484.64		average: -601.93
Episode:  1003		score: -570.05		average: -599.10
Episode:  1004		score: -323.57		average: -595.58
Episode:  1005		score: -414.28		average: -591.09
Episode:  1006		score: -677.56		average: -593.08
Episode:  1007		score: -592.16		average: -595.87
Episode:  1008		score: -374.51		average: -592.26
Episode:  1009		score: -711.18		average: -595.19
Episode:  1010		score: -444.00		average: -594.09
Episode:  1011		score: -918.86		average: -598.57
Episode:  1012		score: -707.28		average: -597.06
Episode:  1013		score: -585.94		average: -599.08
Episode:  1014		score: -443.03		average: -595.98
Episode:  1015		score: -756.94		average: -598.89
Episode:  1016		score: -829.60		average: -598.62
Episode:  1017		score: -773.37		average: -597.80
Episode:  1018		score: -429.65		average: -593.42
Episode:  1019		score: -355.65		average: -589.38
Episode:  1020		score: -468.32		average: -586.80
Episode:  1021		scor

Episode:  1169		score: -352.26		average: -566.49
Episode:  1170		score: -448.39		average: -561.95
Episode:  1171		score: -465.82		average: -558.97
Episode:  1172		score: -474.47		average: -558.20
Episode:  1173		score: -420.55		average: -556.77
Episode:  1174		score: -795.36		average: -560.17
Episode:  1175		score: -539.24		average: -557.92
Episode:  1176		score: -535.64		average: -558.66
Episode:  1177		score: -331.67		average: -556.53
Episode:  1178		score: -755.97		average: -557.64
Episode:  1179		score: -378.80		average: -555.52
Episode:  1180		score: -534.78		average: -552.31
Episode:  1181		score: -867.68		average: -556.42
Episode:  1182		score: -502.70		average: -556.85
Episode:  1183		score: -546.35		average: -556.81
Episode:  1184		score: -855.75		average: -561.02
Episode:  1185		score: -325.86		average: -558.63
Episode:  1186		score: -379.40		average: -555.21
Episode:  1187		score: -511.16		average: -554.98
Episode:  1188		score: -423.99		average: -554.35
Episode:  1189		scor

Episode:  1338		score: -572.49		average: -571.62
Episode:  1339		score: -575.31		average: -569.60
Episode:  1340		score: -986.44		average: -573.39
Episode:  1341		score: -782.14		average: -574.01
Episode:  1342		score: -790.94		average: -576.92
Episode:  1343		score: -438.90		average: -577.40
Episode:  1344		score: -506.09		average: -578.20
Episode:  1345		score: -645.19		average: -580.04
Episode:  1346		score: -464.47		average: -574.63
Episode:  1347		score: -409.50		average: -572.47
Episode:  1348		score: -485.75		average: -572.11
Episode:  1349		score: -627.53		average: -570.55
Episode:  1350		score: -740.28		average: -572.36
Episode:  1351		score: -421.27		average: -571.28
Episode:  1352		score: -698.29		average: -571.35
Episode:  1353		score: -306.73		average: -571.19
Episode:  1354		score: -551.49		average: -570.63
Episode:  1355		score: -918.24		average: -574.92
Episode:  1356		score: -581.47		average: -574.85
Episode:  1357		score: -520.97		average: -572.48
Episode:  1358		scor

Episode:  1508		score: -457.15		average: -568.17
Episode:  1509		score: -325.49		average: -566.84
Episode:  1510		score: -458.17		average: -564.54
Episode:  1511		score: -842.96		average: -568.13
Episode:  1512		score: -465.75		average: -569.20
Episode:  1513		score: -766.78		average: -571.88
Episode:  1514		score: -543.24		average: -572.73
Episode:  1515		score: -529.19		average: -567.87
Episode:  1516		score: -490.72		average: -569.29
Episode:  1517		score: -404.55		average: -565.52
Episode:  1518		score: -641.69		average: -563.66
Episode:  1519		score: -535.95		average: -565.16
Episode:  1520		score: -352.45		average: -563.37
Episode:  1521		score: -438.78		average: -561.59
Episode:  1522		score: -466.63		average: -563.12
Episode:  1523		score: -445.30		average: -563.06
Episode:  1524		score: -343.89		average: -558.96
Episode:  1525		score: -368.73		average: -559.23
Episode:  1526		score: -509.21		average: -558.42
Episode:  1527		score: -428.11		average: -557.73
Episode:  1528		scor

Episode:  1675		score: -482.88		average: -575.62
Episode:  1676		score: -434.42		average: -575.31
Episode:  1677		score: -802.75		average: -578.44
Episode:  1678		score: -815.22		average: -581.57
Episode:  1679		score: -506.15		average: -582.88
Episode:  1680		score: -557.29		average: -577.58
Episode:  1681		score: -534.37		average: -577.70
Episode:  1682		score: -387.23		average: -577.71
Episode:  1683		score: -360.10		average: -576.20
Episode:  1684		score: -749.92		average: -578.91
Episode:  1685		score: -556.26		average: -580.03
Episode:  1686		score: -539.57		average: -578.30
Episode:  1687		score: -841.82		average: -582.68
Episode:  1688		score: -541.77		average: -582.13
Episode:  1689		score: -471.29		average: -579.82
Episode:  1690		score: -400.27		average: -578.56
Episode:  1691		score: -359.38		average: -575.19
Episode:  1692		score: -728.90		average: -578.25
Episode:  1693		score: -356.37		average: -571.53
Episode:  1694		score: -337.44		average: -562.52
Episode:  1695		scor

Episode:  1843		score: -797.06		average: -587.51
Episode:  1844		score: -515.42		average: -587.50
Episode:  1845		score: -939.32		average: -584.51
Episode:  1846		score: -785.21		average: -586.47
Episode:  1847		score: -585.55		average: -587.02
Episode:  1848		score: -687.75		average: -590.68
Episode:  1849		score: -804.71		average: -588.62
Episode:  1850		score: -742.39		average: -590.71
Episode:  1851		score: -662.34		average: -591.87
Episode:  1852		score: -713.39		average: -591.07
Episode:  1853		score: -449.84		average: -591.31
Episode:  1854		score: -656.19		average: -593.41
Episode:  1855		score: -426.51		average: -589.15
Episode:  1856		score: -345.53		average: -587.35
Episode:  1857		score: -536.84		average: -588.91
Episode:  1858		score: -575.38		average: -588.58
Episode:  1859		score: -439.23		average: -585.79
Episode:  1860		score: -448.48		average: -585.32
Episode:  1861		score: -642.74		average: -586.59
Episode:  1862		score: -621.62		average: -584.88
Episode:  1863		scor

Episode:  2012		score: -522.95		average: -587.23
Episode:  2013		score: -718.98		average: -589.66
Episode:  2014		score: -472.42		average: -589.71
Episode:  2015		score: -331.81		average: -588.32
Episode:  2016		score: -579.97		average: -588.75
Episode:  2017		score: -705.16		average: -587.56
Episode:  2018		score: -314.22		average: -585.32
Episode:  2019		score: -485.46		average: -582.06
Episode:  2020		score: -542.91		average: -578.55
Episode:  2021		score: -542.03		average: -578.81
Episode:  2022		score: -627.71		average: -579.48
Episode:  2023		score: -609.56		average: -578.22
Episode:  2024		score: -483.43		average: -577.46
Episode:  2025		score: -401.72		average: -572.78
Episode:  2026		score: -868.27		average: -576.46
Episode:  2027		score: -396.25		average: -576.65
Episode:  2028		score: -452.54		average: -574.05
Episode:  2029		score: -732.41		average: -576.65
Episode:  2030		score: -495.28		average: -577.56
Episode:  2031		score: -447.53		average: -577.24
Episode:  2032		scor

Episode:  2180		score: -742.90		average: -572.71
Episode:  2181		score: -498.54		average: -572.51
Episode:  2182		score: -699.18		average: -572.06
Episode:  2183		score: -682.93		average: -574.77
Episode:  2184		score: -471.49		average: -574.00
Episode:  2185		score: -452.80		average: -574.11
Episode:  2186		score: -510.68		average: -576.00
Episode:  2187		score: -791.96		average: -578.98
Episode:  2188		score: -952.31		average: -583.06
Episode:  2189		score: -718.39		average: -584.81
Episode:  2190		score: -755.71		average: -588.33
Episode:  2191		score: -556.29		average: -588.69
Episode:  2192		score: -515.02		average: -588.83
Episode:  2193		score: -759.57		average: -590.06
Episode:  2194		score: -500.12		average: -588.99
Episode:  2195		score: -508.78		average: -584.56
Episode:  2196		score: -513.97		average: -581.04
Episode:  2197		score: -534.70		average: -578.94
Episode:  2198		score: -927.77		average: -579.54
Episode:  2199		score: -706.99		average: -580.83
Episode:  2200		scor

Episode:  2349		score: -806.07		average: -562.90
Episode:  2350		score: -566.78		average: -563.22
Episode:  2351		score: -409.76		average: -561.93
Episode:  2352		score: -963.53		average: -566.41
Episode:  2353		score: -642.66		average: -568.23
Episode:  2354		score: -699.69		average: -569.09
Episode:  2355		score: -494.57		average: -569.56
Episode:  2356		score: -545.98		average: -569.30
Episode:  2357		score: -424.78		average: -569.52
Episode:  2358		score: -430.88		average: -566.81
Episode:  2359		score: -364.36		average: -566.62
Episode:  2360		score: -526.95		average: -567.99
Episode:  2361		score: -485.21		average: -567.65
Episode:  2362		score: -767.01		average: -569.76
Episode:  2363		score: -591.94		average: -570.69
Episode:  2364		score: -790.65		average: -573.89
Episode:  2365		score: -504.02		average: -571.34
Episode:  2366		score: -370.10		average: -567.82
Episode:  2367		score: -628.48		average: -569.49
Episode:  2368		score: -781.08		average: -568.77
Episode:  2369		scor

Episode:  2516		score: -502.19		average: -548.86
Episode:  2517		score: -730.96		average: -549.81
Episode:  2518		score: -763.49		average: -553.71
Episode:  2519		score: -631.31		average: -555.81
Episode:  2520		score: -576.30		average: -555.00
Episode:  2521		score: -814.95		average: -555.62
Episode:  2522		score: -758.21		average: -559.12
Episode:  2523		score: -936.87		average: -563.82
Episode:  2524		score: -325.01		average: -561.82
Episode:  2525		score: -619.91		average: -561.72
Episode:  2526		score: -499.57		average: -559.07
Episode:  2527		score: -385.02		average: -554.24
Episode:  2528		score: -494.18		average: -554.11
Episode:  2529		score: -745.64		average: -552.28
Episode:  2530		score: -628.30		average: -550.87
Episode:  2531		score: -326.33		average: -548.38
Episode:  2532		score: -430.06		average: -547.96
Episode:  2533		score: -499.68		average: -546.27
Episode:  2534		score: -510.36		average: -544.87
Episode:  2535		score: -364.57		average: -543.83
Episode:  2536		scor

Episode:  2685		score: -740.60		average: -613.09
Episode:  2686		score: -398.82		average: -612.65
Episode:  2687		score: -669.18		average: -613.51
Episode:  2688		score: -488.56		average: -612.98
Episode:  2689		score: -883.17		average: -616.35
Episode:  2690		score: -477.00		average: -615.19
Episode:  2691		score: -556.19		average: -616.61
Episode:  2692		score: -831.22		average: -620.20
Episode:  2693		score: -540.72		average: -620.96
Episode:  2694		score: -474.42		average: -620.89
Episode:  2695		score: -808.15		average: -625.13
Episode:  2696		score: -514.31		average: -625.74
Episode:  2697		score: -687.33		average: -626.30
Episode:  2698		score: -740.25		average: -625.87
Episode:  2699		score: -502.00		average: -626.03
Episode:  2700		score: -651.82		average: -627.56
Episode:  2701		score: -611.38		average: -624.99
Episode:  2702		score: -786.61		average: -628.34
Episode:  2703		score: -628.87		average: -629.26
Episode:  2704		score: -479.76		average: -630.61
Episode:  2705		scor

Episode:  2855		score: -922.57		average: -591.55
Episode:  2856		score: -656.90		average: -594.10
Episode:  2857		score: -556.82		average: -596.15
Episode:  2858		score: -706.25		average: -598.21
Episode:  2859		score: -461.48		average: -597.95
Episode:  2860		score: -620.39		average: -594.76
Episode:  2861		score: -709.85		average: -597.11
Episode:  2862		score: -745.50		average: -599.68
Episode:  2863		score: -517.76		average: -600.43
Episode:  2864		score: -782.52		average: -603.66
Episode:  2865		score: -554.15		average: -604.77
Episode:  2866		score: -425.61		average: -601.29
Episode:  2867		score: -438.48		average: -595.76
Episode:  2868		score: -495.58		average: -593.54
Episode:  2869		score: -765.38		average: -595.81
Episode:  2870		score: -465.33		average: -592.86
Episode:  2871		score: -592.84		average: -593.68
Episode:  2872		score: -640.13		average: -595.54
Episode:  2873		score: -823.78		average: -599.91
Episode:  2874		score: -505.89		average: -598.74
Episode:  2875		scor

Episode:  3023		score: -424.77		average: -586.85
Episode:  3024		score: -402.25		average: -586.99
Episode:  3025		score: -588.69		average: -586.80
Episode:  3026		score: -486.07		average: -586.74
Episode:  3027		score: -437.86		average: -583.89
Episode:  3028		score: -588.15		average: -585.43
Episode:  3029		score: -356.84		average: -581.99
Episode:  3030		score: -878.77		average: -582.53
Episode:  3031		score: -749.38		average: -582.24
Episode:  3032		score: -744.05		average: -582.62
Episode:  3033		score: -496.27		average: -580.92
Episode:  3034		score: -468.04		average: -576.99
Episode:  3035		score: -543.87		average: -576.90
Episode:  3036		score: -686.90		average: -578.87
Episode:  3037		score: -643.75		average: -581.12
Episode:  3038		score: -518.75		average: -582.21
Episode:  3039		score: -819.92		average: -585.82
Episode:  3040		score: -527.75		average: -582.99
Episode:  3041		score: -558.84		average: -584.00
Episode:  3042		score: -344.89		average: -578.61
Episode:  3043		scor

Episode:  3192		score: -412.28		average: -569.76
Episode:  3193		score: -538.05		average: -571.61
Episode:  3194		score: -846.15		average: -571.27
Episode:  3195		score: -878.16		average: -574.47
Episode:  3196		score: -470.40		average: -571.73
Episode:  3197		score: -523.52		average: -572.54
Episode:  3198		score: -634.32		average: -570.22
Episode:  3199		score: -653.47		average: -571.72
Episode:  3200		score: -484.78		average: -573.01
Episode:  3201		score: -843.51		average: -576.69
Episode:  3202		score: -427.78		average: -575.44
Episode:  3203		score: -639.93		average: -577.59
Episode:  3204		score: -531.38		average: -578.00
Episode:  3205		score: -389.49		average: -576.57
Episode:  3206		score: -567.75		average: -577.46
Episode:  3207		score: -459.81		average: -577.80
Episode:  3208		score: -491.44		average: -577.52
Episode:  3209		score: -529.38		average: -578.31
Episode:  3210		score: -724.53		average: -579.17
Episode:  3211		score: -455.93		average: -578.02
Episode:  3212		scor

Episode:  3362		score: -782.14		average: -582.77
Episode:  3363		score: -706.34		average: -581.92
Episode:  3364		score: -542.83		average: -583.32
Episode:  3365		score: -543.31		average: -584.85
Episode:  3366		score: -941.77		average: -587.52
Episode:  3367		score: -845.02		average: -591.77
Episode:  3368		score: -380.68		average: -591.99
Episode:  3369		score: -453.57		average: -592.87
Episode:  3370		score: -443.87		average: -589.68
Episode:  3371		score: -889.94		average: -594.03
Episode:  3372		score: -625.95		average: -591.16
Episode:  3373		score: -578.51		average: -587.84
Episode:  3374		score: -688.19		average: -589.53
Episode:  3375		score: -779.97		average: -589.24
Episode:  3376		score: -827.77		average: -589.80
Episode:  3377		score: -480.24		average: -589.75
Episode:  3378		score: -426.98		average: -590.41
Episode:  3379		score: -594.79		average: -588.82
Episode:  3380		score: -326.22		average: -583.41
Episode:  3381		score: -461.87		average: -579.83
Episode:  3382		scor

Episode:  3531		score: -700.66		average: -597.19
Episode:  3532		score: -500.71		average: -598.82
Episode:  3533		score: -368.29		average: -598.21
Episode:  3534		score: -504.57		average: -596.20
Episode:  3535		score: -552.77		average: -596.73
Episode:  3536		score: -731.29		average: -595.93
Episode:  3537		score: -613.87		average: -598.85
Episode:  3538		score: -339.28		average: -597.08
Episode:  3539		score: -409.76		average: -595.47
Episode:  3540		score: -495.84		average: -596.34
Episode:  3541		score: -451.08		average: -595.82
Episode:  3542		score: -453.47		average: -594.58
Episode:  3543		score: -475.20		average: -591.62
Episode:  3544		score: -490.95		average: -591.63
Episode:  3545		score: -961.37		average: -596.87
Episode:  3546		score: -510.90		average: -595.96
Episode:  3547		score: -616.13		average: -596.06
Episode:  3548		score: -909.61		average: -599.91
Episode:  3549		score: -442.94		average: -600.46
Episode:  3550		score: -402.20		average: -599.42
Episode:  3551		scor

Episode:  3699		score: -815.35		average: -555.35
Episode:  3700		score: -375.46		average: -554.83
Episode:  3701		score: -354.04		average: -555.09
Episode:  3702		score: -689.89		average: -558.06
Episode:  3703		score: -525.80		average: -559.10
Episode:  3704		score: -604.68		average: -558.83
Episode:  3705		score: -536.49		average: -559.23
Episode:  3706		score: -345.19		average: -557.74
Episode:  3707		score: -655.25		average: -559.63
Episode:  3708		score: -573.33		average: -557.56
Episode:  3709		score: -517.51		average: -555.57
Episode:  3710		score: -483.31		average: -555.37
Episode:  3711		score: -375.32		average: -550.83
Episode:  3712		score: -541.25		average: -549.73
Episode:  3713		score: -533.54		average: -544.79
Episode:  3714		score: -989.50		average: -550.27
Episode:  3715		score: -377.73		average: -549.46
Episode:  3716		score: -470.54		average: -550.41
Episode:  3717		score: -555.81		average: -551.96
Episode:  3718		score: -936.28		average: -554.30
Episode:  3719		scor

Episode:  3868		score: -458.57		average: -592.13
Episode:  3869		score: -573.26		average: -592.23
Episode:  3870		score: -902.51		average: -592.87
Episode:  3871		score: -612.25		average: -593.81
Episode:  3872		score: -786.19		average: -595.33
Episode:  3873		score: -534.04		average: -594.30
Episode:  3874		score: -421.23		average: -593.52
Episode:  3875		score: -555.01		average: -595.26
Episode:  3876		score: -400.54		average: -592.37
Episode:  3877		score: -280.44		average: -590.91
Episode:  3878		score: -694.82		average: -584.58
Episode:  3879		score: -426.23		average: -581.15
Episode:  3880		score: -538.21		average: -582.41
Episode:  3881		score: -746.96		average: -583.87
Episode:  3882		score: -611.55		average: -586.64
Episode:  3883		score: -451.34		average: -585.90
Episode:  3884		score: -563.60		average: -581.93
Episode:  3885		score: -653.28		average: -584.13
Episode:  3886		score: -784.14		average: -584.25
Episode:  3887		score: -709.59		average: -583.11
Episode:  3888		scor

Episode:  4036		score: -409.78		average: -578.62
Episode:  4037		score: -842.22		average: -582.07
Episode:  4038		score: -789.23		average: -586.48
Episode:  4039		score: -628.29		average: -586.82
Episode:  4040		score: -572.29		average: -587.53
Episode:  4041		score: -360.58		average: -583.23
Episode:  4042		score: -720.93		average: -584.97
Episode:  4043		score: -933.65		average: -588.99
Episode:  4044		score: -475.72		average: -587.27
Episode:  4045		score: -529.97		average: -588.02
Episode:  4046		score: -738.53		average: -588.62
Episode:  4047		score: -776.09		average: -592.01
Episode:  4048		score: -481.17		average: -591.83
Episode:  4049		score: -463.65		average: -589.02
Episode:  4050		score: -586.91		average: -590.07
Episode:  4051		score: -466.34		average: -590.64
Episode:  4052		score: -475.88		average: -586.49
Episode:  4053		score: -570.22		average: -585.72
Episode:  4054		score: -515.39		average: -583.18
Episode:  4055		score: -803.97		average: -581.18
Episode:  4056		scor

Episode:  4205		score: -602.17		average: -559.47
Episode:  4206		score: -579.09		average: -560.47
Episode:  4207		score: -718.27		average: -563.49
Episode:  4208		score: -360.89		average: -561.60
Episode:  4209		score: -445.03		average: -560.48
Episode:  4210		score: -675.34		average: -562.98
Episode:  4211		score: -477.33		average: -564.06
Episode:  4212		score: -673.40		average: -566.92
Episode:  4213		score: -800.54		average: -565.67
Episode:  4214		score: -742.11		average: -567.91
Episode:  4215		score: -542.54		average: -567.34
Episode:  4216		score: -810.21		average: -570.23
Episode:  4217		score: -411.94		average: -567.02
Episode:  4218		score: -537.49		average: -568.86
Episode:  4219		score: -344.95		average: -567.15
Episode:  4220		score: -371.64		average: -563.28
Episode:  4221		score: -848.36		average: -567.19
Episode:  4222		score: -461.58		average: -567.07
Episode:  4223		score: -470.28		average: -567.09
Episode:  4224		score: -522.45		average: -564.72
Episode:  4225		scor

Episode:  4374		score: -866.82		average: -576.85
Episode:  4375		score: -434.59		average: -573.28
Episode:  4376		score: -674.99		average: -575.77
Episode:  4377		score: -757.62		average: -577.85
Episode:  4378		score: -590.20		average: -570.95
Episode:  4379		score: -603.87		average: -571.93
Episode:  4380		score: -454.95		average: -572.53
Episode:  4381		score: -696.17		average: -573.88
Episode:  4382		score: -444.85		average: -573.73
Episode:  4383		score: -694.25		average: -573.50
Episode:  4384		score: -738.59		average: -572.96
Episode:  4385		score: -608.27		average: -570.62
Episode:  4386		score: -628.10		average: -572.13
Episode:  4387		score: -406.94		average: -571.80
Episode:  4388		score: -341.16		average: -570.39
Episode:  4389		score: -480.60		average: -569.26
Episode:  4390		score: -556.88		average: -569.68
Episode:  4391		score: -749.86		average: -569.52
Episode:  4392		score: -486.87		average: -570.38
Episode:  4393		score: -780.80		average: -572.29
Episode:  4394		scor

Episode:  4543		score: -533.31		average: -602.54
Episode:  4544		score: -551.71		average: -602.90
Episode:  4545		score: -435.00		average: -597.78
Episode:  4546		score: -748.72		average: -598.12
Episode:  4547		score: -354.99		average: -596.97
Episode:  4548		score: -515.88		average: -598.23
Episode:  4549		score: -449.63		average: -594.78
Episode:  4550		score: -586.52		average: -596.78
Episode:  4551		score: -948.58		average: -602.29
Episode:  4552		score: -538.56		average: -603.68
Episode:  4553		score: -500.05		average: -604.55
Episode:  4554		score: -687.20		average: -606.92
Episode:  4555		score: -474.41		average: -607.72
Episode:  4556		score: -431.44		average: -602.84
Episode:  4557		score: -436.93		average: -602.12
Episode:  4558		score: -776.01		average: -606.34
Episode:  4559		score: -842.78		average: -610.05
Episode:  4560		score: -767.62		average: -607.86
Episode:  4561		score: -695.10		average: -609.27
Episode:  4562		score: -499.98		average: -607.85
Episode:  4563		scor

Episode:  4710		score: -295.60		average: -586.05
Episode:  4711		score: -475.34		average: -585.09
Episode:  4712		score: -423.52		average: -579.75
Episode:  4713		score: -398.68		average: -577.97
Episode:  4714		score: -943.85		average: -581.27
Episode:  4715		score: -787.29		average: -583.91
Episode:  4716		score: -563.63		average: -584.72
Episode:  4717		score: -782.92		average: -587.72
Episode:  4718		score: -951.40		average: -587.50
Episode:  4719		score: -689.32		average: -585.78
Episode:  4720		score: -333.68		average: -583.21
Episode:  4721		score: -855.97		average: -584.11
Episode:  4722		score: -584.21		average: -584.67
Episode:  4723		score: -770.86		average: -588.50
Episode:  4724		score: -452.03		average: -582.46
Episode:  4725		score: -321.47		average: -578.08
Episode:  4726		score: -495.10		average: -578.27
Episode:  4727		score: -350.01		average: -578.30
Episode:  4728		score: -408.54		average: -575.14
Episode:  4729		score: -728.69		average: -577.56
Episode:  4730		scor

Episode:  4877		score: -574.06		average: -570.99
Episode:  4878		score: -330.03		average: -570.45
Episode:  4879		score: -443.04		average: -571.32
Episode:  4880		score: -563.90		average: -571.62
Episode:  4881		score: -398.30		average: -568.26
Episode:  4882		score: -431.50		average: -568.24
Episode:  4883		score: -875.37		average: -568.34
Episode:  4884		score: -582.79		average: -566.20
Episode:  4885		score: -381.14		average: -561.69
Episode:  4886		score: -570.72		average: -560.42
Episode:  4887		score: -436.50		average: -557.31
Episode:  4888		score: -835.77		average: -558.94
Episode:  4889		score: -882.23		average: -558.76
Episode:  4890		score: -960.57		average: -563.85
Episode:  4891		score: -574.01		average: -565.55
Episode:  4892		score: -468.09		average: -565.29
Episode:  4893		score: -493.56		average: -564.12
Episode:  4894		score: -388.94		average: -561.57
Episode:  4895		score: -499.68		average: -561.97
Episode:  4896		score: -504.34		average: -559.82
Episode:  4897		scor

Episode:  5045		score: -328.53		average: -576.39
Episode:  5046		score: -800.99		average: -581.68
Episode:  5047		score: -295.63		average: -579.65
Episode:  5048		score: -479.27		average: -573.36
Episode:  5049		score: -688.31		average: -574.79
Episode:  5050		score: -392.27		average: -573.74
Episode:  5051		score: -485.39		average: -573.59
Episode:  5052		score: -515.55		average: -574.18
Episode:  5053		score: -856.68		average: -579.03
Episode:  5054		score: -499.40		average: -575.21
Episode:  5055		score: -443.80		average: -571.97
Episode:  5056		score: -451.85		average: -567.71
Episode:  5057		score: -799.50		average: -570.78
Episode:  5058		score: -546.29		average: -569.96
Episode:  5059		score: -422.22		average: -566.92
Episode:  5060		score: -355.63		average: -566.03
Episode:  5061		score: -496.62		average: -563.94
Episode:  5062		score: -373.03		average: -563.18
Episode:  5063		score: -620.20		average: -564.34
Episode:  5064		score: -749.58		average: -566.27
Episode:  5065		scor