Cartpole v0 prueba

In [1]:
import os

import random
import gym
import pylab
import numpy as np
import tensorflow as tf
import time
import matplotlib.pyplot as plt

#tf.config.experimental_run_functions_eagerly(True)
tf.compat.v1.disable_eager_execution()
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras import backend as K
import copy

EPISODES = 10000 # Number of times the enviroment is ran
LOSS_CLIPPING = 0.2 # Approximated values stated in the original paper
ENTROPY_LOSS = 0.001 # Epochs to train the network (recommended between 3 and 30)
LR = 0.0003 # Learning rate 
NUM_ACTIONS = 2 # Number of possible action in the environment
NUM_STATES = 4 # Number of possible states in the exvironment
EPOCHS = 10 # Epochs to train the network (recommended between 3 and 30)
BATCH_SIZE = 64 # Batch size for the neural nets
BUFFER_SIZE = 2048 # Buffer of experiences
SHUFFLE = True # Whether to shuffle data or not while training
OPTIMIZER = Adam # Optimizer for both actor and critic
GAMMA = 0.99 # Used for the estimated reward
LAMBDA = 0.95 # Used in the original paper un the GAE
NORMALIZE = True # Whether to normalize GAE or not



# Create the actor used to select the action given an state
class Actor_Model:
    def __init__(self):
        X_input = Input(NUM_STATES)

        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X_input)
        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)
        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)
        
        # Softmax as there are different probabilities depending on the action
        output = Dense(NUM_ACTIONS, activation="softmax")(X)
        
        # Compile the model with the custom loss
        self.model = Model(inputs = X_input, outputs = output)
        self.model.compile(loss=self.ppo_loss, optimizer=OPTIMIZER(lr=LR))

    # Custom loss functions for the PPO
    def ppo_loss(self, y_true, y_pred):
        # Unpack the elements given in the true label
        advantages, true_label, actions = y_true[:, :1], y_true[:, 1:1+NUM_ACTIONS], y_true[:, 1+NUM_ACTIONS:]
        
        prob = actions * y_pred
        old_prob = actions * true_label 

        ratio = K.exp(K.log(prob + 1e-10) - K.log(old_prob + 1e-10))
        
        p1 = ratio * advantages
        p2 = K.clip(ratio, min_value=1 - LOSS_CLIPPING, max_value=1 + LOSS_CLIPPING) * advantages

        actor_loss = -K.mean(K.minimum(p1, p2))

        entropy = -(y_pred * K.log(y_pred + 1e-10))
        entropy = ENTROPY_LOSS * K.mean(entropy)
        
        total_loss = actor_loss - entropy

        return total_loss

# Create the critic which will criticise how the actor is performing    
class Critic_Model:
    def __init__(self):
        X_input = Input(NUM_STATES)

        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X_input)
        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)
        X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)
        
        # Linear output to know how good the action is
        value = Dense(1)(X)
        
        # Compile it with mse loss and gradient descent
        self.model = Model(inputs=X_input, outputs = value)
        self.model.compile(loss='mse', optimizer=OPTIMIZER(lr=LR))

# Combine both Actor and Critic to create the agent
class PPOAgent:
    def __init__(self, env_name):
        # Environment parameters
        self.env_name = env_name       
        self.env = gym.make(env_name)
        self.episode = 0 # used to track current number episoded since start
        self.max_average = 0 # record max average reached
        
        # Used to plot a grapgh of the train process
        self.scores_, self.average_ = [], []

        # Create Actor-Critic network models
        self.Actor = Actor_Model()
        self.Critic = Critic_Model()
    
        # Names for the models
        self.Actor_name = f"{self.env_name}_PPO_Actor.h5"
        self.Critic_name = f"{self.env_name}_PPO_Critic.h5"

    # Get the action given the current state    
    def act(self, state):
        # Use the network to predict the next action to take, using the model
        prediction = self.Actor.model.predict(state)[0]
        
        # Probability based to choose the action
        action = np.random.choice(NUM_ACTIONS, p=prediction)
        action_onehot = np.zeros([NUM_ACTIONS])
        action_onehot[action] = 1
        return action, action_onehot, prediction

    # Generalized Advantage Estimation implemented in the original paper
    def get_gaes(self, rewards, dones, values, next_values):
        # Dones are used to track when is the final step of an episode, so next values are no applied
        deltas = [r + GAMMA * (1 - d) * nv - v for r, d, nv, v in zip(rewards, dones, next_values, values)]
        
        # Convert list to array as .mean() and .std() are used later
        deltas = np.stack(deltas)
        gaes = copy.deepcopy(deltas)
        
        for t in reversed(range(len(deltas) - 1)):
            gaes[t] = gaes[t] + (1 - dones[t]) * GAMMA * LAMBDA * gaes[t + 1]

        target = gaes + values
        if NORMALIZE:
            gaes = (gaes - gaes.mean()) / (gaes.std() + 1e-8)
        return np.vstack(gaes), np.vstack(target)

    def replay(self, states, actions, rewards, predictions, dones, next_states):
        # Reshape memory to appropriate shape for training
        states = np.vstack(states)
        next_states = np.vstack(next_states)
        actions = np.vstack(actions)
        predictions = np.vstack(predictions)

        # Get Critic network predictions for state and next state
        values = self.Critic.model.predict(states)
        next_values = self.Critic.model.predict(next_states)

        # Get the advantage
        advantages, target = self.get_gaes(rewards, dones, np.squeeze(values), np.squeeze(next_values))

        # Stack info to unpack it in the custom loss
        y_true = np.hstack([advantages, predictions, actions])

        # Training Actor and Critic networks
        a_loss = self.Actor.model.fit(states, y_true, epochs=EPOCHS, verbose=0, shuffle=SHUFFLE, batch_size=BATCH_SIZE)
        c_loss = self.Critic.model.fit(states, target, epochs=EPOCHS, verbose=0, shuffle=SHUFFLE, batch_size=BATCH_SIZE)

 
    def load(self):
        self.Actor.Actor.load_weights(self.Actor_name)
        self.Critic.Critic.load_weights(self.Critic_name)

    def save(self):
        self.Actor.model.save_weights(self.Actor_name)
        self.Critic.model.save_weights(self.Critic_name)

    def run_batch(self): # train every self.Training_batch episodes
        global LR
        state = self.env.reset()
        state = np.reshape(state, [1, NUM_STATES])
        done, score = False, 0
        finished = False
        while finished == False:
            # Instantiate or reset games memory
            states, next_states, actions, rewards, predictions, dones = [], [], [], [], [], []
            for t in range(BUFFER_SIZE):
                #self.env.render()
                # Actor picks an action
                action, action_onehot, prediction = self.act(state)
                # Retrieve new state, reward, and whether the state is terminal
                next_state, reward, done, _ = self.env.step(action)
                # Memorize (state, action, reward) for training
                states.append(state)
                next_states.append(np.reshape(next_state, [1, NUM_STATES]))
                actions.append(action_onehot)
                rewards.append(reward)
                dones.append(done)
                predictions.append(prediction)
                # Update current state
                state = np.reshape(next_state, [1, NUM_STATES])
                score += reward
                if done:
                    self.episode += 1
                    self.scores_.append(score)
                    #average, SAVING = self.PlotModel(score, self.episode)
                    #print("episode: {}/{}, score: {}, average: {:.2f} {}".format(self.episode, EPISODES, score, average, SAVING))
                    if self.episode >= 100:
                        average = sum(self.scores_[-100:])/100
                        print('Episode: {:>5}\t\tscore: {:>7.2f}\t\taverage: {:>7.2f}'.format(self.episode, score, average))
                        if average > self.max_average:
                            self.max_average = average
                            if self.max_average > 150:
                                self.save()
                            LR *= 0.95
                            K.set_value(self.Actor.model.optimizer.learning_rate, LR)
                            K.set_value(self.Critic.model.optimizer.learning_rate, LR)
                            
                        if average > 200:
                            plt.plot(self.scores_)
                            plt.xlabel("Episode")
                            plt.ylabel("Score")
                            finished = True
                            break

                    else:
                        print('Episode: {:>5}\t\tscore: {:>7.2f}\t\taverage: {:>7.2f}'.format(self.episode, score, sum(self.scores_)/len(self.scores_)))
                    
                    
                    state, done, score = self.env.reset(), False, 0
                    state = np.reshape(state, [1, NUM_STATES])
                    
            self.replay(states, actions, rewards, predictions, dones, next_states)
            if self.episode >= EPISODES:
                break
        self.env.close()  
                    
if __name__ == "__main__":
    start = time.time()
    env_name = 'CartPole-v0'
    agent = PPOAgent(env_name)
    #agent.run() # train as PPO, train every epesode
    agent.run_batch() # train as PPO, train every batch, trains better
    #agent.run_multiprocesses(num_worker = 8)  # train PPO multiprocessed (fastest)
    #agent.test()
    print((time.time() - start)/60)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Episode:     1		score:   20.00		average:   20.00
Episode:     2		score:   54.00		average:   37.00
Episode:     3		score:   20.00		average:   31.33
Episode:     4		score:   23.00		average:   29.25
Episode:     5		score:   27.00		average:   28.80
Episode:     6		score:   21.00		average:   27.50
Episode:     7		score:   17.00		average:   26.00
Episode:     8		score:   23.00		average:   25.62
Episode:     9		score:   34.00		average:   26.56
Episode:    10		score:   12.00		average:   25.10
Episode:    11		score:   24.00		average:   25.00
Episode:    12		score:   29.00		average:   25.33
Episode:    13		score:   21.00		average:   25.00
Episode:    14		score:   26.00		average:   25.07
Episode:    15		score:   14.00		average:   24.33
Episode:    16		score:   27.00		average:   24.50
Episode:    17		score:   26.00		average:   24.59
Episode:    18		score:   28.00		average:   24.78
Episode:    19		score:   12.00		avera

Episode:   160		score:   89.00		average:   33.01
Episode:   161		score:  138.00		average:   34.11
Episode:   162		score:   37.00		average:   34.37
Episode:   163		score:   47.00		average:   34.68
Episode:   164		score:   43.00		average:   34.92
Episode:   165		score:   47.00		average:   34.95
Episode:   166		score:   43.00		average:   35.17
Episode:   167		score:   70.00		average:   35.57
Episode:   168		score:   21.00		average:   35.56
Episode:   169		score:   93.00		average:   36.22
Episode:   170		score:   40.00		average:   36.39
Episode:   171		score:   27.00		average:   36.53
Episode:   172		score:   50.00		average:   36.41
Episode:   173		score:   30.00		average:   36.55
Episode:   174		score:   27.00		average:   36.72
Episode:   175		score:   67.00		average:   37.16
Episode:   176		score:   30.00		average:   37.35
Episode:   177		score:   36.00		average:   37.51
Episode:   178		score:   96.00		average:   38.31
Episode:   179		score:   74.00		average:   38.49
Episode:   180		scor

Episode:   329		score:   57.00		average:   82.35
Episode:   330		score:   40.00		average:   82.37
Episode:   331		score:   45.00		average:   82.08
Episode:   332		score:  142.00		average:   82.95
Episode:   333		score:   44.00		average:   82.91
Episode:   334		score:   66.00		average:   82.86
Episode:   335		score:   89.00		average:   81.75
Episode:   336		score:   53.00		average:   81.77
Episode:   337		score:   11.00		average:   80.67
Episode:   338		score:   72.00		average:   81.02
Episode:   339		score:   49.00		average:   81.21
Episode:   340		score:   62.00		average:   80.32
Episode:   341		score:   76.00		average:   80.38
Episode:   342		score:  107.00		average:   81.10
Episode:   343		score:   59.00		average:   81.08
Episode:   344		score:   78.00		average:   80.70
Episode:   345		score:  200.00		average:   81.87
Episode:   346		score:  200.00		average:   83.63
Episode:   347		score:   55.00		average:   83.85
Episode:   348		score:   41.00		average:   82.83
Episode:   349		scor

Episode:   497		score:   74.00		average:   83.92
Episode:   498		score:   61.00		average:   83.11
Episode:   499		score:   84.00		average:   82.65
Episode:   500		score:   40.00		average:   82.70
Episode:   501		score:  165.00		average:   83.63
Episode:   502		score:   54.00		average:   83.53
Episode:   503		score:  200.00		average:   85.08
Episode:   504		score:   68.00		average:   84.12
Episode:   505		score:   51.00		average:   84.14
Episode:   506		score:  129.00		average:   85.08
Episode:   507		score:   40.00		average:   84.23
Episode:   508		score:   79.00		average:   84.49
Episode:   509		score:   69.00		average:   84.30
Episode:   510		score:   32.00		average:   84.21
Episode:   511		score:  127.00		average:   85.15
Episode:   512		score:  160.00		average:   85.65
Episode:   513		score:  114.00		average:   86.28
Episode:   514		score:  107.00		average:   86.34
Episode:   515		score:   62.00		average:   85.85
Episode:   516		score:   42.00		average:   85.78
Episode:   517		scor

Episode:   665		score:   44.00		average:   90.14
Episode:   666		score:  200.00		average:   91.14
Episode:   667		score:   63.00		average:   91.39
Episode:   668		score:   42.00		average:   90.11
Episode:   669		score:  133.00		average:   91.13
Episode:   670		score:   98.00		average:   90.78
Episode:   671		score:  154.00		average:   91.56
Episode:   672		score:  156.00		average:   92.03
Episode:   673		score:   85.00		average:   92.58
Episode:   674		score:   72.00		average:   92.97
Episode:   675		score:   68.00		average:   91.92
Episode:   676		score:  110.00		average:   91.34
Episode:   677		score:  112.00		average:   91.04
Episode:   678		score:   33.00		average:   90.98
Episode:   679		score:   84.00		average:   90.86
Episode:   680		score:   59.00		average:   90.50
Episode:   681		score:   78.00		average:   90.81
Episode:   682		score:  152.00		average:   91.40
Episode:   683		score:  119.00		average:   91.90
Episode:   684		score:  104.00		average:   91.56
Episode:   685		scor

Episode:   833		score:   42.00		average:   89.27
Episode:   834		score:   15.00		average:   88.65
Episode:   835		score:   42.00		average:   88.61
Episode:   836		score:   59.00		average:   88.25
Episode:   837		score:  128.00		average:   89.20
Episode:   838		score:  134.00		average:   89.20
Episode:   839		score:   85.00		average:   89.43
Episode:   840		score:   50.00		average:   89.23
Episode:   841		score:  175.00		average:   90.41
Episode:   842		score:  144.00		average:   91.45
Episode:   843		score:   81.00		average:   90.26
Episode:   844		score:  131.00		average:   90.44
Episode:   845		score:   70.00		average:   90.09
Episode:   846		score:  114.00		average:   90.89
Episode:   847		score:   90.00		average:   90.19
Episode:   848		score:   98.00		average:   90.61
Episode:   849		score:  123.00		average:   91.21
Episode:   850		score:   70.00		average:   90.33
Episode:   851		score:  200.00		average:   90.33
Episode:   852		score:   77.00		average:   90.49
Episode:   853		scor

Episode:  1004		score:   95.00		average:   83.62
Episode:  1005		score:   50.00		average:   83.46
Episode:  1006		score:   90.00		average:   83.27
Episode:  1007		score:  107.00		average:   83.83
Episode:  1008		score:  131.00		average:   84.44
Episode:  1009		score:  200.00		average:   85.40
Episode:  1010		score:  112.00		average:   85.39
Episode:  1011		score:   23.00		average:   85.24
Episode:  1012		score:  117.00		average:   85.70
Episode:  1013		score:   31.00		average:   85.39
Episode:  1014		score:  127.00		average:   85.42
Episode:  1015		score:   82.00		average:   85.88
Episode:  1016		score:  179.00		average:   86.07
Episode:  1017		score:   64.00		average:   86.01
Episode:  1018		score:   34.00		average:   85.47
Episode:  1019		score:   21.00		average:   84.87
Episode:  1020		score:  167.00		average:   86.19
Episode:  1021		score:   61.00		average:   85.35
Episode:  1022		score:   28.00		average:   84.65
Episode:  1023		score:   90.00		average:   84.65
Episode:  1024		scor

Episode:  1171		score:   35.00		average:   89.72
Episode:  1172		score:   74.00		average:   89.90
Episode:  1173		score:   40.00		average:   89.18
Episode:  1174		score:  132.00		average:   89.94
Episode:  1175		score:  200.00		average:   91.23
Episode:  1176		score:   12.00		average:   91.12
Episode:  1177		score:   26.00		average:   90.28
Episode:  1178		score:   42.00		average:   89.92
Episode:  1179		score:  166.00		average:   91.19
Episode:  1180		score:   30.00		average:   90.40
Episode:  1181		score:  135.00		average:   90.55
Episode:  1182		score:   96.00		average:   90.79
Episode:  1183		score:   34.00		average:   89.94
Episode:  1184		score:   65.00		average:   90.42
Episode:  1185		score:   96.00		average:   91.09
Episode:  1186		score:   97.00		average:   90.24
Episode:  1187		score:  116.00		average:   90.17
Episode:  1188		score:   73.00		average:   89.45
Episode:  1189		score:  142.00		average:   90.40
Episode:  1190		score:  149.00		average:   90.41
Episode:  1191		scor

Episode:  1339		score:  147.00		average:   89.24
Episode:  1340		score:   34.00		average:   89.13
Episode:  1341		score:  113.00		average:   89.02
Episode:  1342		score:   26.00		average:   89.02
Episode:  1343		score:   62.00		average:   88.92
Episode:  1344		score:   40.00		average:   89.08
Episode:  1345		score:   40.00		average:   88.92
Episode:  1346		score:   72.00		average:   88.85
Episode:  1347		score:   44.00		average:   88.45
Episode:  1348		score:  132.00		average:   88.48
Episode:  1349		score:   33.00		average:   88.37
Episode:  1350		score:   35.00		average:   88.31
Episode:  1351		score:   88.00		average:   88.93
Episode:  1352		score:   93.00		average:   89.32
Episode:  1353		score:   64.00		average:   88.67
Episode:  1354		score:  100.00		average:   88.73
Episode:  1355		score:  149.00		average:   90.03
Episode:  1356		score:  143.00		average:   90.03
Episode:  1357		score:   44.00		average:   88.81
Episode:  1358		score:   87.00		average:   89.01
Episode:  1359		scor

Episode:  1507		score:  200.00		average:   91.70
Episode:  1508		score:  114.00		average:   92.15
Episode:  1509		score:   57.00		average:   91.15
Episode:  1510		score:  138.00		average:   90.60
Episode:  1511		score:   34.00		average:   90.72
Episode:  1512		score:   94.00		average:   89.91
Episode:  1513		score:   26.00		average:   89.32
Episode:  1514		score:   29.00		average:   88.46
Episode:  1515		score:   54.00		average:   88.04
Episode:  1516		score:  152.00		average:   88.83
Episode:  1517		score:   76.00		average:   88.99
Episode:  1518		score:   69.00		average:   87.68
Episode:  1519		score:   41.00		average:   86.99
Episode:  1520		score:  100.00		average:   86.14
Episode:  1521		score:   32.00		average:   85.77
Episode:  1522		score:   33.00		average:   85.33
Episode:  1523		score:  105.00		average:   86.09
Episode:  1524		score:  178.00		average:   86.29
Episode:  1525		score:   92.00		average:   85.80
Episode:  1526		score:  123.00		average:   86.79
Episode:  1527		scor

Episode:  1675		score:  109.00		average:   92.31
Episode:  1676		score:   27.00		average:   91.53
Episode:  1677		score:  200.00		average:   92.75
Episode:  1678		score:  168.00		average:   93.57
Episode:  1679		score:   93.00		average:   94.23
Episode:  1680		score:   47.00		average:   93.85
Episode:  1681		score:  145.00		average:   94.39
Episode:  1682		score:  200.00		average:   95.39
Episode:  1683		score:   79.00		average:   94.69
Episode:  1684		score:   33.00		average:   94.67
Episode:  1685		score:   52.00		average:   94.60
Episode:  1686		score:  118.00		average:   95.49
Episode:  1687		score:   30.00		average:   94.88
Episode:  1688		score:   16.00		average:   94.17
Episode:  1689		score:  180.00		average:   95.02
Episode:  1690		score:  170.00		average:   94.72
Episode:  1691		score:   43.00		average:   94.33
Episode:  1692		score:   54.00		average:   93.22
Episode:  1693		score:   84.00		average:   92.71
Episode:  1694		score:   16.00		average:   91.01
Episode:  1695		scor

Episode:  1845		score:  131.00		average:   88.89
Episode:  1846		score:   85.00		average:   89.16
Episode:  1847		score:   41.00		average:   88.04
Episode:  1848		score:   99.00		average:   88.91
Episode:  1849		score:   23.00		average:   87.67
Episode:  1850		score:   71.00		average:   86.38
Episode:  1851		score:  200.00		average:   88.15
Episode:  1852		score:   54.00		average:   87.65
Episode:  1853		score:   34.00		average:   87.19
Episode:  1854		score:   45.00		average:   87.17
Episode:  1855		score:   86.00		average:   87.02
Episode:  1856		score:  115.00		average:   87.20
Episode:  1857		score:   60.00		average:   87.49
Episode:  1858		score:  138.00		average:   87.62
Episode:  1859		score:  125.00		average:   87.65
Episode:  1860		score:  139.00		average:   87.83
Episode:  1861		score:  200.00		average:   88.90
Episode:  1862		score:  200.00		average:   89.51
Episode:  1863		score:   83.00		average:   89.37
Episode:  1864		score:   27.00		average:   88.81
Episode:  1865		scor

Episode:  2013		score:   71.00		average:   90.13
Episode:  2014		score:  130.00		average:   89.43
Episode:  2015		score:  150.00		average:   90.37
Episode:  2016		score:   72.00		average:   90.77
Episode:  2017		score:  180.00		average:   92.27
Episode:  2018		score:   96.00		average:   92.10
Episode:  2019		score:  157.00		average:   92.42
Episode:  2020		score:  114.00		average:   92.48
Episode:  2021		score:  194.00		average:   92.76
Episode:  2022		score:  131.00		average:   93.07
Episode:  2023		score:  152.00		average:   94.44
Episode:  2024		score:  114.00		average:   94.67
Episode:  2025		score:   53.00		average:   93.20
Episode:  2026		score:   48.00		average:   92.55
Episode:  2027		score:   92.00		average:   92.86
Episode:  2028		score:   36.00		average:   92.80
Episode:  2029		score:  147.00		average:   93.64
Episode:  2030		score:  130.00		average:   94.41
Episode:  2031		score:   74.00		average:   94.47
Episode:  2032		score:   46.00		average:   94.36
Episode:  2033		scor

Episode:  2181		score:  165.00		average:   96.29
Episode:  2182		score:   81.00		average:   95.48
Episode:  2183		score:  148.00		average:   95.48
Episode:  2184		score:  163.00		average:   96.18
Episode:  2185		score:   67.00		average:   94.85
Episode:  2186		score:   97.00		average:   94.28
Episode:  2187		score:  149.00		average:   95.13
Episode:  2188		score:   34.00		average:   93.47
Episode:  2189		score:   18.00		average:   92.48
Episode:  2190		score:   41.00		average:   91.98
Episode:  2191		score:   94.00		average:   92.66
Episode:  2192		score:  150.00		average:   93.20
Episode:  2193		score:   34.00		average:   91.75
Episode:  2194		score:   54.00		average:   90.98
Episode:  2195		score:   75.00		average:   91.42
Episode:  2196		score:   60.00		average:   91.46
Episode:  2197		score:  153.00		average:   91.01
Episode:  2198		score:  163.00		average:   92.14
Episode:  2199		score:  200.00		average:   92.14
Episode:  2200		score:   84.00		average:   92.62
Episode:  2201		scor

Episode:  2350		score:  120.00		average:  103.91
Episode:  2351		score:  100.00		average:  104.39
Episode:  2352		score:   87.00		average:  104.17
Episode:  2353		score:  128.00		average:  103.45
Episode:  2354		score:   27.00		average:  103.42
Episode:  2355		score:   76.00		average:  103.78
Episode:  2356		score:  149.00		average:  103.61
Episode:  2357		score:   81.00		average:  102.42
Episode:  2358		score:   89.00		average:  101.51
Episode:  2359		score:  114.00		average:  101.24
Episode:  2360		score:   42.00		average:  101.04
Episode:  2361		score:   50.00		average:   99.72
Episode:  2362		score:   45.00		average:   99.14
Episode:  2363		score:   39.00		average:   99.17
Episode:  2364		score:  170.00		average:   99.87
Episode:  2365		score:   47.00		average:   99.56
Episode:  2366		score:   93.00		average:  100.15
Episode:  2367		score:   42.00		average:  100.40
Episode:  2368		score:   74.00		average:  100.41
Episode:  2369		score:  160.00		average:  100.43
Episode:  2370		scor

Episode:  2518		score:  200.00		average:  110.12
Episode:  2519		score:  181.00		average:  111.51
Episode:  2520		score:  140.00		average:  111.95
Episode:  2521		score:   89.00		average:  112.41
Episode:  2522		score:   94.00		average:  112.80
Episode:  2523		score:  123.00		average:  113.12
Episode:  2524		score:  177.00		average:  113.73
Episode:  2525		score:  150.00		average:  114.24
Episode:  2526		score:   85.00		average:  113.98
Episode:  2527		score:   32.00		average:  113.84
Episode:  2528		score:  126.00		average:  114.75
Episode:  2529		score:  114.00		average:  113.89
Episode:  2530		score:   86.00		average:  113.49
Episode:  2531		score:   35.00		average:  113.40
Episode:  2532		score:   45.00		average:  113.16
Episode:  2533		score:   87.00		average:  112.34
Episode:  2534		score:   86.00		average:  112.79
Episode:  2535		score:   31.00		average:  112.75
Episode:  2536		score:   73.00		average:  111.90
Episode:  2537		score:  118.00		average:  112.45
Episode:  2538		scor

Episode:  2687		score:  112.00		average:   94.64
Episode:  2688		score:   59.00		average:   94.96
Episode:  2689		score:   45.00		average:   94.47
Episode:  2690		score:  183.00		average:   95.21
Episode:  2691		score:   98.00		average:   95.90
Episode:  2692		score:  106.00		average:   95.74
Episode:  2693		score:   31.00		average:   95.16
Episode:  2694		score:   27.00		average:   94.98
Episode:  2695		score:  120.00		average:   95.78
Episode:  2696		score:   47.00		average:   94.64
Episode:  2697		score:  200.00		average:   95.36
Episode:  2698		score:  168.00		average:   95.31
Episode:  2699		score:  200.00		average:   96.64
Episode:  2700		score:  112.00		average:   96.58
Episode:  2701		score:   79.00		average:   96.27
Episode:  2702		score:   59.00		average:   96.39
Episode:  2703		score:  144.00		average:   97.64
Episode:  2704		score:   36.00		average:   97.05
Episode:  2705		score:  149.00		average:   97.89
Episode:  2706		score:   31.00		average:   97.79
Episode:  2707		scor

Episode:  2857		score:   20.00		average:   94.15
Episode:  2858		score:  166.00		average:   94.56
Episode:  2859		score:   26.00		average:   93.53
Episode:  2860		score:   21.00		average:   92.29
Episode:  2861		score:  122.00		average:   93.01
Episode:  2862		score:  165.00		average:   94.10
Episode:  2863		score:  115.00		average:   94.36
Episode:  2864		score:  180.00		average:   95.57
Episode:  2865		score:   39.00		average:   94.68
Episode:  2866		score:  200.00		average:   95.16
Episode:  2867		score:   38.00		average:   94.71
Episode:  2868		score:   75.00		average:   94.28
Episode:  2869		score:  163.00		average:   95.69
Episode:  2870		score:   87.00		average:   95.65
Episode:  2871		score:  127.00		average:   96.40
Episode:  2872		score:   25.00		average:   95.76
Episode:  2873		score:   46.00		average:   94.22
Episode:  2874		score:   55.00		average:   94.18
Episode:  2875		score:   85.00		average:   93.90
Episode:  2876		score:  154.00		average:   93.59
Episode:  2877		scor

Episode:  3026		score:  165.00		average:   89.54
Episode:  3027		score:  117.00		average:   90.25
Episode:  3028		score:  200.00		average:   91.90
Episode:  3029		score:   81.00		average:   91.39
Episode:  3030		score:  200.00		average:   92.21
Episode:  3031		score:   65.00		average:   90.86
Episode:  3032		score:   36.00		average:   90.82
Episode:  3033		score:  148.00		average:   91.42
Episode:  3034		score:   72.00		average:   91.80
Episode:  3035		score:  111.00		average:   92.51
Episode:  3036		score:   37.00		average:   90.88
Episode:  3037		score:   50.00		average:   89.87
Episode:  3038		score:   86.00		average:   88.80
Episode:  3039		score:  162.00		average:   90.30
Episode:  3040		score:   37.00		average:   90.29
Episode:  3041		score:   20.00		average:   89.22
Episode:  3042		score:   67.00		average:   88.35
Episode:  3043		score:  142.00		average:   89.21
Episode:  3044		score:  139.00		average:   89.29
Episode:  3045		score:   34.00		average:   89.10
Episode:  3046		scor

Episode:  3194		score:  200.00		average:   79.76
Episode:  3195		score:  177.00		average:   80.73
Episode:  3196		score:   45.00		average:   79.65
Episode:  3197		score:   30.00		average:   79.59
Episode:  3198		score:   87.00		average:   80.01
Episode:  3199		score:   94.00		average:   80.35
Episode:  3200		score:   53.00		average:   80.66
Episode:  3201		score:   95.00		average:   81.25
Episode:  3202		score:  194.00		average:   81.46
Episode:  3203		score:  197.00		average:   82.40
Episode:  3204		score:   35.00		average:   80.95
Episode:  3205		score:   27.00		average:   80.24
Episode:  3206		score:   87.00		average:   80.54
Episode:  3207		score:   98.00		average:   81.06
Episode:  3208		score:  118.00		average:   81.92
Episode:  3209		score:  123.00		average:   81.78
Episode:  3210		score:   53.00		average:   81.01
Episode:  3211		score:  156.00		average:   81.75
Episode:  3212		score:   67.00		average:   81.40
Episode:  3213		score:   73.00		average:   81.79
Episode:  3214		scor

Episode:  3361		score:   74.00		average:   96.88
Episode:  3362		score:  158.00		average:   98.07
Episode:  3363		score:   71.00		average:   97.02
Episode:  3364		score:  131.00		average:   97.45
Episode:  3365		score:   86.00		average:   97.53
Episode:  3366		score:   35.00		average:   97.61
Episode:  3367		score:   49.00		average:   97.24
Episode:  3368		score:   77.00		average:   97.55
Episode:  3369		score:  154.00		average:   98.47
Episode:  3370		score:  126.00		average:   99.17
Episode:  3371		score:  156.00		average:   99.22
Episode:  3372		score:   39.00		average:   98.32
Episode:  3373		score:   74.00		average:   97.29
Episode:  3374		score:  177.00		average:   98.89
Episode:  3375		score:   19.00		average:   97.71
Episode:  3376		score:  124.00		average:   97.59
Episode:  3377		score:   34.00		average:   96.44
Episode:  3378		score:   35.00		average:   95.63
Episode:  3379		score:   45.00		average:   94.56
Episode:  3380		score:  200.00		average:   95.43
Episode:  3381		scor

Episode:  3528		score:   48.00		average:  103.24
Episode:  3529		score:   30.00		average:  102.20
Episode:  3530		score:   60.00		average:  102.03
Episode:  3531		score:   81.00		average:  101.29
Episode:  3532		score:   36.00		average:  100.40
Episode:  3533		score:   44.00		average:  100.54
Episode:  3534		score:  123.00		average:  101.33
Episode:  3535		score:   40.00		average:  100.91
Episode:  3536		score:   96.00		average:  101.18
Episode:  3537		score:   41.00		average:  100.32
Episode:  3538		score:  200.00		average:  101.57
Episode:  3539		score:   24.00		average:  101.46
Episode:  3540		score:   39.00		average:  101.18
Episode:  3541		score:  188.00		average:  102.29
Episode:  3542		score:   92.00		average:  101.80
Episode:  3543		score:  140.00		average:  101.47
Episode:  3544		score:  113.00		average:  101.35
Episode:  3545		score:  106.00		average:  101.88
Episode:  3546		score:   80.00		average:  102.12
Episode:  3547		score:  200.00		average:  103.33
Episode:  3548		scor

KeyboardInterrupt: 