In [22]:
import sys
#change it to your own path to where 'gym' is installed
sys.path.append('/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages')
import random
import gym
import numpy as np

from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.utils import multi_gpu_model

#from JSAnimation.IPython_display import display_animation
from matplotlib import animation
from IPython.display import display
from multiprocessing import Pool

In [23]:
EPISODES = 1000
TIME_LIMIT = 1000

In [24]:
from keras.utils import Sequence

class DataSequence(Sequence):
    def __init__(self, dataset, model, batch_size):
        self.data=dataset
        self.batch_size = batch_size
        self.model=model
        self.gamma = 0.95 # discount rate

    def __len__(self):
        return int(np.ceil(len(self.data) / float(self.batch_size)))
    

    def __getitem__(self, idx):        
        minibatch=self.data[idx * self.batch_size:(idx + 1) * self.batch_size]

        states=[]
        targets_f=[]

        for state, action, reward, next_state, done in minibatch:
            states.append(state)
            target = reward
            if not done:
                target = (reward + self.gamma *
                          np.amax(self.model.predict(next_state)[0]))

            target_f = self.model.predict(state)
            target_f[0][action] = target

            targets_f.append(target_f)
        return np.array(states), np.array(targets_f)


In [25]:
class DQNAgent:
    def __init__(self, state_size, action_size,batch_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=5000) # maximum number of samples stored in dataset
        self.gamma = 0.95 # discount rate
        self.epsilon = 0.2 # exploration rate
        #self.epsilon = 1.0 # exploration rate
        self.epsilon_min = 0.01 # minimum exploration rate
        #self.epsilon_decay = 0.995 # decay rate for exploration
        self.epsilon_decay = 1 # decay rate for exploration
        self.learning_rate = 0.001
        self.batch_size=batch_size
        self.model = self.normal_model(4)

    def _build_model_2L(self):
        """2-layer Neural Net for Deep-Q learning Model."""
        model = Sequential()
        model.add(Dense(units=24, input_dim=self.state_size, activation='relu')) # input layer
        model.add(Dense(units=self.action_size, activation='linear')) # output layer
        #model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate)) # loss function = mean squared error
        return model
    
    def _build_model_3L(self):
        """3-layer Neural Net for Deep-Q learning Model."""
        model = Sequential()
        model.add(Dense(units=24, input_dim=self.state_size, activation='relu')) # input layer
        model.add(Dense(units=24, activation='relu'))
        model.add(Dense(units=self.action_size, activation='linear')) # output layer
        #model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate)) # loss function = mean squared error
        return model

    def _build_model_4L(self):
        """4-layer Neural Net for Deep-Q learning Model."""
        model = Sequential()
        model.add(Dense(units=24, input_dim=self.state_size, activation='relu')) # input layer
        model.add(Dense(units=24, activation='relu'))
        model.add(Dense(units=24, activation='relu'))
        model.add(Dense(units=self.action_size, activation='linear')) # output layer
        #model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate)) # loss function = mean squared error
        return model
    
    def normal_model(self,layer=4):
        if layer==2:
            model=self._build_model_2L()
        elif layer==3:
            model = self._build_model_3L()
        elif layer==4:
            model = self._build_model_4L()
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate)) # loss function = mean squared error
        return model
        
    def parallel_model(self,layer=4):
        if layer==2:
            model=self._build_model_2L()
        elif layer==3:
            model = self._build_model_3L()
        elif layer==4:
            model = self._build_model_4L()
        # Not needed to change the device scope for model definition:
        p_model = multi_gpu_model(model, cpu_relocation=True)
        p_model.compile(loss='mse',optimizer='Adam(lr=self.learning_rate)')
        return p_model

    def remember(self, state, action, reward, next_state, done):
        """Store s,a,r,s' by appending to self.memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose action randomly (explore) or by model prediction (exploit)."""
        if np.random.rand() <= self.epsilon: # explore with probabiluty self.epsilon
            return random.randrange(self.action_size)

        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action
    
    def fit_model(self,minibatch):
#         for state, action, reward, next_state, done in minibatch:
#             target = reward
#             if not done:
#                 target = (reward + self.gamma *
#                           np.amax(self.model.predict(next_state)[0]))

#             target_f = self.model.predict(state)
#             target_f[0][action] = target
#         multiprocess_batchsize=500
    
        seq=DataSequence(minibatch,self.model,self.batch_size)
        
        self.model.fit_generator(generator=seq, 
                epochs = 1, 
                verbose=1,
                workers=8,
                use_multiprocessing=True)
            #self.model.fit(state, target_f, epochs=1, verbose=0) # epochs = number of iterations over the minibatch
            
    def replay(self, batch_size):
        """Train the neural net on the episodes in self.memory. 
           Only N samples defined by batch_size are sampled from self.memory for training.
        """
        minibatch = random.sample(self.memory, 5000)
        #chunks = [minibatch[i:i + 100] for i in range(0, len(minibatch), 100)]
        #pool=Pool()
        #pool.map(self.fit_model, chunks)

        self.fit_model(self.minibatch)
              
        # Decaying exploration rate
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)



In [26]:
env = gym.make('Phoenix-ram-v0')

In [27]:
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
done = False
batch_size = 500
scores = [] # store the score for each completed episode

In [28]:
import time
if __name__ == '__main__':
    env = gym.make('Phoenix-ram-v0')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size,batch_size)
    done = False
    batch_size = 5000
    scores = [] # store the score for each completed episode

    for episode in range(EPISODES):
        #start_time=time.clock()
        
        print('episode = {}'.format(episode))
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        score = 0

        for time in range(TIME_LIMIT):
            # env.render()
            action = agent.act(state) # DQN agent chooses next action 
            next_state, reward, done, _ = env.step(action) # observe rewards and successor state
            score += reward # keep track of game score
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done) # add s,a,r,s' to dataset (agent.memory)
            state = next_state        
                    
            if done or time==TIME_LIMIT:
                print('episode: {}/{}, scores: {}, exploration rate: {:.2}'
                          .format(episode, EPISODES, scores, agent.epsilon))
                scores.append(score)
                #print('scores = {}'.format(scores))
                break

            # Train NN after each episode or timeout by randomly sampling a batch from the dataset in agent.memory
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)
                
        #print ('time elpased={}'.format(time.clock()-start_time))
        
        if episode%20==0: #save every 20 episodes
            # Save weights after training is complete
            agent.save('phoenix_dqn_3L.h5')


episode = 0
episode = 1
episode = 2
episode = 3
episode = 4
episode: 4/1000, scores: [], exploration rate: 0.2
episode = 5
episode: 5/1000, scores: [440.0], exploration rate: 0.2
episode = 6
episode = 7
episode: 7/1000, scores: [440.0, 360.0], exploration rate: 0.2
episode = 8
episode = 9
episode: 9/1000, scores: [440.0, 360.0, 240.0], exploration rate: 0.2
episode = 10
episode: 10/1000, scores: [440.0, 360.0, 240.0, 200.0], exploration rate: 0.2
episode = 11
episode = 12
episode = 13
episode = 14
episode = 15
episode = 16
episode = 17
episode = 18
episode = 19
episode = 20
episode = 21
episode = 22
episode = 23
episode = 24
episode: 24/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0], exploration rate: 0.2
episode = 25
episode: 25/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0], exploration rate: 0.2
episode = 26
episode = 27
episode: 27/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0], exploration rate: 0.2
episode = 28
episode = 29
episode = 30
episode = 31


episode = 184
episode = 185
episode = 186
episode = 187
episode = 188
episode = 189
episode = 190
episode = 191
episode = 192
episode = 193
episode = 194
episode = 195
episode = 196
episode = 197
episode = 198
episode = 199
episode = 200
episode = 201
episode = 202
episode = 203
episode = 204
episode = 205
episode = 206
episode: 206/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0], exploration rate: 0.2
episode = 207
episode = 208
episode = 209
episode: 209/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0], exploration rate: 0.2
episode = 210
episode = 211
episode

episode = 310
episode = 311
episode = 312
episode: 312/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0], exploration rate: 0.2
episode = 313
episode: 313/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0, 240.0], exploration rate: 0.2
episode = 314
episode = 315
episode = 316
episode: 316/1000, scores: [440.0, 360.0, 240.0

episode = 389
episode = 390
episode = 391
episode = 392
episode = 393
episode: 393/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0, 240.0, 260.0, 200.0, 420.0, 260.0, 160.0, 240.0, 420.0, 300.0, 300.0, 260.0, 180.0, 240.0, 360.0, 240.0], exploration rate: 0.2
episode = 394
episode = 395
episode = 396
episode = 397
episode = 398
episode = 399
episode: 399/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 

episode = 452
episode = 453
episode = 454
episode: 454/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0, 240.0, 260.0, 200.0, 420.0, 260.0, 160.0, 240.0, 420.0, 300.0, 300.0, 260.0, 180.0, 240.0, 360.0, 240.0, 240.0, 320.0, 180.0, 280.0, 180.0, 460.0, 260.0, 260.0, 200.0, 240.0, 340.0, 180.0, 160.0], exploration rate: 0.2
episode = 455
episode = 456
episode = 457
episode = 458
episode = 459
episode = 460
episode: 460/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 11

episode = 525
episode = 526
episode = 527
episode = 528
episode = 529
episode: 529/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0, 240.0, 260.0, 200.0, 420.0, 260.0, 160.0, 240.0, 420.0, 300.0, 300.0, 260.0, 180.0, 240.0, 360.0, 240.0, 240.0, 320.0, 180.0, 280.0, 180.0, 460.0, 260.0, 260.0, 200.0, 240.0, 340.0, 180.0, 160.0, 930.0, 140.0, 320.0, 480.0, 380.0, 180.0, 440.0, 260.0, 200.0, 400.0, 260.0], exploration rate: 0.2
episode = 530
episode = 531
episode = 532
episode = 533
episode: 533/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 36

episode = 587
episode = 588
episode = 589
episode = 590
episode = 591
episode = 592
episode = 593
episode = 594
episode: 594/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0, 240.0, 260.0, 200.0, 420.0, 260.0, 160.0, 240.0, 420.0, 300.0, 300.0, 260.0, 180.0, 240.0, 360.0, 240.0, 240.0, 320.0, 180.0, 280.0, 180.0, 460.0, 260.0, 260.0, 200.0, 240.0, 340.0, 180.0, 160.0, 930.0, 140.0, 320.0, 480.0, 380.0, 180.0, 440.0, 260.0, 200.0, 400.0, 260.0, 260.0, 380.0, 260.0, 240.0, 300.0, 180.0, 320.0, 460.0, 200.0, 440.0, 260.0], exploration rate: 0.2
episode = 595
episode = 596
episode = 597
episode = 598
episode = 599
episode = 600
episode = 601
episode = 602
ep

episode = 663
episode = 664
episode = 665
episode = 666
episode = 667
episode = 668
episode = 669
episode = 670
episode = 671
episode: 671/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0, 240.0, 260.0, 200.0, 420.0, 260.0, 160.0, 240.0, 420.0, 300.0, 300.0, 260.0, 180.0, 240.0, 360.0, 240.0, 240.0, 320.0, 180.0, 280.0, 180.0, 460.0, 260.0, 260.0, 200.0, 240.0, 340.0, 180.0, 160.0, 930.0, 140.0, 320.0, 480.0, 380.0, 180.0, 440.0, 260.0, 200.0, 400.0, 260.0, 260.0, 380.0, 260.0, 240.0, 300.0, 180.0, 320.0, 460.0, 200.0, 440.0, 260.0, 220.0, 300.0, 200.0, 240.0, 340.0, 200.0, 680.0, 660.0, 200.0], exploration rate: 0.2
episode = 672
episode = 673
episode =

episode = 705
episode = 706
episode = 707
episode = 708
episode: 708/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0, 240.0, 260.0, 200.0, 420.0, 260.0, 160.0, 240.0, 420.0, 300.0, 300.0, 260.0, 180.0, 240.0, 360.0, 240.0, 240.0, 320.0, 180.0, 280.0, 180.0, 460.0, 260.0, 260.0, 200.0, 240.0, 340.0, 180.0, 160.0, 930.0, 140.0, 320.0, 480.0, 380.0, 180.0, 440.0, 260.0, 200.0, 400.0, 260.0, 260.0, 380.0, 260.0, 240.0, 300.0, 180.0, 320.0, 460.0, 200.0, 440.0, 260.0, 220.0, 300.0, 200.0, 240.0, 340.0, 200.0, 680.0, 660.0, 200.0, 160.0, 160.0, 260.0, 300.0, 200.0, 240.0, 160.0, 280.0, 180.0], exploration rate: 0.2
episode = 709
episode = 710
episode = 711
ep

episode = 753
episode = 754
episode = 755
episode = 756
episode: 756/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0, 240.0, 260.0, 200.0, 420.0, 260.0, 160.0, 240.0, 420.0, 300.0, 300.0, 260.0, 180.0, 240.0, 360.0, 240.0, 240.0, 320.0, 180.0, 280.0, 180.0, 460.0, 260.0, 260.0, 200.0, 240.0, 340.0, 180.0, 160.0, 930.0, 140.0, 320.0, 480.0, 380.0, 180.0, 440.0, 260.0, 200.0, 400.0, 260.0, 260.0, 380.0, 260.0, 240.0, 300.0, 180.0, 320.0, 460.0, 200.0, 440.0, 260.0, 220.0, 300.0, 200.0, 240.0, 340.0, 200.0, 680.0, 660.0, 200.0, 160.0, 160.0, 260.0, 300.0, 200.0, 240.0, 160.0, 280.0, 180.0, 240.0, 200.0, 300.0, 320.0, 420.0, 220.0, 320.0, 200.0, 560.0], exp

episode = 797
episode = 798
episode = 799
episode = 800
episode = 801
episode = 802
episode = 803
episode = 804
episode = 805
episode = 806
episode = 807
episode: 807/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0, 240.0, 260.0, 200.0, 420.0, 260.0, 160.0, 240.0, 420.0, 300.0, 300.0, 260.0, 180.0, 240.0, 360.0, 240.0, 240.0, 320.0, 180.0, 280.0, 180.0, 460.0, 260.0, 260.0, 200.0, 240.0, 340.0, 180.0, 160.0, 930.0, 140.0, 320.0, 480.0, 380.0, 180.0, 440.0, 260.0, 200.0, 400.0, 260.0, 260.0, 380.0, 260.0, 240.0, 300.0, 180.0, 320.0, 460.0, 200.0, 440.0, 260.0, 220.0, 300.0, 200.0, 240.0, 340.0, 200.0, 680.0, 660.0, 200.0, 160.0, 160.0, 260.0, 300.0, 200.

episode = 855
episode = 856
episode: 856/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0, 240.0, 260.0, 200.0, 420.0, 260.0, 160.0, 240.0, 420.0, 300.0, 300.0, 260.0, 180.0, 240.0, 360.0, 240.0, 240.0, 320.0, 180.0, 280.0, 180.0, 460.0, 260.0, 260.0, 200.0, 240.0, 340.0, 180.0, 160.0, 930.0, 140.0, 320.0, 480.0, 380.0, 180.0, 440.0, 260.0, 200.0, 400.0, 260.0, 260.0, 380.0, 260.0, 240.0, 300.0, 180.0, 320.0, 460.0, 200.0, 440.0, 260.0, 220.0, 300.0, 200.0, 240.0, 340.0, 200.0, 680.0, 660.0, 200.0, 160.0, 160.0, 260.0, 300.0, 200.0, 240.0, 160.0, 280.0, 180.0, 240.0, 200.0, 300.0, 320.0, 420.0, 220.0, 320.0, 200.0, 560.0, 320.0, 480.0, 260.0, 220.0, 440.

episode = 893
episode = 894
episode = 895
episode = 896
episode = 897
episode = 898
episode = 899
episode = 900
episode = 901
episode = 902
episode = 903
episode: 903/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0, 240.0, 260.0, 200.0, 420.0, 260.0, 160.0, 240.0, 420.0, 300.0, 300.0, 260.0, 180.0, 240.0, 360.0, 240.0, 240.0, 320.0, 180.0, 280.0, 180.0, 460.0, 260.0, 260.0, 200.0, 240.0, 340.0, 180.0, 160.0, 930.0, 140.0, 320.0, 480.0, 380.0, 180.0, 440.0, 260.0, 200.0, 400.0, 260.0, 260.0, 380.0, 260.0, 240.0, 300.0, 180.0, 320.0, 460.0, 200.0, 440.0, 260.0, 220.0, 300.0, 200.0, 240.0, 340.0, 200.0, 680.0, 660.0, 200.0, 160.0, 160.0, 260.0, 300.0, 200.

episode = 933
episode = 934
episode = 935
episode = 936
episode = 937
episode = 938
episode = 939
episode = 940
episode = 941
episode = 942
episode = 943
episode = 944
episode = 945
episode = 946
episode: 946/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0, 240.0, 260.0, 200.0, 420.0, 260.0, 160.0, 240.0, 420.0, 300.0, 300.0, 260.0, 180.0, 240.0, 360.0, 240.0, 240.0, 320.0, 180.0, 280.0, 180.0, 460.0, 260.0, 260.0, 200.0, 240.0, 340.0, 180.0, 160.0, 930.0, 140.0, 320.0, 480.0, 380.0, 180.0, 440.0, 260.0, 200.0, 400.0, 260.0, 260.0, 380.0, 260.0, 240.0, 300.0, 180.0, 320.0, 460.0, 200.0, 440.0, 260.0, 220.0, 300.0, 200.0, 240.0, 340.0, 200.0, 680.0, 660.

episode = 980
episode = 981
episode = 982
episode = 983
episode = 984
episode = 985
episode = 986
episode = 987
episode = 988
episode = 989
episode: 989/1000, scores: [440.0, 360.0, 240.0, 200.0, 440.0, 320.0, 180.0, 140.0, 380.0, 120.0, 260.0, 320.0, 180.0, 180.0, 180.0, 440.0, 160.0, 260.0, 280.0, 980.0, 140.0, 360.0, 420.0, 260.0, 220.0, 180.0, 180.0, 220.0, 180.0, 200.0, 180.0, 300.0, 1130.0, 320.0, 160.0, 320.0, 800.0, 1480.0, 280.0, 160.0, 380.0, 420.0, 140.0, 180.0, 280.0, 220.0, 320.0, 280.0, 340.0, 480.0, 200.0, 480.0, 360.0, 240.0, 260.0, 200.0, 420.0, 260.0, 160.0, 240.0, 420.0, 300.0, 300.0, 260.0, 180.0, 240.0, 360.0, 240.0, 240.0, 320.0, 180.0, 280.0, 180.0, 460.0, 260.0, 260.0, 200.0, 240.0, 340.0, 180.0, 160.0, 930.0, 140.0, 320.0, 480.0, 380.0, 180.0, 440.0, 260.0, 200.0, 400.0, 260.0, 260.0, 380.0, 260.0, 240.0, 300.0, 180.0, 320.0, 460.0, 200.0, 440.0, 260.0, 220.0, 300.0, 200.0, 240.0, 340.0, 200.0, 680.0, 660.0, 200.0, 160.0, 160.0, 260.0, 300.0, 200.0, 240.0, 160.

In [29]:
# Calculate average score
print('AVERAGE SCORE = {}'.format(np.mean(np.asarray(scores))))

AVERAGE SCORE = 302.70588235294116
