In [2]:

#encoding: utf-8

##
## cartpole.py
## Gaetan JUVIN 06/24/2017
##
import lakiaro_to_learning as lk
import gym
import random
import os
import numpy as np
from collections      import deque
from keras.models     import Sequential
from keras.layers     import Dense
from keras.optimizers import Adam
from keras.utils import to_categorical

class Agent():
    """Deep Q-learning agent backed by a small fully-connected Keras network.

    Action convention: actions handed to the environment and stored in the
    replay memory are 1-based (``1 .. action_size``).  Output unit ``k`` of
    the network therefore holds the Q-value of action ``k + 1``; ``act`` and
    ``replay`` both rely on this mapping.
    """

    def __init__(self, state_size, action_size):
        """Set hyper-parameters and build the Q-network.

        Args:
            state_size: length of the flat state vector fed to the network.
            action_size: number of discrete actions (network output units).
        """
        self.weight_backup      = "laki.h5"           # file written by save_model()
        self.state_size         = state_size
        self.action_size        = action_size
        self.memory             = deque(maxlen=2000)  # bounded replay buffer
        self.learning_rate      = 0.001
        self.gamma              = 0.95                # discount applied to the next-state value
        self.exploration_rate   = 1.0                 # start fully random
        self.exploration_min    = 0.01
        self.exploration_decay  = 0.995
        # Created here as well so the attribute exists before act_lst_reset()
        # is ever called.
        self.act_lst            = []
        self.brain              = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network (two hidden ReLU layers of 50 units).

        Returns:
            The compiled Keras ``Sequential`` model with ``action_size``
            linear outputs, trained with MSE loss and the Adam optimizer.

        NOTE: previously saved weights in ``self.weight_backup`` are not
        reloaded here; every Agent starts from freshly initialised weights.
        """
        model = Sequential()
        model.add(Dense(50, input_dim=self.state_size, activation='relu'))
        model.add(Dense(50, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        return model

    def save_model(self):
        """Save the current network to ``self.weight_backup``."""
        self.brain.save(self.weight_backup)

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        Args:
            state: network input, as accepted by ``self.brain.predict``.

        Returns:
            int: a 1-based action in ``1 .. action_size``.
        """
        if np.random.rand() <= self.exploration_rate:
            # Explore: uniform over all actions (randint is inclusive on both
            # ends).  Uses action_size instead of a hard-coded 288.
            return random.randint(1, self.action_size)

        # Exploit: argmax is a 0-based output index, so shift by one to get
        # the 1-based action that replay() trains at index ``action - 1``.
        act_values = self.brain.predict(state)
        return int(np.argmax(act_values)) + 1

    def act_lst_reset(self):
        """Reset ``self.act_lst`` to an empty list."""
        self.act_lst = []

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory.

        The oldest entry is discarded automatically once the deque is full.
        """
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, sample_batch_size):
        """Train the network on a random mini-batch of stored transitions.

        Does nothing until at least ``sample_batch_size`` transitions have
        been stored.  After a training pass the exploration rate is decayed
        once, as long as it is still above ``exploration_min``.

        Args:
            sample_batch_size: number of transitions to sample and fit on.
        """
        if len(self.memory) < sample_batch_size:
            return
        sample_batch = random.sample(self.memory, sample_batch_size)
        for state, action, reward, next_state, done in sample_batch:
            # Terminal transitions use the bare reward; otherwise add the
            # discounted best predicted value of the next state.
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.brain.predict(next_state)[0])
            # Only the output of the action actually taken is moved towards
            # the target; the other outputs keep their current predictions.
            target_f = self.brain.predict(state)
            target_f[0][action-1] = target  # 1-based action -> 0-based output index
            self.brain.fit(state, target_f, epochs=1, verbose=0)
        if self.exploration_rate > self.exploration_min:
            self.exploration_rate *= self.exploration_decay

class laki():
    """Training driver: plays episodes in the ``lk`` game and trains an Agent."""

    def __init__(self):
        """Create the game environment, the run settings and the agent."""
        self.lak = lk.run_game(0.1,8)
        self.sample_batch_size, self.episodes = 50, 10000
        self.state_size, self.action_size = 144, 288
        self.agent = Agent(self.state_size, self.action_size)

    def _as_row(self, raw_state):
        """Reshape a raw environment state into a ``(1, state_size)`` array."""
        return np.reshape(raw_state, [1, self.state_size])

    def run(self):
        """Play ``self.episodes`` episodes, training after each one.

        Every step is stored in the agent's replay memory.  When an episode
        ends its summed reward is printed and one replay pass is run.  The
        model is saved on the way out, whether the loop finished or raised.
        """
        try:
            for episode_no in range(self.episodes):
                current = self._as_row(self.lak.reset())
                score = 0
                finished = False

                while not finished:
                    chosen = self.agent.act(current)
                    raw_next, gained, finished = self.lak.input_xy_click(chosen)
                    following = self._as_row(raw_next)
                    self.agent.remember(current, chosen, gained, following, finished)
                    score += gained
                    current = following

                print("Episode {}# Score: {}".format(episode_no, score))
                self.agent.replay(self.sample_batch_size)
        finally:
            self.agent.save_model()

# Script entry point: build the training driver and run its episode loop.
if __name__ == "__main__":
    # NOTE: the variable is still named `cartpole`, but it holds a `laki`
    # instance.
    cartpole = laki()
    cartpole.run()


Episode 0# Score: 24
Episode 1# Score: 28
Episode 2# Score: 26
Episode 3# Score: 18
Episode 4# Score: 24
Episode 5# Score: 35
Episode 6# Score: 21
Episode 7# Score: 23
Episode 8# Score: 12
Episode 9# Score: 18
Episode 10# Score: 34
Episode 11# Score: 32
Episode 12# Score: 31
Episode 13# Score: 34
Episode 14# Score: 30
Episode 15# Score: 14
Episode 16# Score: 13
Episode 17# Score: 29
Episode 18# Score: 27
Episode 19# Score: 33
Episode 20# Score: 24
Episode 21# Score: 12
Episode 22# Score: 12
Episode 23# Score: 24
Episode 24# Score: 12
Episode 25# Score: 30
Episode 26# Score: 19
Episode 27# Score: 23
Episode 28# Score: 29
Episode 29# Score: 25
Episode 30# Score: 14
Episode 31# Score: 18
Episode 32# Score: 17
Episode 33# Score: 18
Episode 34# Score: 26
Episode 35# Score: 6
Episode 36# Score: 14
Episode 37# Score: 28
Episode 38# Score: 27
Episode 39# Score: 31
Episode 40# Score: 29
Episode 41# Score: 5
Episode 42# Score: 23
Episode 43# Score: 29
Episode 44# Score: 32
Episode 45# Score: 11


Episode 362# Score: -11
Episode 363# Score: -20
Episode 364# Score: 22
Episode 365# Score: -7
Episode 366# Score: 9
Episode 367# Score: 3
Episode 368# Score: -6
Episode 369# Score: 9
Episode 370# Score: 19
Episode 371# Score: 28
Episode 372# Score: 30
Episode 373# Score: 35
Episode 374# Score: 12
Episode 375# Score: 21
Episode 376# Score: 34
Episode 377# Score: 29
Episode 378# Score: 26
Episode 379# Score: 37
Episode 380# Score: 13
Episode 381# Score: 16
Episode 382# Score: 26
Episode 383# Score: 26
Episode 384# Score: 11
Episode 385# Score: -1
Episode 386# Score: 25
Episode 387# Score: 23
Episode 388# Score: 28
Episode 389# Score: 13
Episode 390# Score: 30
Episode 391# Score: 20
Episode 392# Score: 32
Episode 393# Score: 20
Episode 394# Score: 29
Episode 395# Score: 19
Episode 396# Score: 29
Episode 397# Score: 33
Episode 398# Score: 19
Episode 399# Score: 17
Episode 400# Score: 21
Episode 401# Score: 28
Episode 402# Score: 32
Episode 403# Score: 36
Episode 404# Score: 21
Episode 405#

KeyboardInterrupt: 