## Reward function

In [3]:
import puzzle

def reward(self):
    increment = self.game.increment
    totalScore = self.game.totalScore
    # loose does not work yet
    #loose = self.game.loose
    return increment

## Neural Network Model

In [4]:
import random
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop

class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=100000)
        self.gamma = 0.9    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.e_decay = .99
        self.e_min = 0.05
        self.learning_rate = 0.01
        self.model = self._build_model()

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(20, input_dim=self.state_size, activation='tanh')) # Adds the first layer with 16 inputs
        model.add(Dense(20, activation='tanh', init='uniform')) # Adds Hidden layer with 20 nodes
        model.add(Dense(self.action_size, activation='linear')) # Adds output layer with 2 nodes
        model.compile(loss='mse',
                      optimizer=RMSprop(lr=self.learning_rate)) # Creates the model from all of the above
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        batch_size = min(batch_size, len(self.memory))
        minibatch = random.sample(self.memory, batch_size)
        X = np.zeros((batch_size, self.state_size))
        Y = np.zeros((batch_size, self.action_size))
        for i in range(batch_size):
            state, action, reward, next_state, done = minibatch[i]
            target = self.model.predict(state)[0]
            if done:
                target[action] = reward
            else:
                target[action] = reward + self.gamma * \
                            np.amax(self.model.predict(next_state)[0])
            X[i], Y[i] = state, target
        self.model.fit(X, Y, batch_size=batch_size, nb_epoch=1, verbose=0)
        if self.epsilon > self.e_min:
            self.epsilon *= self.e_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

Using TensorFlow backend.


## Main Game execution

In [10]:
# %load machine.py
import time
import puzzle


EPISODES = 10


#ACCESS SCORE AS self.game.score
#ACCESS MATRIX AS self.game.matrix
#DECIDE ACTION TO TAKE IN act()
#POSSIBLE ACTIONS:
#	go up:		"'w'"
#	go left:	"'a'"
#	go right:	"'s'"
#	go down:	"'d'"

class Machine:
    game=puzzle.GameGrid() #Game object
    def run(self):
        for i in range(EPISODES):
            
            self.game.restart()
            for t in range(20):
                #self.game.key_down("'w'") #EXAMPLE UNCOMMENT TO RUN
                #print(self.game.score)
                #print(self.game.matrix)
                print(self.reward())
                self.game.key_down(self.act('up')) #COMMENT TO RUN EXAMPLE
                self.game.update_idletasks
                self.game.update()
                time.sleep(0.1)
            
        #print("episode: {}/{}, score: {}, e: {:.2}".format(e, EPISODES, time, agent.epsilon))

    def act(self, direction):
        up = "'w'"
        down = "'s'"
        right = "'d'"
        left = "'a'"
        if direction == 'up': return up
        elif direction == 'down': return down
        elif direction == 'right': return right
        elif direction == 'left': return left
        
    def reward(self):
        increment = self.game.increment
        #totalScore = self.game.score
        loose = self.game.result
        l = 0
        if loose: l = -2*increment
        #print("loose: " + str(loose))
        return (increment + l)
    
    #######################################
    # I made this function to calculate the best next step. This will be the function
    # Determening which state to be given to the neural network.
    # This is a bad implementation since it relies on copying the whole game for every
    # calculation, but it will work to train in the beginning. 
    def calculateNextStep(self, game):
        best_qval = 0
        best_move = 'up' # Default move
        acts = ['up','down','right','left']
        for act in acts: 
            game_copy = game
            game_copy.key_down(self.act(act))
            self.game.update_idletasks
            self.game.update()
            qval = game_copy.reward()
            if qval > best_qval:
                best_qval = qval
                best_move = act
        return best_move
    #######################################
        
our_machine = Machine()
our_machine.run()

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
4
4
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
4
4
0
0
4
4
0
0
0
0
0
0
0
0
0
0
0
0
0
0
4
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
4
4
0
4
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
4
0
0
4
4
8
0
4
4
0
0
0
0
0
0
0
0
0
0
4
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
4
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


In [11]:
# Easier implemtation to start with. I have used this guide: https://keon.io/deep-q-learning/
# The following code is to see that the neural network and deep q network is setup correctly

# 1
############## Simple Neural Net #################
# Here are some more concrete numbers to put into the neural network than in the DQNAgent
model = Sequential()
model.add(Dense(20, input_dim=4, activation='tanh')) # Adds the first layer with 16 inputs
model.add(Dense(20, activation='tanh'))              # Adds Hidden layer with 20 nodes (Removed 'uniform')
model.add(Dense(4, activation='linear'))             # Adds output layer with 2 nodes
model.compile(loss='mse',optimizer=RMSprop(lr=0.01)) # Creates the model from all of the above

#     This training process makes the neural net predict 
#     the reward value (target_f) from a certain state.

state = our_machine.game.matrix  # Game state matrix ( Converting to a 1D array to please model.fit())                      
print(state)


# After some debugging the target has to be a table just like the state
# Therefore I am wondering if we should have the target as the complete theoretical board?
# Or as the best possible next state? 

# Theoretical limit target_f = [[131072, 65536, 32768, 16384], [1024, 2048, 4096, 8192], [512, 256, 128, 64], [4, 8, 16, 32]]
# I we want to use the best possible next state we first have to: 
    # 1: Calculate the next four possibilities
    # 2: Pick the one with highest q-value
    # 3: Use that next state as target_f
    
# For now I will use this random state
target_f = [[0, 0, 2, 0], [0, 0, 0, 0], [0, 0, 2, 0], [0, 0, 0, 0]]

model.fit(state, target_f)        # Fitting the data (Feeding the network)
prediction = model.predict(state) # Predicting the reward from the current state



# 2
############## Deep Q Network (DQN) #################

# So that the neural network don't "forget" the old state with new experiences
# we make a function to remember. (Already implemented perfectly in DQNAgent)
memory = ((state, action, reward, next_state, done))

def remember(self, state, action, reward, next_state, done):
    self.memory.append((state, action, reward, next_state, done))

# Replay trains the neural network by replaying the previous experiences in its memory
# These memories are experienced in batches

batch_size = 5 # Just a random number 
batches = min(batch_size, len(self.memory))
batches = np.random.choice(len(self.memory), batches) # Picks random experiences to make batches of


# Training
    
        

[[2, 4, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


NameError: name 'action' is not defined