**Setting up interface to Minesweeper game**

In [1]:
import jpype
import jpype.imports
from jpype.types import *
import numpy as np

print(jpype.getDefaultJVMPath())

jpype.startJVM(jpype.getDefaultJVMPath(),"-ea", "-Djava.class.path=%s" % ('/Users/apimienta/Documents/Minesweeper AI/train'))

MinesweeperInterface = jpype.JClass("MinesweeperInterface")
MinesweeperGame = jpype.JClass("MinesweeperInterface$MinesweeperGame")

/Library/Java/JavaVirtualMachines/jdk1.8.0_102.jdk/Contents/Home/jre/lib/jli/libjli.dylib


In [3]:
"""Generate game and return a tuple of (game, interface)"""
def generate_new_game():
    new_interface = MinesweeperInterface()
    new_game = MinesweeperGame(new_interface)
    
    return (new_game, new_interface)

"""dels the game for garbage collection"""
def destroy_game(interface, game):
    del interface
    del game
    
"""Get board of game as numpy array"""
def get_np_board(game):
    j_board = game.getPlayerBoard()
    return np.pad(np.array([[[cell] for cell in row] for row in j_board]), ((3, 3), (3, 3), (0, 0)), 'constant') # 0-pad as numpy array

"""Click at index [x][y] and return whether or not it was a valid click"""
def click_game(game, x, y):
    return game.click(x, y)
    
new_game, new_interface = generate_new_game()
print(click_game(new_game, 3, 3))
print(get_np_board(new_game).shape)
destroy_game(new_game, new_interface)

True
(22, 22, 1)


In [27]:
jpype.shutdownJVM()

**Set up model**

In [4]:
from keras import layers
from keras import models
from keras import Model
from keras.optimizers import Adam

def build_model():
    inputs = layers.Input(shape=(22, 22, 1))
    
    x = layers.Conv2D(16, 3, activation='relu')(inputs)
    x = layers.Conv2D(4, 3, activation='relu')(x)
    x = layers.Conv2D(1, 3, activation='linear')(x)
    
    model = Model(inputs=inputs, outputs=x)
    model.compile(loss='mse', optimizer=Adam(lr=0.01), metrics=['mae'])
    
    return model

In [5]:
build_model().summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 22, 22, 1)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 20, 20, 16)        160       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 18, 18, 4)         580       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 16, 16, 1)         37        
Total params: 777
Trainable params: 777
Non-trainable params: 0
_________________________________________________________________


In [None]:
from collections import deque

num_episodes = 10000
eps_decay = 0.5
y = 0.95
decay_factor = 0.8

target_update_interval = 50

target_model = build_model()
prediction_model = build_model()
max_steps = 1000

memory = deque(maxlen=2000)

def get_max_q_index(q_table):
    flattened_ind = q_table.argmax()
    return (flattened_ind%16, int(flattened_ind/16))

def get_max_q_value(q_table):
    x, y = get_max_q_index(q_table)
    return q_table[y][x]

def next_to_possible_mine(state, x, y):
    if state[y][x][0] != -1: return False
    for x_ in range(x-1, x+2):
        for y_ in range(y-1, y+2):
            if state[y_+3][x_+3][0] > 0: return True
            
    return False

rewards_episodes = []
latest_state = 0
old_state = 0
for i in range(1, num_episodes+1):
    if i%target_update_interval == 0: 
        target_model = models.clone_model(prediction_model)
        target_model.build((None, 22, 22, 1))
        target_model.compile(loss='mse', optimizer=Adam(lr=0.01), metrics=['mae'])
        target_model.set_weights(prediction_model.get_weights())
    if i%10 == 0:
        print("%dth episode: median %d" % (i, np.mean(rewards_episodes[-10:])))
        print(num_null)
        num_null = 0
    
    num_null = 0
    game, interface = generate_new_game()
    eps_decay *= decay_factor
    done = False
    total_reward = 0
    steps = 0
    while not done and steps < max_steps:
        state = get_np_board(game)
        # Exploration vs exploitation
        if np.random.random() < eps_decay:
            num_tries = 0
            action = (np.random.randint(0, 16), np.random.randint(0, 16))
            while not next_to_possible_mine(state, action[0], action[1]) and num_tries < 256:
                action = (np.random.randint(0, 16), np.random.randint(0, 16))
                num_tries += 1
        else:
            predict_model_q_table = prediction_model.predict(np.array([state]))[0]
            ind = predict_model_q_table.argmax()
            action = (ind%16, int(ind/16))
        
        # Interact with board and get reward
        success_status = click_game(game, action[0], action[1])
        new_state = get_np_board(game)
        
        latest_state = new_state
        old_state = state
        
        reward = 10 if success_status else 0
        if state[action[1]+3][action[0]+3][0] != -1 and success_status:
            num_null += 1
            reward = 5
            if num_null > 1000: break
        
        # Find target q table
        target = reward + y * get_max_q_value(target_model.predict(np.array([new_state]))[0])
        target_qtable = target_model.predict(np.array([state]))[0]
        
        target_qtable[action[1]][action[0]][0] = target[0]
        
        # Fit
        prediction_model.fit(np.array([state]), np.array([target_qtable]), epochs=1, verbose=False)
        
        done = (not success_status) or game.isSolved()
        total_reward += reward
        
        steps += 1
        
        #if np.random.randint(0, 2) == 0: memory.appendleft()
        
    rewards_episodes.append(total_reward)
    destroy_game(interface, game)
    
    

10th episode: median 100
18
20th episode: median 702
39


In [12]:
print(list(latest_state))
print(list(old_state))

[array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0]]), array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0]]), array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0]]), array([[ 0],
       [ 0],
       [ 0],
       [-1],
       [-1],
       [-1],
       [-1],
       [-1],
       [-1],
       [-1],
       [-1],
       [-1],
       [-1],
       [-1],
       [-1],
      