In [5]:
# run pip install -e gym-reversi in terminal to install Custom Gym Env.
# Initialise game environment
import gym
import gym_reversi
import numpy as np
import random
import time

def opponent(board, avail):
    dummy=np.where(avail==np.amax(avail))
    # dummy=np.where(avail>0)
    maxavail = list(zip(dummy[0],dummy[1]))
    move = random.choice(maxavail)    
    return move

env = gym.make("reversi-v0", opponent = opponent, AI_Player = 1)

st = env.reset()


done = False

t1 = time.time()
while not done:
    avail = env.valid_moves(env.AI_Player)
    board = env.board
    move = opponent(board, avail)
    
    a = env.board_pos_to_actions_ind(move)
    _,_,done,_ = env.step(a)
print(time.time() - t1)
# print(env.action_space)
# env.render()

# env.get_state().shape
# env.get_actions(1)

0.15170907974243164


In [10]:
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, MaxPooling2D, Conv2D, Reshape
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

import tensorflow as tf
from keras import backend

config = tf.ConfigProto( device_count = {'GPU': 1 , 'CPU': 4} ) 
sess = tf.Session(config=config) 
backend.set_session(sess)

nb_actions = len(env.action_space)
state_shape = env.state_shape

print(nb_actions)
print(state_shape)

model = Sequential()
model.add(Reshape(input_shape=(1, 3, env.size, env.size), target_shape=(env.size, env.size, 3)))
model.add(Conv2D(64, kernel_size=(3, 3), input_shape=(env.size, env.size, 3), activation='relu')) # 3 filters? Parameters
model.add(Conv2D(64, kernel_size=(3, 3), input_shape=state_shape, activation='relu')) # 3 filters? Parameters
model.add(Conv2D(128, kernel_size=(2, 2), input_shape=state_shape, activation='relu'))
model.add(Conv2D(128, kernel_size=(3, 3), input_shape=state_shape, activation='relu'))
model.add(Flatten())
# model.add(Dense(128, activation='relu'))
model.add(Dense(nb_actions, activation='linear'))
print(model.output.shape)
model.summary()

60
(3, 8, 8)
(?, 60)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_5 (Reshape)          (None, 8, 8, 3)           0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 6, 6, 64)          1792      
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 4, 4, 64)          36928     
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 3, 3, 128)         32896     
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 1, 1, 128)         147584    
_________________________________________________________________
flatten_5 (Flatten)          (None, 128)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 60)                

In [3]:
import os.path
# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)

policy = EpsGreedyQPolicy(eps = 0.01)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

if os.path.isfile('dqn_reversi_weights.h5f'):
    dqn.load_weights('dqn_reversi_weights.h5f')




Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [None]:
# Train the model
dqn.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_reversi_weights.h5f', overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=False)

In [6]:
# Evaluate AI

import time
from IPython.display import clear_output

st_0 = env.reset()
st = np.zeros((1,1,3,8,8))
st[2:4] = st_0

winner_counter = {
    "AI":0,
    "Tie":0,
    "Opponent":0
}

for test_games in range(10000):
    actions = model.predict(st)

    valid_actions = env.get_actions(env.AI_Player)
    actions = actions.flatten()[valid_actions]

    a = valid_actions[np.argmax(actions)]
    
    st_0, r, done, _ = env.step(a)
    st[2:4] = st_0
    
    env.render()
    if done:
        count = np.sum(env.board)
        if count == 0:
            winner = "Tie"
        elif env.AI_Player * count > 0:
            winner = "AI"
        else:
            winner = "Opponent"
    
        winner_counter[winner] += 1
        clear_output()
        print("Winner: {}".format(winner))
        print("AI: {}, Tie: {}, Opponent: {}".format(winner_counter["AI"], winner_counter["Tie"], winner_counter["Opponent"]))
        
        
        time.sleep(0.2)
        env.reset()

Winner: AI
AI: 23, Tie: 0, Opponent: 9


KeyboardInterrupt: 

In [5]:
# Close window (and environment)
env.close()

In [None]:
# Human vs AI play
import numpy as np
import time
import random
import reversi_renderer as renderer
from logic_UPDATED import get_turn_list, creat_board, valid_moves

board = creat_board(8)
player = 1
human_player = -1

def board_moved(pos): 
    global board, player, human_player
    
    avail = valid_moves(board , player)
    if np.amax(avail) == 0:
        print("No moves available...")
        player *= -1
    elif player == human_player:
        print("HUMAN")
        if avail[pos] > 0:
            board[pos] = player
            turn_list = get_turn_list(pos, board , player)
            for i in turn_list:
                board[i] = player
            player *= -1
            renderer.paint_board(board, highlight_pos = pos)
            time.sleep(0.1)
            board_moved(None) # Call AI player
        else:
            print("Move not available...")        
    else:
        print("AI")
        dummy=np.where(avail==np.amax(avail))
        maxavail = list(zip(dummy[0],dummy[1]))
        move = random.choice(maxavail)
        turn_list = get_turn_list(move, board , player)
        board[move] = player
        for i in turn_list:
            board[i] = player     
        renderer.paint_board(board, highlight_pos = move)
        player *= -1

renderer.timer_event = None
renderer.clicked_event = board_moved
renderer.Start(board)

In [None]:
# Human vs Human
import numpy as np
import time
import random
import reversi_renderer as renderer
from logic_UPDATED import get_turn_list, creat_board, valid_moves

board = creat_board(8)
player = 1

def board_moved(pos): 
    global board, player, human_player
    
    avail = valid_moves(board , player)
    if np.amax(avail) == 0:
        print("No moves available...")
        player *= -1
    if avail[pos] > 0:
        board[pos] = player
        turn_list = get_turn_list(pos, board , player)
        for i in turn_list:
            board[i] = player
        player *= -1
        renderer.paint_board(board, highlight_pos = pos)
    else:
        print("Move not available...")        

renderer.timer_event = None
renderer.clicked_event = board_moved
renderer.Start(board)

In [None]:
# Random play
import numpy as np
import random
import reversi_renderer as renderer
from logic_UPDATED import get_turn_list, creat_board, valid_moves

board = creat_board(8)
player = -1
random.seed(42)

def play_random():
    global board, player
    
    player *= -1
    avail = valid_moves(board , player)
    if np.amax(avail) == 0:
        
        
        renderer.Close()
    else:
        dummy=np.where(avail==np.amax(avail))
        maxavail = list(zip(dummy[0],dummy[1]))
        move = random.choice(maxavail)
        turn_list = get_turn_list(move, board , player)
        board[move] = player
        for i in turn_list:
            board[i] = player     
        renderer.paint_board(board, highlight_pos = move)
        
paused = False
def pause(pos):
    global paused
    paused = not paused
    if paused:
        renderer.timer_event = None
        print("Paused...")
    else:
        renderer.timer_event = play_random
        print("Unpaused...")
    
renderer.timer_event = play_random
renderer.timer_pause = 300
renderer.clicked_event = pause
renderer.Start(board)

TypeError: Error when checking model : data should be a Numpy array, or list/dict of Numpy arrays. Found: 1...