# Connect 4

---

Author: S. Menary [sbmenary@gmail.com]

Date  : 2023-01-15, last edit 2023-01-18

Brief : Develop a bot that combines a neural network with Monte Carlo Tree Search (MCTS)

---

## Notes

- apply random reflections to exploit parity symmetry
- represent all inputs from perspective of current player, so +1 for self and -1 for other

## Imports

In [1]:
###
###  Required imports
###  - all imports should be placed here
###


##  Python core libs
import pickle, sys, time

##  PyPI libs
import numpy as np
from matplotlib import pyplot as plt

##  Local packages
from connect4.utils    import DebugLevel
from connect4.game     import BinaryPlayer, GameBoard, GameResult
from connect4.MCTS     import Node_NeuralMCTS
from connect4.bot      import Bot_NeuralMCTS, Bot_VanillaMCTS
from connect4.parallel import MonitorThread, WorkerThread, kill_threads


In [2]:
###
###  Print version for reproducibility
###

##  Labels are right-aligned in a 12-character column so that the two lines match up
for label, version in (("Python", sys.version), ("Numpy", np.__version__)) :
    print(f"{label.rjust(12)} version is {version}")


      Python version is 3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:25:29) [Clang 14.0.6 ]
       Numpy version is 1.23.2


In [3]:
###
###  Global config
###

##  Model files are versioned by index: "old" is version N-1 and "new" is version N
model_idx = 4
old_model_name, new_model_name = (f"../models/.neural_model_v{model_idx-1}.h5",
                                  f"../models/.neural_model_v{model_idx}.h5")

print(f"Using old model: {old_model_name}")
print(f"Using new model: {new_model_name}")


Using old model: ../models/.neural_model_v3.h5
Using new model: ../models/.neural_model_v4.h5


In [4]:

def get_training_data_from_bot_game(model, duration:int=1, discount=1., debug_lvl:DebugLevel=DebugLevel.MUTE) :
    """
    Let a single bot play a whole game against itself and record one training datapoint per turn.

    Inputs:
        > model: passed to Bot_NeuralMCTS; when falsy a Bot_VanillaMCTS is used instead
        > duration: forwarded to bot.take_move on every turn
        > discount: forwarded to bot.take_move, and also applied once per turn when propagating the
                    final score backwards from the last move to the first
        > debug_lvl: DebugLevel through which progress messages are emitted

    Returns:
        > np.ndarray of the model inputs, one per turn
        > np.ndarray of the posterior policies, one per turn
        > np.ndarray of the target values, shape (number of turns, 1)
    """
    ##  Fresh board, and the bot flavour that matches whether a model was provided
    board      = GameBoard()
    player_bot = Bot_NeuralMCTS(model) if model else Bot_VanillaMCTS()
    debug_lvl.message(DebugLevel.LOW, f"Using bot {player_bot}")
    debug_lvl.message(DebugLevel.LOW, board)

    ##  Per-turn records: network input, MCTS posterior, and the value of the root node's player
    inputs_per_turn, policy_per_turn, target_per_turn = [], [], []

    ##  Play until the board reports a truthy result
    outcome = board.get_result()
    while not outcome :
        player_bot.take_move(board, duration=duration, discount=discount, debug_lvl=debug_lvl)
        debug_lvl.message(DebugLevel.LOW, board)
        if model :
            ##  No sign flip needed here (per the original author's note, root_node already applies it)
            turn_input = player_bot.root_node.model_input
        else :
            ##  Build the input by hand from the raw board, negated when the root player is O
            turn_input = player_bot.root_node.game_board.board.reshape((board.horizontal_size, board.vertical_size, 1))
            if player_bot.root_node.player == BinaryPlayer.O :
                turn_input = -turn_input
        inputs_per_turn.append(turn_input)
        policy_per_turn.append(player_bot.root_node.get_posterior_policy())
        target_per_turn.append(player_bot.root_node.player.value)
        outcome = board.get_result()

    ##  Walk from the final turn back to the first, scaling each recorded player value by the
    ##  score for player X, which shrinks by one factor of discount per step backwards
    score = outcome.get_game_score_for_player(BinaryPlayer.X)
    for turn in reversed(range(len(target_per_turn))) :
        target_per_turn[turn] *= score
        score *= discount

    ##  Hand everything back as arrays, with the values as a column vector
    num_turns = len(target_per_turn)
    return np.array(inputs_per_turn), np.array(policy_per_turn), np.array(target_per_turn).reshape((num_turns,1))


##  Test neural model MCTS

- Test that we can propagate values and make decisions correctly with neural MCTS
- Find a good value for the duration parameter (the smallest value that still gives stable posteriors)
- These cells cannot be run during a regular run, since TensorFlow (tf) must not be used in the main process before the child processes are spawned


In [5]:
###
###  Perform a few MCTS steps
###
###  NOTE: this cell is disabled by wrapping its code in a string literal, so none of the
###        statements below are executed; evaluating the cell only yields the string itself
###

'''from connect4.neural import load_model

##  Create game board
game_board = GameBoard()
print(f"\nInitial game board:\n{game_board}")

##  Create a root node at the current game state
model      = load_model(old_model_name)
root_node  = Node_NeuralMCTS(game_board, params=[model, 1.], label="ROOT")

##  Print the initial value tree (should be a ROOT node with no children)
print("Initial tree:")
print(root_node.tree_summary())
print()

##  Perform several MCTS steps with a HIGH debug level
root_node.multi_step_MCTS(num_steps=10, max_sim_steps=-1, discount=0.99, debug_lvl=DebugLevel.MEDIUM)

##  Print the updated value tree 
print("Updated tree:")
print(root_node.tree_summary())
print()'''


'from connect4.neural import load_model\n\n##  Create game board\ngame_board = GameBoard()\nprint(f"\nInitial game board:\n{game_board}")\n\n##  Create a root node at the current game state\nmodel      = load_model(old_model_name)\nroot_node  = Node_NeuralMCTS(game_board, params=[model, 1.], label="ROOT")\n\n##  Print the initial value tree (should be a ROOT node with no children)\nprint("Initial tree:")\nprint(root_node.tree_summary())\nprint()\n\n##  Perform several MCTS steps with a HIGH debug level\nroot_node.multi_step_MCTS(num_steps=10, max_sim_steps=-1, discount=0.99, debug_lvl=DebugLevel.MEDIUM)\n\n##  Print the updated value tree \nprint("Updated tree:")\nprint(root_node.tree_summary())\nprint()'

In [6]:
###
###  Check that a game looks sensible
###
###  NOTE: this cell is disabled by wrapping its code in a string literal, so none of the
###        statements below are executed; evaluating the cell only yields the string itself
###

'''
model_inputs, posteriors, values = get_training_data_from_bot_game(model, duration=0.1, discount=0.99)

for inp, pos, val in zip(model_inputs, posteriors, values) :
    print(inp[:,:,0], ",  posterior="+"  ".join([f"{x:.2f}" for x in pos]), f",  value = {val[0]:.3f}")
    '''

'\nmodel_inputs, posteriors, values = get_training_data_from_bot_game(model, duration=0.1, discount=0.99)\n\nfor inp, pos, val in zip(model_inputs, posteriors, values) :\n    print(inp[:,:,0], ",  posterior="+"  ".join([f"{x:.2f}" for x in pos]), f",  value = {val[0]:.3f}")\n    '

In [7]:
###
###  Use MCTS to search for an optimal action
###
###  NOTE: this cell is disabled by wrapping its code in a string literal, so none of the
###        statements below are executed; evaluating the cell only yields the string itself
###

'''
game_board = GameBoard()
bot = Bot_NeuralMCTS(model)

while not game_board.get_result() :
    action = bot.choose_action(game_board, duration=3, discount=0.99, debug_lvl=DebugLevel.LOW)
    print("Prior policy:  " + "  ".join([f"{c:.2f}" for c in bot.root_node.child_priors]))
    print("Prior values:  " + "  ".join([f"{c.prior_value:.2f}" for c in bot.root_node.children]))
    game_board.apply_action(action)
    print(game_board)
'''

'\ngame_board = GameBoard()\nbot = Bot_NeuralMCTS(model)\n\nwhile not game_board.get_result() :\n    action = bot.choose_action(game_board, duration=3, discount=0.99, debug_lvl=DebugLevel.LOW)\n    print("Prior policy:  " + "  ".join([f"{c:.2f}" for c in bot.root_node.child_priors]))\n    print("Prior values:  " + "  ".join([f"{c.prior_value:.2f}" for c in bot.root_node.children]))\n    game_board.apply_action(action)\n    print(game_board)\n'

## Multiprocess datapoint generation

In [8]:

def generate_datapoints_process(proc_idx, num_games, out_queue, base_seed, model_name, duration, discount) :
    """
    Play num_games bot games in sequence and put each game's training data onto out_queue.

    Inputs:
        > proc_idx: index of this process, added to base_seed to form the numpy random seed
        > num_games: how many games to play
        > out_queue: object whose put() method receives the result of every game
        > base_seed: seed shared by all processes before the per-process offset
        > model_name: name passed to load_model; an empty name means no model is loaded
        > duration, discount: forwarded to get_training_data_from_bot_game
    """
    ##  The neural package is imported inside the function rather than at the top of the file
    from connect4.neural import load_model

    ##  Seed numpy with a per-process offset so that different proc_idx values get different seeds
    np.random.seed(base_seed + proc_idx)

    ##  Empty name -> play without a model
    model = None if len(model_name) == 0 else load_model(model_name)

    ##  Each put() carries the (inputs, posteriors, values) tuple of one complete game
    for _ in range(num_games) :
        out_queue.put(get_training_data_from_bot_game(model, duration, discount))
                
def generate_datapoints(num_processes, num_games_per_proc, base_seed, model_name, duration, discount, mon_freq=3) :
    """
    Run generate_datapoints_process through a WorkerThread, watched by a MonitorThread.

    Inputs:
        > num_processes, num_games_per_proc: forwarded to WorkerThread
        > base_seed, model_name, duration, discount: handed to WorkerThread as func_args
        > mon_freq: frequency argument of the MonitorThread

    Returns:
        > the worker's results attribute, read once both threads have been joined
    """
    extra_args       = [base_seed, model_name, duration, discount]
    game_worker      = WorkerThread(generate_datapoints_process, num_processes, num_games_per_proc,
                                    func_args=extra_args)
    progress_monitor = MonitorThread(game_worker, frequency=mon_freq)

    ##  Start the monitor before the worker, then wait for the worker before the monitor
    for thread in (progress_monitor, game_worker) :
        thread.start()
    for thread in (game_worker, progress_monitor) :
        thread.join()

    return game_worker.results


In [None]:
###
###  Generate training data
###

##  Settings for the parallel data-generation run
num_processes, num_games_per_proc = 7, 2
base_seed                         = 10
duration, discount                = 1, 0.99
monitor_frequency                 = 3

results = generate_datapoints(num_processes, num_games_per_proc, base_seed, old_model_name,
                              duration, discount, mon_freq=monitor_frequency)

##  Every result holds one game's (inputs, posteriors, values); join each field across all games
model_in, model_p, model_v = (np.concatenate(field) for field in zip(*results))


Generated 6 / 14 results [t=24.04s]

In [None]:
###
###  Load expensive generated data
###
###  NOTE: this cell is disabled by wrapping its code in a string literal; remove the quotes to
###        reload the data saved by a previous run instead of regenerating it
###
'''
##  Open for READING in binary mode: opening with "wb" would truncate the saved file before it is read
##  N.B. only unpickle files that you generated yourself
data_fname = f"../data/.training_data_v{model_idx}.pickle"
with open(data_fname, "rb") as data_file :
    loaded = pickle.load(data_file)
print(f"Generated data loaded from file: {data_fname}")
'''

In [None]:
###
###  Sanity-check a game
###

##  Take the first generated game and print one line per recorded turn
test_model_inputs, test_posteriors, test_values = results[0]

for inp, pos, val in zip(test_model_inputs, test_posteriors, test_values) :
    posterior_str = "  ".join([f"{x:.2f}" for x in pos])
    value_str     = f",  value = {val[0]:.3f}"
    print(inp[:,:,0], ",  posterior=" + posterior_str, value_str)


In [None]:
###
###  Save expensive generated data
###

data_fname = f"../data/.training_data_v{model_idx}.pickle"
to_save    = {"model_in":model_in, "model_p":model_p, "model_v":model_v}

##  Context manager guarantees the file is flushed and closed before success is reported
with open(data_fname, "wb") as data_file :
    pickle.dump(to_save, data_file)
print(f"Generated data saved to file: {data_fname}")


In [None]:
###
###  Report on the data generated
###

##  One line per array, with the names padded to a common width of 8 characters
for array_name, array in (("model_in", model_in), ("model_p", model_p), ("model_v", model_v)) :
    print(f"{array_name.ljust(8)} with shape: {array.shape}")

In [None]:
###
###  Sanity-check a few datapoints
###

test_indices = np.arange(3)

print("Print first few inputs:")
for i in test_indices :
    print(model_in[i,:,:,0])

##  Policies and values are printed with the same comma-separated, two-decimal formatting
for heading, rows in (("\nPrint first few policies", model_p), ("\nPrint first few values", model_v)) :
    print(heading)
    for i in test_indices :
        print(",  ".join([f"{x:.2f}" for x in rows[i]]))


In [None]:
###
###  Data Augmentation and splitting
###

##  For each datapoint draw a random True/False; on False, flip the board and its posterior
##  along axis 0 (the x-direction) so that the dataset reflects the mirror symmetry of the game
for idx in range(len(model_in)) :
    keep_as_is = np.random.choice([True, False])
    if not keep_as_is :
        model_in[idx] = np.flip(model_in[idx], axis=0)
        model_p [idx] = np.flip(model_p [idx], axis=0)

##  Shuffle the three arrays using one shared random ordering
indices = np.arange(len(model_in))
np.random.shuffle(indices)
model_in = model_in[indices]
model_p  = model_p [indices]
model_v  = model_v [indices]

##  The first 70% of the shuffled data is the training set, the rest is the validation set
num_datapoints = len(model_in)
split_idx      = int(0.7*num_datapoints)

train_model_in, val_model_in = model_in[:split_idx], model_in[split_idx:]
train_model_p , val_model_p  = model_p [:split_idx], model_p [split_idx:]
train_model_v , val_model_v  = model_v [:split_idx], model_v [split_idx:]

print(f"Created training set of size {len(train_model_v)}")
print(f"Created validation set of size {len(val_model_v)}")


In [None]:
###
###  Create or load a model
###

##  Imported in this cell rather than with the imports at the top of the file
##  NOTE(review): presumably deferred so that the neural backend is not loaded before the
##                data-generation workers are spawned - confirm
from connect4.neural import create_model, load_model

##  Disabled alternative (string literal, not executed): build a brand-new model from scratch
'''new_model = create_model(name=new_model_name, num_conv_blocks=4, num_filters=40, num_dense=5, 
                         dense_width=200, batch_norm=True)'''

##  Active path: the "new" model starts out as a copy of the previous version loaded from file
new_model = load_model(old_model_name)

new_model.summary()


In [None]:
###
###  View a few predictions of this model
###

##  Evaluate the model on the validation inputs and convert both outputs to numpy arrays
test_model_p, test_model_v = (output.numpy() for output in new_model(val_model_in))

test_indices = np.arange(10)

##  Every printed line reads "target  -->  model prediction"
for title, targets, predictions in (("Policy cross-check" , val_model_p, test_model_p),
                                    ("\nValue cross-check", val_model_v, test_model_v)) :
    print(title)
    for i in test_indices :
        target_str     = ",  ".join([f"{x:.2f}" for x in targets[i]])
        prediction_str = ",  ".join([f"{x:.2f}" for x in predictions[i]])
        print(target_str + "  -->  " + prediction_str)


In [None]:
###
###  Fit and save our model!
###

from tensorflow.keras.callbacks import EarlyStopping

##  Fit on the TRAINING split only. Fitting on the full model_in/model_p/model_v arrays would
##  include the validation rows in the training data, so val_loss would no longer be an
##  independent measure and the early-stopping criterion below would be unreliable.
history = new_model.fit(
            train_model_in, [train_model_p, train_model_v], epochs=1000, batch_size=100,
            validation_data=(val_model_in, [val_model_p, val_model_v]),
            callbacks=[EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)])

##  Persist the trained model under the new version's file name
new_model.save(new_model_name)


In [None]:
###
###  View a few predictions of this model
###

##  Evaluate the model on the validation inputs and convert both outputs to numpy arrays
test_model_p, test_model_v = (output.numpy() for output in new_model(val_model_in))

test_indices = np.arange(10)

##  Every printed line reads "target  -->  model prediction"
for title, targets, predictions in (("Policy cross-check" , val_model_p, test_model_p),
                                    ("\nValue cross-check", val_model_v, test_model_v)) :
    print(title)
    for i in test_indices :
        target_str     = ",  ".join([f"{x:.2f}" for x in targets[i]])
        prediction_str = ",  ".join([f"{x:.2f}" for x in predictions[i]])
        print(target_str + "  -->  " + prediction_str)


In [None]:
###
###  Visualise our training curves
###

##  Each pair is (training metric, validation metric), drawn together on one axis
monitor_pairs = [["loss"       , "val_loss"],
                 ["policy_loss", "val_policy_loss"],
                 ["value_loss" , "val_value_loss"]]

##  Draw the same row of plots twice: first with a linear y-axis, then with a log y-axis
for do_log in [False, True] :
    
    num_axes = len(monitor_pairs)
    fig      = plt.figure(figsize=(4*num_axes, 3))
    for ax_idx, monitor_pair in enumerate(monitor_pairs) :
        ax  = fig.add_subplot(1, num_axes, 1+ax_idx)
        ax.plot(history.history[monitor_pair[0]], "-", lw=3, c="r", alpha=0.5, label=monitor_pair[0])
        ax.plot(history.history[monitor_pair[1]], "-", lw=3, c="b", alpha=0.5, label=monitor_pair[1])
        ax.legend(loc="upper right", frameon=False, fontsize=10)
        ax.set_xlabel("Epoch", labelpad=15, fontsize=11, ha="center", va="top")
        if do_log : ax.set_yscale("log")
    ##  pyplot.show() does not take a figure argument (its only parameter is the keyword "block"),
    ##  so call it without arguments and then close the figure explicitly
    plt.show()
    plt.close(fig)
    

In [None]:
###
###  Play against our old bot!
###

old_model = load_model(old_model_name)

bot_old = Bot_NeuralMCTS(old_model, greedy=True)
bot_new = Bot_NeuralMCTS(new_model, greedy=True)

game_board = GameBoard()
print(game_board)

##  The old bot plays as X and the new bot plays as O, matching the printed announcements
while not game_board.get_result() :
    if game_board.to_play == BinaryPlayer.X :
        print("OLD BOT TO PLAY:")
        bot = bot_old
    else :
        print("NEW BOT TO PLAY:")
        bot = bot_new
    action = bot.choose_action(game_board, duration=3, discount=0.99, debug_lvl=DebugLevel.LOW)
    print("Prior values:  " + "  ".join([f"{x.prior_value:.3f}" for x in bot.root_node.children]))
    game_board.apply_action(action)
    print(game_board)
    