# Connect 4

---

Author: S. Menary [sbmenary@gmail.com]

Date  : 2023-01-15, last edit 2023-01-18

Brief : Develop a bot that uses a neural network with Monte Carlo Tree Search (MCTS)

---

## Notes

- apply random reflections to exploit parity symmetry
- represent all inputs from perspective of current player, so +1 for self and -1 for other

## Imports

In [1]:
###
###  Required imports
###  - all imports should be placed here
###


##  Python core libs
import pickle, sys, time

##  PyPI libs
import numpy as np
from matplotlib import pyplot as plt

##  Local packages
from connect4.utils    import DebugLevel
from connect4.game     import BinaryPlayer, GameBoard, GameResult
from connect4.MCTS     import Node_NeuralMCTS
from connect4.bot      import Bot_NeuralMCTS, Bot_VanillaMCTS
from connect4.parallel import generate_from_processes


In [2]:
###
###  Report interpreter and library versions for reproducibility
###

print(f"{'Python':>12} version is {sys.version}")
print(f"{'Numpy':>12} version is {np.__version__}")


      Python version is 3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:25:29) [Clang 14.0.6 ]
       Numpy version is 1.23.2


In [3]:
###
###  Global config
###  - old model is the one passed to the data generation step
###  - new model name is where the fitted model is saved
###

model_idx      = 5
old_model_name = f"../models/.neural_model_v{model_idx-1}.h5"
new_model_name = f"../models/.neural_model_v{model_idx}.h5"

print(f"Using old model: {old_model_name}", f"Using new model: {new_model_name}", sep="\n")


Using old model: ../models/.neural_model_v4.h5
Using new model: ../models/.neural_model_v5.h5


In [4]:

def get_training_data_from_bot_game(model, duration:float=1, discount=1., debug_lvl:DebugLevel=DebugLevel.MUTE) :
    """
    Let a bot play one complete game against itself and return the training data for every turn.

    Inputs:
        > model
          neural model handed to Bot_NeuralMCTS; if falsy (e.g. None) a Bot_VanillaMCTS plays instead
        > duration, default=1
          forwarded to bot.take_move; callers in this file pass fractional values (0.1, 0.5)
          NOTE(review): presumably the search time per move in seconds - confirm against the bot API
        > discount, default=1.
          forwarded to bot.take_move, and also the factor applied to the final score for every
          step further back from the end of the game
        > debug_lvl, default=DebugLevel.MUTE
          controls the messages emitted through debug_lvl.message

    Returns:
        > np array of model inputs, one per move
        > np array of posterior policies, one per move
        > np array of target values with shape (num_moves, 1)
    """
    ##  Create game and bot
    ##  - decide once which kind of bot we drive, rather than re-testing the model on every turn
    use_neural = bool(model)
    game_board = GameBoard()
    bot        = Bot_NeuralMCTS(model) if use_neural else Bot_VanillaMCTS()
    debug_lvl.message(DebugLevel.LOW, f"Using bot {bot}")
    debug_lvl.message(DebugLevel.LOW, game_board)

    ##  Create containers for model input and output
    model_inputs, posteriors, values = [], [], []

    ##  Take moves until end of game, storing model in and target out at each turn
    ##  - values are initially the player sign (bot.root_node.player.value), resolved after the game
    result = game_board.get_result()
    while not result :
        bot.take_move(game_board, duration=duration, discount=discount, debug_lvl=debug_lvl)
        debug_lvl.message(DebugLevel.LOW, game_board)
        root_node = bot.root_node
        if use_neural :
            ##  N.B. we do not invert sign of model_input because this is already done by root_node
            model_input = root_node.model_input
        else :
            ##  Build the input by hand, negating it when player O is to move
            board_shape = (game_board.horizontal_size, game_board.vertical_size, 1)
            model_input = root_node.game_board.board.reshape(board_shape)
            if root_node.player == BinaryPlayer.O : model_input = -model_input
        model_inputs.append(model_input)
        posteriors  .append(root_node.get_posterior_policy())
        values      .append(root_node.player.value)
        result = game_board.get_result()

    ##  Resolve values
    ##  - walk backwards from the final move, multiplying the player sign by the score for player X
    ##  - the score shrinks by one factor of discount for every step away from the end of the game
    backprop_value = result.get_game_score_for_player(BinaryPlayer.X)
    for idx in reversed(range(len(values))) :
        values[idx]    *= backprop_value
        backprop_value *= discount

    ##  Return containers as np arrays
    return np.array(model_inputs), np.array(posteriors), np.array(values).reshape((len(values),1))


##  Test neural model MCTS

- Test that we can propagate values and make decisions correctly with neural MCTS
- Find a good value for the duration parameter (the smallest value that gives stable posteriors)
- These cells cannot be run during a regular run, since tf cannot be used in the main process before spawning children


In [5]:
###
###  Perform a few MCTS steps
###  - this cell is disabled: its code is wrapped in a string literal, so nothing below is executed
###  - to run it, remove the surrounding triple quotes
###  - when enabled it loads the old model into the name `model` and runs 10 MCTS steps from an empty board
###

'''from connect4.neural import load_model

##  Create game board
game_board = GameBoard()
print(f"\nInitial game board:\n{game_board}")

##  Create a root node at the current game state
model      = load_model(old_model_name)
root_node  = Node_NeuralMCTS(game_board, params=[model, 1.], label="ROOT")

##  Print the initial value tree (should be a ROOT node with no children)
print("Initial tree:")
print(root_node.tree_summary())
print()

##  Perform several MCTS steps with a HIGH debug level
root_node.multi_step_MCTS(num_steps=10, max_sim_steps=-1, discount=0.99, debug_lvl=DebugLevel.MEDIUM)

##  Print the updated value tree 
print("Updated tree:")
print(root_node.tree_summary())
print()'''


'from connect4.neural import load_model\n\n##  Create game board\ngame_board = GameBoard()\nprint(f"\nInitial game board:\n{game_board}")\n\n##  Create a root node at the current game state\nmodel      = load_model(old_model_name)\nroot_node  = Node_NeuralMCTS(game_board, params=[model, 1.], label="ROOT")\n\n##  Print the initial value tree (should be a ROOT node with no children)\nprint("Initial tree:")\nprint(root_node.tree_summary())\nprint()\n\n##  Perform several MCTS steps with a HIGH debug level\nroot_node.multi_step_MCTS(num_steps=10, max_sim_steps=-1, discount=0.99, debug_lvl=DebugLevel.MEDIUM)\n\n##  Print the updated value tree \nprint("Updated tree:")\nprint(root_node.tree_summary())\nprint()'

In [6]:
###
###  Check that a game looks sensible
###  - this cell is disabled: its code is wrapped in a string literal, so nothing below is executed
###  - when enabled it needs the name `model`, which is only assigned inside the disabled MCTS-steps cell
###  - prints board, posterior and value for every turn of one generated game
###

'''
model_inputs, posteriors, values = get_training_data_from_bot_game(model, duration=0.1, discount=0.99)

for inp, pos, val in zip(model_inputs, posteriors, values) :
    print(inp[:,:,0], ",  posterior="+"  ".join([f"{x:.2f}" for x in pos]), f",  value = {val[0]:.3f}")
    '''

'\nmodel_inputs, posteriors, values = get_training_data_from_bot_game(model, duration=0.1, discount=0.99)\n\nfor inp, pos, val in zip(model_inputs, posteriors, values) :\n    print(inp[:,:,0], ",  posterior="+"  ".join([f"{x:.2f}" for x in pos]), f",  value = {val[0]:.3f}")\n    '

In [7]:
###
###  Use MCTS to search for an optimal action
###  - this cell is disabled: its code is wrapped in a string literal, so nothing below is executed
###  - when enabled it needs the name `model`, which is only assigned inside the disabled MCTS-steps cell
###  - plays a full game, printing the prior policy and the children's prior values before applying each action
###

'''
game_board = GameBoard()
bot = Bot_NeuralMCTS(model)

while not game_board.get_result() :
    action = bot.choose_action(game_board, duration=3, discount=0.99, debug_lvl=DebugLevel.LOW)
    print("Prior policy:  " + "  ".join([f"{c:.2f}" for c in bot.root_node.child_priors]))
    print("Prior values:  " + "  ".join([f"{c.prior_value:.2f}" for c in bot.root_node.children]))
    game_board.apply_action(action)
    print(game_board)
'''

'\ngame_board = GameBoard()\nbot = Bot_NeuralMCTS(model)\n\nwhile not game_board.get_result() :\n    action = bot.choose_action(game_board, duration=3, discount=0.99, debug_lvl=DebugLevel.LOW)\n    print("Prior policy:  " + "  ".join([f"{c:.2f}" for c in bot.root_node.child_priors]))\n    print("Prior values:  " + "  ".join([f"{c.prior_value:.2f}" for c in bot.root_node.children]))\n    game_board.apply_action(action)\n    print(game_board)\n'

## Multiprocess datapoint generation

In [8]:

def generate_datapoints_process(proc_idx, num_games, out_queue, argv) :
    """
    Worker entry point: play num_games bot games and put each game's training data on out_queue.

    Inputs:
        > proc_idx  : index of this worker, added to the base seed
        > num_games : number of games to play
        > out_queue : object with a put method that receives one result per game
        > argv      : sequence unpacked as (model_name, duration, discount, base_seed)
    """
    ##  Imported at call time rather than at the top of the notebook
    ##  NOTE(review): presumably so that tf is only loaded inside the worker - confirm
    from connect4.neural import load_model

    model_name, duration, discount, base_seed = argv

    ##  Give every worker index its own numpy seed
    np.random.seed(base_seed + proc_idx)

    ##  An empty model name means no model is loaded and None is passed to the game function
    if len(model_name) > 0 :
        model = load_model(model_name)
    else :
        model = None

    ##  Play the games one at a time, handing each result back as soon as it is ready
    for _ in range(num_games) :
        game_data = get_training_data_from_bot_game(model, duration, discount)
        out_queue.put(game_data)


In [9]:
###
###  Generate training data
###  - games are played by generate_datapoints_process via generate_from_processes
###

##  Generation settings
num_processes      = 7
num_games_per_proc = 3
base_seed          = 10
duration           = .5
discount           = 0.99
monitor_frequency  = 3

##  Worker arguments, unpacked by generate_datapoints_process as (model_name, duration, discount, base_seed)
worker_args = [old_model_name, duration, discount, base_seed]

results = generate_from_processes(
    func                 = generate_datapoints_process,
    func_args            = worker_args,
    num_proc             = num_processes,
    num_results_per_proc = num_games_per_proc,
    mon_freq             = monitor_frequency)

##  Stack the per-game arrays (inputs, posteriors, values) from all games into single datasets
model_in, model_p, model_v = (np.concatenate([game[k] for game in results]) for k in range(3))


Generated 21 / 21 results [t=45.06s]
Generation complete [t=45.06s] [n=21]


In [10]:
###
###  Load expensive generated data
###  - this cell is disabled: its code is wrapped in a string literal, so nothing below is executed
###  - the file must be opened for reading ("rb"); opening it with "wb" would truncate the saved data
###
'''
data_fname = f"../data/.training_data_v{model_idx}.pickle"
with open(data_fname, "rb") as data_file :
    loaded = pickle.load(data_file)
print(f"Generated data loaded from file: {data_fname}")
'''

'\ndata_fname = f"../data/.training_data_v{model_idx}.pickle"\nloaded     = pickle.load(open(data_fname, "wb"))\nprint(f"Generated data loaded from file: {data_fname}")\n'

In [11]:
###
###  Sanity-check a game
###  - print board, posterior and value for every turn of the first generated game
###

test_model_inputs, test_posteriors, test_values = results[0]

for board, posterior, value in zip(test_model_inputs, test_posteriors, test_values) :
    posterior_str = "  ".join(f"{prob:.2f}" for prob in posterior)
    print(board[:,:,0], ",  posterior=" + posterior_str, f",  value = {value[0]:.3f}")


[[0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]] ,  posterior=0.03  0.06  0.03  0.76  0.03  0.06  0.03 ,  value = -0.895
[[ 0  0  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]] ,  posterior=0.03  0.03  0.03  0.83  0.03  0.03  0.03 ,  value = 0.904
[[-1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]] ,  posterior=0.02  0.02  0.02  0.86  0.02  0.02  0.02 ,  value = -0.914
[[ 1  0  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]] ,  posterior=0.04  0.11  0.04  0.13  0.63  0.02  0.02 ,  value = 0.923
[[-1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]] ,  posterior=0.0

In [12]:
###
###  Save expensive generated data
###  - stored as a pickled dict with keys model_in, model_p and model_v
###

data_fname = f"../data/.training_data_v{model_idx}.pickle"
to_save    = {"model_in":model_in, "model_p":model_p, "model_v":model_v}

##  Use a context manager so the file is flushed and closed before we report success
with open(data_fname, "wb") as data_file :
    pickle.dump(to_save, data_file)
print(f"Generated data saved to file: {data_fname}")


Generated data saved to file: ../data/.training_data_v5.pickle


In [13]:
###
###  Report the shapes of the generated datasets
###

print(f"model_in with shape: {model_in.shape}",
      f"model_p  with shape: {model_p.shape}",
      f"model_v  with shape: {model_v.shape}",
      sep="\n")

model_in with shape: (431, 7, 6, 1)
model_p  with shape: (431, 7)
model_v  with shape: (431, 1)


In [14]:
###
###  Sanity-check a few datapoints
###  - show the first three inputs, policies and values
###

test_indices = np.arange(3)

print("Print first few inputs:")
for i in test_indices :
    print(model_in[i][:,:,0])

print("\nPrint first few policies")
for i in test_indices :
    policy_str = ",  ".join(f"{p:.2f}" for p in model_p[i])
    print(policy_str)

print("\nPrint first few values")
for i in test_indices :
    value_str = ",  ".join(f"{v:.2f}" for v in model_v[i])
    print(value_str)


Print first few inputs:
[[0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]]
[[ 0  0  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
[[-1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]]

Print first few policies
0.03,  0.06,  0.03,  0.76,  0.03,  0.06,  0.03
0.03,  0.03,  0.03,  0.83,  0.03,  0.03,  0.03
0.02,  0.02,  0.02,  0.86,  0.02,  0.02,  0.02

Print first few values
-0.90
0.90
-0.91


In [15]:
###
###  Data Augmentation and splitting
###

##  Reflect the game symmetry: every datapoint has an even chance of being mirrored in place,
##  with the board and its posterior flipped together along their first axis
for idx in range(len(model_in)) :
    keep_as_is = np.random.choice([True, False])
    if not keep_as_is :
        model_in[idx] = np.flip(model_in[idx], axis=0)
        model_p [idx] = np.flip(model_p [idx], axis=0)

##  Shuffle data, applying one common permutation to all three arrays
num_datapoints = len(model_in)
indices = np.arange(num_datapoints)
np.random.shuffle(indices)
model_in = model_in[indices]
model_p  = model_p [indices]
model_v  = model_v [indices]

##  Split data into train (first 70%) and val (remainder) sets
split_idx = int(0.7*num_datapoints)

train_model_in, val_model_in = model_in[:split_idx], model_in[split_idx:]
train_model_p , val_model_p  = model_p [:split_idx], model_p [split_idx:]
train_model_v , val_model_v  = model_v [:split_idx], model_v [split_idx:]

print(f"Created training set of size {len(train_model_v)}")
print(f"Created validation set of size {len(val_model_v)}")


Created training set of size 301
Created validation set of size 130


In [16]:
###
###  Create or load a model
###  - currently starts from the model stored at old_model_name
###  - the create_model call below is an inert string literal, kept as the alternative for starting from a new network
###

##  Imported here rather than in the imports cell
##  NOTE(review): presumably to keep tf out of the main process until data generation is done - confirm
from connect4.neural import create_model, load_model

'''new_model = create_model(name=new_model_name, num_conv_blocks=4, num_filters=40, num_dense=5, 
                         dense_width=200, batch_norm=True)'''

new_model = load_model(old_model_name)

##  Print the layer summary of the model that will be trained
new_model.summary()


Model: "../models/.neural_model_v2.h5"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 game_board_input (InputLayer)  [(None, 7, 6, 1)]    0           []                               
                                                                                                  
 conv0_conv2d (Conv2D)          (None, 7, 6, 40)     400         ['game_board_input[0][0]']       
                                                                                                  
 conv0_skipconnect (Concatenate  (None, 7, 6, 41)    0           ['conv0_conv2d[0][0]',           
 )                                                                'game_board_input[0][0]']       
                                                                                                  
 conv0_dropout (Dropout)        (None, 7, 6, 41)     0           ['con

In [17]:
###
###  View a few predictions of this model
###  - each line shows the validation target followed by the model output
###

test_model_p, test_model_v = new_model(val_model_in)
test_model_p = test_model_p.numpy()
test_model_v = test_model_v.numpy()

test_indices = np.arange(10)

print("Policy cross-check")
for i in test_indices :
    target_str    = ",  ".join(f"{p:.2f}" for p in val_model_p[i])
    predicted_str = ",  ".join(f"{p:.2f}" for p in test_model_p[i])
    print(target_str + "  -->  " + predicted_str)

print("\nValue cross-check")
for i in test_indices :
    target_str    = ",  ".join(f"{v:.2f}" for v in val_model_v[i])
    predicted_str = ",  ".join(f"{v:.2f}" for v in test_model_v[i])
    print(target_str + "  -->  " + predicted_str)


Policy cross-check
0.02,  0.02,  0.76,  0.04,  0.11,  0.02,  0.02  -->  0.20,  0.14,  0.24,  0.05,  0.31,  0.03,  0.03
0.02,  0.02,  0.02,  0.85,  0.02,  0.02,  0.02  -->  0.00,  0.00,  0.01,  0.97,  0.01,  0.00,  0.00
0.02,  0.52,  0.02,  0.31,  0.02,  0.10,  0.02  -->  0.00,  0.01,  0.00,  0.00,  0.95,  0.03,  0.01
0.02,  0.02,  0.02,  0.02,  0.88,  0.02,  0.02  -->  0.00,  0.00,  0.01,  0.01,  0.88,  0.08,  0.00
0.02,  0.02,  0.63,  0.13,  0.04,  0.11,  0.04  -->  0.01,  0.04,  0.39,  0.09,  0.39,  0.07,  0.01
0.02,  0.02,  0.02,  0.90,  0.02,  0.02,  0.02  -->  0.00,  0.00,  0.01,  0.97,  0.01,  0.00,  0.00
0.07,  0.04,  0.02,  0.02,  0.76,  0.07,  0.02  -->  0.05,  0.06,  0.03,  0.18,  0.40,  0.09,  0.19
0.02,  0.53,  0.02,  0.38,  0.02,  0.02,  0.02  -->  0.02,  0.77,  0.07,  0.07,  0.03,  0.03,  0.01
0.02,  0.05,  0.52,  0.14,  0.20,  0.05,  0.02  -->  0.04,  0.10,  0.29,  0.07,  0.34,  0.11,  0.04
0.03,  0.05,  0.61,  0.16,  0.08,  0.05,  0.03  -->  0.00,  0.00,  0.01,  0.55,  

In [18]:
###
###  Fit and save our model!
###  - fit on the training split only: the validation rows must stay unseen, otherwise val_loss
###    (which drives early stopping and the restored weights) is measured on data the model trained on
###

from tensorflow.keras.callbacks import EarlyStopping

history = new_model.fit(
            train_model_in, [train_model_p, train_model_v], epochs=1000, batch_size=100,
            validation_data=(val_model_in, [val_model_p, val_model_v]),
            callbacks=[EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)])

new_model.save(new_model_name)


Epoch 1/1000


2023-01-19 10:08:54.956894: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000


Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000
Epoch 74/1000
Epoch 75/1000


KeyboardInterrupt: 

In [None]:
###
###  View a few predictions of this model
###  - each line shows the validation target followed by the model output
###

test_model_p, test_model_v = new_model(val_model_in)
test_model_p = test_model_p.numpy()
test_model_v = test_model_v.numpy()

test_indices = np.arange(10)

print("Policy cross-check")
for i in test_indices :
    target_str    = ",  ".join(f"{p:.2f}" for p in val_model_p[i])
    predicted_str = ",  ".join(f"{p:.2f}" for p in test_model_p[i])
    print(target_str + "  -->  " + predicted_str)

print("\nValue cross-check")
for i in test_indices :
    target_str    = ",  ".join(f"{v:.2f}" for v in val_model_v[i])
    predicted_str = ",  ".join(f"{v:.2f}" for v in test_model_v[i])
    print(target_str + "  -->  " + predicted_str)


In [None]:
###
###  Visualise our training curves
###  - one row of panels (total, policy and value loss), drawn first on a linear then on a log y-axis
###

monitor_pairs = [["loss"       , "val_loss"],
                 ["policy_loss", "val_policy_loss"],
                 ["value_loss" , "val_value_loss"]]
num_axes      = len(monitor_pairs)

for do_log in [False, True] :

    fig = plt.figure(figsize=(4*num_axes, 3))
    for ax_idx, (train_key, val_key) in enumerate(monitor_pairs, start=1) :
        ax = fig.add_subplot(1, num_axes, ax_idx)
        ax.plot(history.history[train_key], "-", lw=3, c="r", alpha=0.5, label=train_key)
        ax.plot(history.history[val_key]  , "-", lw=3, c="b", alpha=0.5, label=val_key)
        ax.legend(loc="upper right", frameon=False, fontsize=10)
        ax.set_xlabel("Epoch", labelpad=15, fontsize=11, ha="center", va="top")
        if do_log : ax.set_yscale("log")

    ##  pyplot.show takes no figure argument (its only parameter is the keyword `block`)
    plt.show()
    plt.close(fig)
    