# Train neural MCTS

---

Author: S. Menary [sbmenary@gmail.com]

Date  : 2023-01-15, last edit 2023-01-19

Brief : Train a bot neural network by playing games using a previous bot generation (vanilla MCTS if this is the first iteration)

---

## Imports

---

In [1]:
##=====================================##
##  All imports should be placed here  ##
##=====================================##

##  Python core libs
import pickle, sys, time

##  PyPI libs
import numpy as np
from matplotlib import pyplot as plt

##  Local packages
from connect4.utils    import DebugLevel
from connect4.game     import BinaryPlayer, GameBoard, GameResult
from connect4.MCTS     import Node_NeuralMCTS, PolicyStrategy
from connect4.bot      import Bot_NeuralMCTS, Bot_VanillaMCTS
from connect4.parallel import generate_from_processes
from connect4.methods  import get_training_data_from_bot_game


In [2]:
##=====================================##
##  Print version for reproducibility  ##
##=====================================##

##  Library names are right-aligned in a 12-character column so that the version strings line up
print(f"{'Python':>12} version is {sys.version}")
print(f"{'Numpy':>12} version is {np.__version__}")


      Python version is 3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:25:29) [Clang 14.0.6 ]
       Numpy version is 1.23.2


---

##  Configure run

---

In [3]:
##============================##
##  Set global config values  ##
##============================##

##  Model generation index: data is generated using the model of generation (model_idx-1), and the
##  trained result is saved as generation model_idx
model_idx      = 6
new_model_name = f"../models/.neural_model_v{model_idx}.h5"
old_model_name = f"../models/.neural_model_v{model_idx-1}.h5"

##  Whether to load+tune the previous model (True) or create+train a new one (False)
tune_previous_model = True

##  Training data is either loaded from file, or freshly generated and then saved to file
data_fname = f"../data/.training_data_v{model_idx}.pickle"
load_data  = False
save_data  = not load_data

##  Parallel data generation: number of subprocesses, results requested from each, monitoring frequency
num_processes      = 7
num_games_per_proc = 300
monitor_frequency  = 3

##  Settings forwarded to every subprocess (subprocess i seeds numpy with base_seed+i)
base_seed          = int(time.time())
duration           = 3
discount           = 0.99
num_random_moves   = 2
policy_strategy    = PolicyStrategy.NOISY_POSTERIOR_POLICY

##  Echo the file paths used by this run
print(f"Using old model: {old_model_name}")
print(f"Using new model: {new_model_name}")
print(f"Using data file: {data_fname}")


Using old model: ../models/.neural_model_v5.h5
Using new model: ../models/.neural_model_v6.h5
Using data file: ../data/.training_data_v6.pickle


---

## Data generation and preprocessing

---

In [4]:
##========================================================================##
##  Define method allowing data to be generated in parallel subprocesses  ##
##========================================================================##

##  N.B. connect4.neural import must be performed inside the method executed inside each child process
##  to avoid a deadlock caused when a tf session has already been created in __main__

def generate_datapoints_process(proc_idx, num_games, out_queue, func_args) :
    """
    Play num_games bot games and put the training data obtained from each one onto out_queue.

    Inputs:
      > proc_idx : index of this process, added to the base seed so that each process is seeded differently
      > num_games: number of games to play, one queue entry is produced per game
      > out_queue: queue on which the per-game results are placed
      > func_args: sequence unpacked as (model_name, duration, discount, num_random_moves,
                   policy_strategy, base_seed); an empty model_name means no model is loaded
                   and None is passed to the game generator instead
    """
    ##  Import inside the child process, since importing connect4.neural in the parent causes a deadlock
    from connect4.neural import load_model

    ##  NOTE(review): policy_strategy is unpacked here but never forwarded to the game generator - confirm intended
    model_name, duration, discount, num_random_moves, policy_strategy, base_seed = func_args

    ##  Offset the seed by the process index so the processes do not share a random stream
    np.random.seed(base_seed+proc_idx)

    ##  Only load a model if a name was provided
    if len(model_name) > 0 :
        model = load_model(model_name)
    else :
        model = None

    ##  Play the games one at a time, publishing each result as soon as it is available
    for _ in range(num_games) :
        out_queue.put(get_training_data_from_bot_game(model, duration, discount, num_random_moves))


In [None]:
##======================##
##  Generate/load data  ##
##======================##

##  Populates model_in, model_p and model_v, either from a previously saved pickle file or by
##  playing new games in parallel subprocesses

if load_data :
    ##  Load data from pickle file, using a context manager so the file handle is always closed
    ##  N.B. unpickling can execute arbitrary code, so only load files written by a trusted run of this notebook
    print(f"Loading data from file: {data_fname}")
    with open(data_fname, "rb") as data_file :
        loaded = pickle.load(data_file)
    model_in = loaded["model_in"]
    model_p  = loaded["model_p" ]
    model_v  = loaded["model_v" ]
else :
    ##  Run subprocesses to generate new data
    print(f"Generating {num_processes*num_games_per_proc} datapoints with base seed {base_seed}")
    print(f"Using duration = {duration:.3f}\nUsing discount = {discount:.3f}")
    results = generate_from_processes(
        func                 = generate_datapoints_process, 
        func_args            = [old_model_name, duration, discount, num_random_moves, policy_strategy, base_seed],
        num_proc             = num_processes, 
        num_results_per_proc = num_games_per_proc, 
        mon_freq             = monitor_frequency)
                              
    ##  Retrieve training data from the worker processes: each result is indexed as (model_in, model_p, model_v)
    model_in = np.concatenate([r[0] for r in results])
    model_p  = np.concatenate([r[1] for r in results])
    model_v  = np.concatenate([r[2] for r in results])
    

Generating 2100 datapoints with base seed 1674161700
Using duration = 3.000
Using discount = 0.990
Generated 0 / 2100 results [t=6.01s]

In [None]:
##=============##
##  Save data  ##
##=============##

##  Store the datapoints together with the settings used to generate them
if save_data :
    print(f"Saving data to file: {data_fname}")
    to_save = {"model_in":model_in, "model_p":model_p, "model_v":model_v, 
               "base_seed":base_seed, "duration":duration, "discount":discount}
    ##  Use a context manager so the file is flushed and closed as soon as the dump completes
    with open(data_fname, "wb") as data_file :
        pickle.dump(to_save, data_file)
else :
    print("Not saving data because it was loaded from file")
    

In [None]:
##================================##
##  Report on the data generated  ##
##================================##

##  One line per array, printed in a single call
print(f"model_in with shape: {model_in.shape}",
      f"model_p  with shape: {model_p.shape}",
      f"model_v  with shape: {model_v.shape}",
      sep="\n")

In [None]:
##===================================##
##  Data Augmentation and splitting  ##
##===================================##

##  Augment the dataset to reflect the game symmetry: each datapoint is independently flipped with
##  probability 1/2, reversing both the board and the posterior along their first axis (in-place)
for idx in range(len(model_in)) :
    if not np.random.choice([True, False]) :
        model_in[idx] = np.flip(model_in[idx], axis=0)
        model_p [idx] = np.flip(model_p [idx], axis=0)

##  Shuffle data, applying the same random ordering to all three arrays to keep them aligned
indices = np.arange(len(model_in))
np.random.shuffle(indices)
model_in = model_in[indices]
model_p  = model_p [indices]
model_v  = model_v [indices]

##  Split data into train and val sets: first 70% of the shuffled rows for training, the rest for validation
num_datapoints = len(model_in)
split_idx = int(0.7*num_datapoints)

train_model_in, val_model_in = model_in[:split_idx], model_in[split_idx:]
train_model_p , val_model_p  = model_p [:split_idx], model_p [split_idx:]
train_model_v , val_model_v  = model_v [:split_idx], model_v [split_idx:]

print(f"Created training set of size {len(train_model_v)}")
print(f"Created validation set of size {len(val_model_v)}")


---

## Model training

---

In [None]:
##=========================##
##  Create/load the model  ##
##=========================##

##  The neural module uses tensorflow, so it is only imported now that the subprocessing has finished
##  (importing it earlier breaks the subprocessing)
from connect4.neural import create_model, load_model

##  Either start from a freshly created network or continue from the previous generation
if not tune_previous_model :
    print("Creating new model")
    new_model_config = dict(name=new_model_name, num_conv_blocks=4, num_filters=40, num_dense=5,
                            dense_width=200, batch_norm=True)
    new_model = create_model(**new_model_config)
else :
    print("Loading model from previous iteration")
    new_model = load_model(old_model_name)

##  Print model summary
new_model.summary()


In [None]:
##==================##
##  Model training  ##
##==================##

##  Must import tensorflow modules here, because it breaks the subprocessing if done above
from tensorflow.keras.callbacks import EarlyStopping

##  Train the model
##  -  fit on the training split only: fitting on the full dataset would include the validation rows,
##     making val_loss an optimistic estimate and undermining the early-stopping criterion
##  -  stop once val_loss has not improved for 8 epochs, and restore the best weights found
history = new_model.fit(
            train_model_in, [train_model_p, train_model_v], epochs=1000, batch_size=100,
            validation_data=(val_model_in, [val_model_p, val_model_v]),
            callbacks=[EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)])

##  Save the new model
new_model.save(new_model_name)


In [None]:
##=================================##
##  Visualise the training curves  ##
##=================================##

##  Define metrics to track (each must be a key of history.history)
metrics     = ["loss", "policy_loss", "value_loss"]
num_metrics = len(metrics)

##  Plot metrics on both linear and log scales
for do_log in [False, True] :
    
    ##  Create, show and close figure showing training curves (one axis per metric)
    fig      = plt.figure(figsize=(4*num_metrics, 3))
    for ax_idx, metric in enumerate(metrics) :
        val_metric = f"val_{metric}"
        ax  = fig.add_subplot(1, num_metrics, 1+ax_idx)
        ax.plot(history.history[metric], "-", lw=3, c="r", alpha=0.5, label=metric)
        ##  Validation curve is only drawn when the metric was recorded for the validation set
        if val_metric in history.history :
            ax.plot(history.history[val_metric], "-", lw=3, c="b", alpha=0.5, label=val_metric)
        ax.legend(loc="upper right", frameon=False, fontsize=10)
        ax.set_xlabel("Epoch", labelpad=15, fontsize=11, ha="center", va="top")
        if do_log : ax.set_yscale("log")
    ##  pyplot.show displays all open figures and does not accept a figure as an argument; since each
    ##  figure is closed straight after being shown, only the current one is open at this point
    plt.show()
    plt.close(fig)
    

- Note that we consistently observe val_loss < loss. This is due to either (i) the use of dropout, which affects the train metrics but not the val metrics, or (ii) the train metrics being accumulated over the whole epoch whereas the val metrics are calculated only at the end of it. We suspect (i), because the lag appears to be longer than one epoch.