# Connect 4

---

Author: S. Menary [sbmenary@gmail.com]

Date  : 2023-01-15, last edit 2023-01-15

Brief : Develop a bot that combines a neural network with Monte Carlo Tree Search (MCTS)

---

## Notes

- apply random reflections to exploit parity symmetry
- represent all inputs from perspective of current player, so +1 for self and -1 for other

## Imports

In [1]:
###
###  Required imports
###  - all imports should be placed here
###


##  Python core libs
import sys

##  PyPI libs
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization, Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D, Softmax

##  Local packages
from connect4.utils import DebugLevel
from connect4.game  import BinaryPlayer, GameBoard
from connect4.MCTS  import Node_Base
from connect4.bot   import Bot_Base


In [2]:
###
###  Print version for reproducibility
###

##  Report each library as a right-aligned label followed by its version string
for lib_name, lib_version in (("Python", sys.version), ("Numpy", np.__version__), ("Tensorflow", tf.__version__)) :
    print(f"{lib_name.rjust(12)} version is {lib_version}")

##  List the devices TensorFlow reports as available
device_names = [device.name for device in tf.config.list_physical_devices()]
print(f"Tensorflow with physical devices: {', '.join(device_names)}")


      Python version is 3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:25:29) [Clang 14.0.6 ]
       Numpy version is 1.23.2
  Tensorflow version is 2.11.0
Tensorflow with physical devices: /physical_device:CPU:0


In [3]:

def create_model(board_size=(7, 6), num_filters=10, num_conv_blocks=3, batch_norm=True, max_pool=False, 
                 dropout=0.1, dense_width=100, num_dense=2,  name=None, kernel_size=(2, 2), conv_activation=None) :
    """
    Build a two-headed Keras model: a policy head and a value head on a shared trunk.
    
    The trunk is `num_conv_blocks` convolution blocks, a Flatten, then `num_dense`
    relu dense blocks. Batch-norm, dropout and max-pooling are applied inside each
    block when enabled.
    
    Inputs:
    
        > board_size, tuple, default=(7, 6)
          input tensor has shape (board_size[0], board_size[1], 1); the policy head has board_size[0] outputs
          
        > num_filters, int, default=10
          number of filters in every Conv2D layer
          
        > num_conv_blocks, int, default=3
          number of convolution blocks
          
        > batch_norm, bool, default=True
          if True then add BatchNormalization after every Conv2D and trunk Dense layer
          
        > max_pool, bool, default=False
          if True then add MaxPooling2D at the end of every convolution block
          
        > dropout, float, default=0.1
          dropout rate; Dropout layers are only added when this is > 0
          
        > dense_width, int, default=100
          number of units in every trunk Dense layer
          
        > num_dense, int, default=2
          number of trunk Dense blocks
          
        > name, str, default=None
          name passed to the Keras Model
          
        > kernel_size, tuple, default=(2, 2)
          Conv2D kernel size; the default reproduces the previously hard-coded value
          
        > conv_activation, default=None
          activation passed to every Conv2D layer; None keeps the previous linear convolutions
          
    Returns:
    
        > Model with outputs [policy, value]: policy is a Softmax over board_size[0] entries,
          value is a single linear unit
    """
    
    inp = Input((board_size[0], board_size[1], 1))
    
    ##  Convolutional trunk
    x = inp
    for _ in range(num_conv_blocks) :
        x = Conv2D(num_filters, kernel_size=kernel_size, padding="same", activation=conv_activation)(x)
        if batch_norm  : x = BatchNormalization()(x)
        if dropout > 0 : x = Dropout(dropout)(x)
        if max_pool    : x = MaxPooling2D()(x)
    
    x = Flatten()(x)
    
    ##  Dense trunk
    for _ in range(num_dense) :
        x = Dense(dense_width, activation="relu")(x)
        if batch_norm  : x = BatchNormalization()(x)
        if dropout > 0 : x = Dropout(dropout)(x)
        
    ##  Policy head: one probability per entry along board_size[0]
    xp = Dense(board_size[0], activation="linear")(x)
    xp = Softmax()(xp)
    
    ##  Value head: single unbounded scalar
    xv = Dense(1, activation="linear")(x)
    
    return Model(inp, [xp, xv], name=name)


In [4]:
##  Disabled alternative to create_model, kept as a bare string literal so it is never executed
##  - dense-only variant: Flatten followed by relu Dense blocks, with no convolution blocks
##  - because the string is the last expression in the cell, the notebook echoes it as the cell output
'''
def create_model(board_size=(7, 6), batch_norm=True, dropout=0.1, dense_width=100, num_dense=2, name=None) :
    
    inp = Input((board_size[0], board_size[1], 1))
    
    x = inp
    x = Flatten()(x)
    for dense_idx in range(num_dense) :
        x = Dense(dense_width, activation="relu")(x)
        if batch_norm  : x = BatchNormalization()(x)
        if dropout > 0 : x = Dropout(dropout)(x)
        
    xp = Dense(board_size[0], activation="linear")(x)
    xp = Softmax()(xp)
    
    xv = Dense(1, activation="linear")(x)
    
    x = Model(inp, [xp, xv], name=name)
    return x
'''

'\ndef create_model(board_size=(7, 6), batch_norm=True, dropout=0.1, dense_width=100, num_dense=2, name=None) :\n    \n    inp = Input((board_size[0], board_size[1], 1))\n    \n    x = inp\n    x = Flatten()(x)\n    for dense_idx in range(num_dense) :\n        x = Dense(dense_width, activation="relu")(x)\n        if batch_norm  : x = BatchNormalization()(x)\n        if dropout > 0 : x = Dropout(dropout)(x)\n        \n    xp = Dense(board_size[0], activation="linear")(x)\n    xp = Softmax()(xp)\n    \n    xv = Dense(1, activation="linear")(x)\n    \n    x = Model(inp, [xp, xv], name=name)\n    return x\n'

In [10]:
###======================================###
###   Node_NeuralMCTS class definition   ###
###======================================###

class Node_NeuralMCTS(Node_Base) :
    """
    MCTS node that uses a neural network for its action priors and its simulation policy.
    
    On construction the node evaluates the model on its own board and stores the
    per-action priors in `child_priors` and the value output in `prior_value`.
    """
    
    def __init__(self, game_board:GameBoard, parent:Node_Base=None, params:list=None, shallow_copy_board:bool=False, 
                 a_idx=-1, label=None) :
        """
        Create a node and evaluate the model on its board.
        
        Inputs:
        
            > game_board, GameBoard
              board state represented by this node
              
            > parent, Node_Base, default=None
              parent node; when given, this node's prior is read from parent.child_priors[a_idx]
              
            > params, list, default=None
              hyper-parameters [model, c]; None is treated as an empty list
              
            > shallow_copy_board, a_idx, label
              forwarded unchanged to the Node_Base initialiser
              
        Raises:
        
            > IndexError if params has fewer than two entries
        """
        ##  Use None as the default so that no list object is shared between calls
        if params is None :
            params = []
        
        ##  Call Node_Base initialiser with params=[model, c]
        super().__init__(game_board, parent, params, shallow_copy_board, a_idx, label)
        
        ##  Resolve the prior_prob for this node (0 when there is no parent)
        self.prior_prob = parent.child_priors[a_idx] if parent else 0
        
        ##  Resolve the hyper-params
        self.model = params[0]
        self.c     = params[1]
        
        ##  Store model input and output, dropping the leading batch axis from each output
        self.model_input = self._board_to_model_input(game_board)
        child_priors, prior_value = self.model(self.model_input)
        self.child_priors, self.prior_value = child_priors.numpy()[0], prior_value.numpy()[0]
        
        
    @staticmethod
    def _board_to_model_input(game_board:GameBoard) :
        """
        Return the board reshaped to (1, d0, d1, 1) for the model.
        
        The board is negated when player O is to move, so the model always sees the
        position from the perspective of the current player.
        NOTE(review): presumably X pieces are stored as +1 and O pieces as -1 - confirm against GameBoard.
        """
        board = game_board.board
        if game_board.to_play == BinaryPlayer.O :
            board = -board
        return board.reshape((1, board.shape[0], board.shape[1], 1))
        
        
    def get_expansion_score(self) -> float :
        """
        Returns the selection score of this node: mean score plus a prior-weighted exploration bonus
        
        score = total_score/num_visits + c * prior_prob * sqrt(parent.num_visits) / (1 + num_visits)
        
        Returns np.nan if the node has no parent. A node with no visits uses a mean score of 0.
        """
        
        ##  If node has no parent then no score exists
        if not self.parent :
            return np.nan
        
        ##  Calculate mean score from past games
        ##  - guard against dividing by zero for a node that has not been visited yet
        mean_score = self.total_score / self.num_visits if self.num_visits else 0.
        
        ##  Exploration bonus grows with the parent's visits and shrinks with this node's visits
        exploration = self.c*self.prior_prob*np.sqrt(self.parent.num_visits) / (1+self.num_visits)
        
        return mean_score + exploration
    
    
    def get_simulated_action(self, game_board:GameBoard) -> int :
        """
        Sample an action for game_board according to the model's policy output.
        
        The policy is restricted to the available actions and renormalised. If the
        restricted probabilities do not have a positive finite sum then the action is
        drawn uniformly from the available actions.
        """
        ##  Query model (the value output is not needed here)
        action_priors, _ = self.model(self._board_to_model_input(game_board))
        action_priors = action_priors.numpy()[0]    
        
        ##  Keep only the probabilities of the available actions
        available_actions = game_board.get_available_actions()
        action_probs = np.array([action_priors[a_idx] for a_idx in available_actions])
        
        ##  Renormalise; fall back to uniform sampling rather than passing NaN probabilities to numpy
        total_prob = action_probs.sum()
        if not np.isfinite(total_prob) or total_prob <= 0 :
            return np.random.choice(available_actions)
        action_probs /= total_prob
        return np.random.choice(available_actions, p=action_probs)
    
    
    def simulate(self, max_sim_steps:int=-1, debug_lvl:DebugLevel=DebugLevel.MUTE) -> "GameResult" :
        """
        Play the game out from this node using the model's policy and return the result.
        
        If this node's board already has a result then it is returned immediately.
        Otherwise a deep copy of the board is played until get_result() returns a truthy result.
        
        NOTE(review): max_sim_steps is accepted but not enforced, so the simulation always runs
        to the end of the game.
        
        The return annotation is a string because GameResult is not among the imports visible
        in this notebook; the method only ever returns values produced by get_result().
        """
        
        ##  Check if game has already finished
        ##  - if so then return the result straight away
        result = self.game_board.get_result()
        if result :
            debug_lvl.message(DebugLevel.MEDIUM, f"Leaf node found with result {result.name}")
            return result
                
        ##  Create copy of game board to play simulation
        simulated_game = self.game_board.deep_copy()
        
        ##  Keep playing moves until the game reports a result
        ##  - result is falsy at this point, so at least one move is always played
        trajectory = []
        while not result :
            action = self.get_simulated_action(simulated_game)
            trajectory.append(f"{simulated_game.to_play}:{action}")
            simulated_game.apply_action(action)
            result = simulated_game.get_result()
                  
        ##  Debug trajectory
        debug_lvl.message(DebugLevel.MEDIUM, f"Simulation ended with result {result.name}")
        debug_lvl.message(DebugLevel.HIGH  , f"Simulated trajectory was: {' '.join(trajectory)}")
                                
        ##  Return result
        return result
    

In [11]:
###=====================================###
###   Bot_NeuralMCTS class definition   ###
###=====================================###

class Bot_NeuralMCTS(Bot_Base) :
    """
    Bot whose search tree is built from Node_NeuralMCTS nodes.
    """
    
    def __init__(self, model, c=1.) :
        """
        Store the model and the constant c that are handed to every root node.
        """
        self.model, self.c = model, c
    
    def create_root_node(self, game_board) :
        """
        Return a parentless Node_NeuralMCTS for game_board, configured with this bot's model and c.
        """
        node_params = [self.model, self.c]
        return Node_NeuralMCTS(game_board, params=node_params)
    

In [12]:
#  Play a bot game!

##  Start from a new board with a bot built on a freshly created model
##  - no training happens in this cell, so the model is used exactly as create_model() returns it
game_board = GameBoard()
bot        = Bot_NeuralMCTS(create_model())
print(game_board)

##  Let the bot move for whichever player is to play until get_result() returns a truthy result
##  - NOTE(review): presumably take_move applies the chosen action to game_board in place,
##    and duration is a per-move search budget (units not visible here) - confirm against Bot_Base
result = game_board.get_result()
while not result :
    bot.take_move(game_board, duration=5, debug_lvl=DebugLevel.LOW)
    result = game_board.get_result()
    print(game_board)


+---+---+---+---+---+---+---+
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
+---+---+---+---+---+---+---+
| 0 | 1 | 2 | 3 | 4 | 5 | 6 |
+---+---+---+---+---+---+---+
Game result is: NONE
Action values are:  -0.33  -1.00  0.11   0.35   -1.00  0.14   -1.00 
Visit counts are:   3      1      18     65     1      37     1     
Selecting action 3
+---+---+---+---+---+---+---+
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | [31mX[0m | . | . | . |
+---+---+---+---+---+---+---+
| 0 | 1 | 2 | 3 | 4 | 5 | 6 |
+---+---+---+---+---+---+---+
Game result is: NONE
Action values are:  -1.00  -0.04  -1.00  -1.00  -0.60  -0.26  -1.00 
Visit counts are:   1      25     1      1      5      99     1     
Selecting action 1
+---+---+---+---+---+---+---+
| . |

Action values are:  -0.75  -0.52  -0.57  -1.00  -0.60  -1.00  -0.45 
Visit counts are:   8      136    28     4      20     4      55    
Selecting action 6
+---+---+---+---+---+---+---+
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | [31mX[0m | . | [31mX[0m | . |
| . | . | . | [34mO[0m | . | [34mO[0m | [34mO[0m |
| [34mO[0m | [31mX[0m | . | [31mX[0m | . | [34mO[0m | [31mX[0m |
| [31mX[0m | [34mO[0m | [34mO[0m | [31mX[0m | . | [34mO[0m | [31mX[0m |
+---+---+---+---+---+---+---+
| 0 | 1 | 2 | 3 | 4 | 5 | 6 |
+---+---+---+---+---+---+---+
Game result is: NONE
Action values are:  -1.00  0.29   0.54   -1.00  -0.33  0.44   0.39  
Visit counts are:   1      14     70     1      3      176    31    
Selecting action 2
+---+---+---+---+---+---+---+
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | [31mX[0m | . | [31mX[0m | . |
| . | . | . | [34mO[0m | . | [34mO[0m | [34mO[0m |
| [34mO[0m | [31mX[0m 

Action values are:  0.98   0.00   0.00   0.09   0.37   0.50  
Visit counts are:   5853   12     12     11     19     24    
Selecting action 0
+---+---+---+---+---+---+---+
| . | . | [34mO[0m | . | . | . | . |
| . | [34mO[0m | [31mX[0m | [34mO[0m | . | [31mX[0m | [31mX[0m |
| . | [31mX[0m | [34mO[0m | [31mX[0m | . | [31mX[0m | [34mO[0m |
| [31mX[0m | [31mX[0m | [34mO[0m | [34mO[0m | . | [34mO[0m | [34mO[0m |
| [34mO[0m | [31mX[0m | [31mX[0m | [31mX[0m | . | [34mO[0m | [31mX[0m |
| [31mX[0m | [34mO[0m | [34mO[0m | [31mX[0m | . | [34mO[0m | [31mX[0m |
+---+---+---+---+---+---+---+
| 0 | 1 | 2 | 3 | 4 | 5 | 6 |
+---+---+---+---+---+---+---+
Game result is: NONE
Action values are:  -0.98  -0.99  -0.99  -0.99  -0.99  -0.99 
Visit counts are:   1945   1346   1675   1665   1470   1038  
Selecting action 0
+---+---+---+---+---+---+---+
| . | . | [34mO[0m | . | . | . | . |
| . | [34mO[0m | [31mX[0m | [34mO[0m | . | [31mX[0m | 