In [1]:
from hexathello import autoPlayer, engine, jable, printing

import numpy as np

from os import path, remove

# Hexathello can be played as a game, or played by AI to train.
# We're unlikely to initialize a Hexathello Engine directly. Instead, use ``autoPlayer`` to set up a game
#   for AI to play

# -- Settings shared by the cells below
game_size: int = 5      # handed to the agents and the game runner as `size`
player_count: int = 2   # handed to the agents as `player_count`

# Pick one agent per seat. Seats are 0-indexed, so the 'second' player is "Player 1";
#   that notation is used throughout.
# Player 0 is a GreendomHexAgent. Player 1 is a RandomHexAgent, the dumbest agent,
#   which picks randomly from the legal moves.
ai_0: autoPlayer.HexAgent = autoPlayer.GreendomHexAgent(size=game_size, player_count=player_count, player_id=0)
ai_1: autoPlayer.HexAgent = autoPlayer.RandomHexAgent(size=game_size, player_count=player_count, player_id=1)
    

In [2]:
# Have the two agents play each other; the returned frame is the game's literal history
literalHistory: jable.JyFrame = autoPlayer.runHexathello_withAgents(agents=[ai_0, ai_1], size=game_size, logging_level=0)

Early end, no move for any player
RESULT: 33 - 9; Player 0 wins
# Game done
{'winner': 0, 'turn_index': 36, 'size': 5, 'game_complete': True, 'empty_count': 19, 'player_count': 2, 'current_player': 0, 'scores': [33, 9]}


In [3]:
# A literal history shows the state at each turn from the board's point of view. For machine
#  learning we instead re-encode every state as if it were seen from player 0's point of view;
#  otherwise we would need a different network for each player index (2, 3 or 6 times as many).
povHistory: jable.JyFrame = autoPlayer.povHistory_from_literalHistory(literalHistory)

printing.prettyprint(povHistory)

player_count size history_type                ai_id turn_index current_player                                        board_state                                     action_choices                                      player_action  scores winner
------------ ---- ------------ -------------------- ---------- -------------- -------------------------------------------------- -------------------------------------------------- -------------------------------------------------- ------- ------
           2    5          pov GreendomHexAgent_0-5          0              0  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0.]  0. 1. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]  0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [33, 9]      0
           2    5          pov       RandomHexAgent          1              1  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0.]  0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]  0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [9, 33]      1
           2    

In [4]:
# Save this to the disk to be used for future learning. We encode the state and play vectors as integers
povHistory_encoded: jable.JyFrame = autoPlayer.history_asInt(
    povHistory
)

# Test we encode and decode appropriately: decoding the integer form must give back, row by row,
#   the same "board_state" and "player_action" vectors we started with
povHistory_decoded: jable.JyFrame = autoPlayer.history_fromInt(
    povHistory_encoded
)

# Pre-bind the loop temporaries so the cleanup `del`s below also work for an empty history
_povHistory_i: dict = {}
_povHistory_decoded_i: dict = {}
_column: str = ""

assert len( povHistory ) == len( povHistory_decoded ), \
    "Row count changed in encode/decode: {} != {}".format( len( povHistory ), len( povHistory_decoded ) )

for i in range( len( povHistory ) ):
    _povHistory_i = povHistory[i]
    _povHistory_decoded_i = povHistory_decoded[i]

    # Both vector columns get the same two checks, in the same order as before:
    #   "board_state" first, then "player_action"
    for _column in ( "board_state", "player_action" ):
        # Compare lengths first so the elementwise `==` below is only run on equal-length vectors
        if not len( _povHistory_i[ _column ] ) == len( _povHistory_decoded_i[ _column ] ):
            print("# Incongruity in len '{}' at row index={}".format( _column, i ))
            print( _povHistory_i )
            print( _povHistory_decoded_i )
            raise Exception("Len Incongruity")
        #

        if not np.all(
            _povHistory_i[ _column ] == _povHistory_decoded_i[ _column ]
        ):
            print("# Incongruity in '{}' at row index={}".format( _column, i ))
            print( _povHistory_i )
            print( _povHistory_decoded_i )
            raise Exception("Incongruity")
        #
    #/for _column in ( "board_state", "player_action" )
#/for i in range( len( povHistory ) )

# Drop the temporaries used only by the check
del _column
del _povHistory_decoded_i
del _povHistory_i
del povHistory_decoded

print("# Encode/decode success")

# Nothing from the single example game is needed by the following cells
del povHistory_encoded
del povHistory
del literalHistory

del ai_1
del ai_0

# Encode/decode success


In [5]:
# We want to get some baseline data for learning, using a Greendom Agent, which is smarter
#   than a simple random agent
# We store this at `data/history/examples/greendom_size-{game_size}_players-{player_count}.json`
# NOTE(review): the output saved under this cell still reports `data/histories/...`; confirm which
#   directory is the intended one
histories_dir: str = path.join(
    'data',
    'history',
    'examples'
)
# The directory is not created here; fail early with a readable message if it is missing
assert path.isdir(
    histories_dir
), "Histories directory not found: {}".format( histories_dir )

baseline_data_path: str = path.join(
    histories_dir,
    'greendom_size-{}_players-{}.json'.format(
        game_size, player_count
    )
)
# Declared once here; bound in exactly one of the two branches below
baseline_data: jable.JyFrame

# `max_data` is the threshold the generation loop compares `existing_data` against;
#   `existing_data` is the length of the frame read from disk, or 0 when starting fresh
max_data: int = 20000
existing_data: int = 0
if path.isfile( baseline_data_path ):
    print("# Reading data from {}".format( baseline_data_path ))
    baseline_data = jable.read_file(
        baseline_data_path
    )
    existing_data = len( baseline_data )
#
else:
    print("# Starting new data at {}".format( baseline_data_path ))
    baseline_data = autoPlayer.new_literalHistory(
        player_count = player_count,
        size = game_size
    )
    # Stored data is kept in the integer encoding
    baseline_data = autoPlayer.history_asInt( baseline_data )
    
    # We need to change scores and winner to be shift columns
    baseline_data.makeColumn_shift( "winner" )
    baseline_data.makeColumn_shift( "scores" )
#/if path.isfile( baseline_data_path )/else

# One Greendom agent per seat, both constructed with p = 0.4
ai_0: autoPlayer.GreendomHexAgent = autoPlayer.GreendomHexAgent(size=game_size, player_count=player_count, player_id=0, p=0.4)
ai_1: autoPlayer.GreendomHexAgent = autoPlayer.GreendomHexAgent(size=game_size, player_count=player_count, player_id=1, p=0.4)

play_index: int = 0
if not existing_data < max_data:
    print("# Already have existing_data={}".format( existing_data) )

# Keep playing games until the frame holds at least `max_data` entries.
# Each finished game is integer-encoded, appended, and the whole frame is written back to disk.
while existing_data < max_data:
    print("# Starting play_index={}".format(play_index))
    literalHistory: jable.JyFrame = autoPlayer.runHexathello_withAgents(agents=[ai_0, ai_1], size=game_size, logging_level=0)

    baseline_data.extend(autoPlayer.history_asInt(literalHistory))
    baseline_data.write_file(fp=baseline_data_path)

    existing_data = len(baseline_data)
    play_index += 1

    # Safety valve: never play more games than the data threshold itself
    if play_index > max_data:
        raise Exception("Too large play_index={}".format(play_index))

# Show the first 50 entries of what we have
printing.prettyprint(baseline_data[:50])

# Clean up; `baseline_data_path` is kept because the next cell reads from it
del ai_1, ai_0, baseline_data, existing_data, max_data


# Reading data from data/histories/greendom_size-5_players-2.json
# Already have existing_data=20014
player_count size history_type                ai_id turn_index current_player                          board_state     action_choices      player_action winner   scores
------------ ---- ------------ -------------------- ---------- -------------- ------------------------------------ ------------------ ------------------ ------ --------
           2    5      literal GreendomHexAgent_0-4          0              0             453356693984287226068992     71605704212480      1099511627776      1 [19, 23]
           2    5      literal GreendomHexAgent_0-4          1              1            3173439788117202869157888    351843721953280              16384      1 [19, 23]
           2    5      literal GreendomHexAgent_0-4          2              0            3173439788112805091082240            1048704                128      1 [19, 23]
           2    5      literal GreendomHexAgent_0-4   

In [6]:
# NOTE(review): this cell is stopped on its very first statement, so nothing below it runs.
#   "UC" presumably means "under construction" - confirm before removing the guard.
raise Exception("UC")

# Load the saved history from disk to learn from
history_fromDisk: jable.JyFrame = jable.read_file(baseline_data_path)

# Turn the state, option, and play vectors from integers back into numpy arrays
history_decoded: jable.JyFrame = autoPlayer.history_fromInt(history_fromDisk)

# Re-encode as PoV 0 so it can be learned from appropriately
povHistory: jable.JyFrame = autoPlayer.povHistory_from_literalHistory(history_decoded)

# Only the PoV frame is used from here on
del history_decoded, history_fromDisk

# We want to train a Keras Neural Network on the data we have.
# The input size is the length of a state vector
# The output size is the length of the play vector
# Take both from the first row
input_size: int = len( povHistory[0,'board_state'] )
output_size: int = len( povHistory[0, 'player_action'] )

# The `KerasHexAgent` subclass of `HexAgent` has a `brain` property; this is the neural network
# We could in fact use any object conforming to the `PredictionModel` protocol, which has methods:
#   - fit()
#   - predict()
#   - call()
#
# We're going to train it on the Greendom data
import tensorflow as tf
brain_input = tf.keras.layers.Input(
    shape = (input_size,),
    name = 'keras_tensor'
)

# Get creative with architecture
# With the functional API every layer must be *called* on the previous tensor; a layer that is only
#   constructed is not part of the graph, and `tf.keras.Model` needs tensors, not `Layer` objects
brain_next = tf.keras.layers.Dense(
    input_size**2,
    activation = 'relu'
)( brain_input )

brain_next = tf.keras.layers.Dense(
    input_size**2,
    activation = 'relu'
)( brain_next )

# One sigmoid unit per entry of the play vector
brain_output = tf.keras.layers.Dense(
    output_size,
    activation = 'sigmoid'
)( brain_next )

brain_model: tf.keras.Model = tf.keras.Model(
    inputs = brain_input,
    outputs = brain_output
)
    
# Pick the optimizer and its learning rate; Adam is probably a good optimizer.
# Binary Cross Entropy is most likely the loss you want.
brain_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.005), loss=tf.keras.losses.BinaryCrossentropy())

# Build the AI agent around the compiled network
ai_keras: autoPlayer.KerasHexAgent = autoPlayer.KerasHexAgent(size=game_size, player_count=player_count, brain=brain_model, player_id=None, ai_id='Keras_alpha_0')

# Fit the agent on the PoV history
ai_keras.train(history=povHistory, epochs=400)

Exception: UC