In [1]:
import hexathello.aiPlayers as aiPlayers
import hexathello.AutoPlayer as autoPlayer
import hexathello.Engine as engine
import hexathello.history as history
import hexathello.jable as jable
import hexathello.printing as printing
import tensorflow as tf
import numpy as np
from os import path

# -- Settings
game_size: int = 5
player_count: int = 2

In [2]:
# We want to train a KerasHexAgent. To do this, we use data we created in `quickstart_recording_data.ipynb`

history_dir: str = path.join(
    'data',
    'history',
    'examples'
)
assert path.isdir(
    history_dir
)

baseline_data_path: str = path.join(
    history_dir,
    'greendom_size-{}_players-{}.json'.format(
        game_size, player_count
    )
)
   

In [3]:
# Read the data from disk to learn from
history_fromDisk: jable.JyFrame = jable.read_file(
    baseline_data_path
)

# Decode the state, option, and play vectors from integers to numpy arrays
history_decoded: jable.JyFrame = history.history_fromInt(
    history_fromDisk
)

assert len( history_decoded ) >= 20000
    
# Make it PoV 0 to appropriately learn
povHistory: jable.JyFrame = history.povHistory_from_literalHistory(
    history_decoded
)

del history_decoded
del history_fromDisk

In [6]:
# We want to train a Keras Neural Network on the data we have.
# The input size is the length of a state vector
# The output size is the length of the play vector
# Take both from the first row
input_size: int = len( povHistory[0,'board_state'] )
output_size: int = len( povHistory[0, 'player_action'] )

# The `KerasHexAgent` subclass of `HexAgent` has a `brain` property; this is the neural network
# We could in fact us any objects conforming to the `PredictionModel` protocol, which has methods:
#   - fit()
#   - predict()
#   - call()
#
# We're going to train it on the Greendom data
# Match the input to a board state vector
import tensorflow as tf

ai_keras_id: str = 'kha_alpha_size-{}_players-{}_0'.format(
    game_size, player_count
)

ai_keras_path: str = path.join(
    'data',
    'ai',
    'examples',
    '{}.keras'.format( ai_keras_id )
)

brain_model: tf.keras.Model
if path.isfile( ai_keras_path ):
    brain_model = tf.keras.models.load_model( ai_keras_path )
#
else:
    brain_input = tf.keras.layers.Input(
        shape = (input_size,),
        name = 'keras_tensor'
    )

    # Get creative with architecture on the inside
    brain_next = tf.keras.layers.Dense(
        input_size*2,
        activation = 'relu'
    )( brain_input )

    brain_next = tf.keras.layers.Dense(
        input_size*2,
        activation = 'relu'
    )( brain_next )

    # Make the output size equal to the move vector size
    brain_output = tf.keras.layers.Dense(
        output_size,
        activation = 'sigmoid'
    )( brain_next )

    brain_model: tf.keras.Model = tf.keras.Model(
        brain_input,
        brain_output
    )

    # Choose your learning rate and optimizer. Adam is probably good for the latter.
    # You most likely want Binary Cross Entropy. Learning rate 0.0001 to 0.01 is likely fine
    brain_model.compile(
        optimizer = tf.keras.optimizers.Adam(
            learning_rate = 0.005
        ),
        loss = tf.keras.losses.CategoricalCrossentropy()
    )
#/if path.isfile( ai_keras_path )

# Init the AI Agent
ai_keras: aiPlayers.KerasHexAgent = aiPlayers.KerasHexAgent(
    size = game_size,
    player_count = player_count,
    brain = brain_model,
    player_id = None,
    ai_id = ai_keras_id
)

# Set the checkpoint to save
ai_keras_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=ai_keras_path,
    monitor='loss',
    mode='min',
    save_best_only=True
)

ai_keras.train(
    game_history = povHistory,
    epochs = 10,
    callbacks = [ ai_keras_checkpoint_callback ]
)



Epoch 1/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: -12182501.0000
Epoch 2/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: -13205111.0000
Epoch 3/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: -12781529.0000
Epoch 4/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: -12617915.0000
Epoch 5/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: -12695436.0000
Epoch 6/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: -13525784.0000
Epoch 7/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: -14271192.0000
Epoch 8/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: -12115589.0000
Epoch 9/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: -13056380.0000
Epoch 10/10
[1m626

In [9]:
# Save the network
ai_keras.brain.summary()


# Get creative with the brain you use to train a KerasHexAgent, and try writing a subclass changing:
#   .prep_training_history(...)
#   .chooseMove(...)

class CreativeKerasHexAgent(aiPlayers.KerasHexAgent):

    def prep_training_history(self, game_history):
        # takes every board state and player's action in every game and returns dictionary
        inputs = np.array([entry['board_state'] for entry in game_history])
        targets = np.array([entry['player_action'] for entry in game_history])
        return {
            "board_state": inputs,
            "player_action": targets
        }

    def chooseMove(self, board_state, valid_moves):
        board = np.array(board_state)
    
        # predicts scores for current board state
        #reshape to match model's expected board shape, takes first prediction out of a batch
        scores = self.brain.predict(board.reshape(1, -1),)[0]
    
        # Set scores of invalid moves to low value so they dont choose it
        for i in range(len(scores)):
            if i not in valid_moves:
                scores[i] = -np.inf
    
        # return move with max score
        return int(np.argmax(scores))

        
# Define a new path for saving the CreativeKerasHexAgent
creative_ai_keras_id: str = 'kha_creative_size-{}_players-{}_0'.format(
    game_size, player_count
)

creative_ai_keras_path: str = path.join(
    'data',
    'ai',
    'examples',
    '{}.keras'.format( creative_ai_keras_id )
)

# new checkpoint callback for the creative agent
creative_ai_keras_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=creative_ai_keras_path,
    monitor='loss',
    mode='min',
    save_best_only=True
)

# initialize creative agent
ai_keras: aiPlayers.KerasHexAgent = CreativeKerasHexAgent(
    size=game_size,
    player_count=player_count,
    brain=brain_model,
    player_id=None,
    ai_id=creative_ai_keras_id
)


print("Creative model saved to:", creative_ai_keras_path)
# train creative agent and save model to new path
ai_keras.train(
    game_history = povHistory,
    epochs = 10,
    callbacks = [ creative_ai_keras_checkpoint_callback ]
)

Creative model saved to: data/ai/examples/kha_creative_size-5_players-2_0.keras
Epoch 1/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 148204400.0000
Epoch 2/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 3141112.7500
Epoch 3/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 249427.6875
Epoch 4/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 384217.0938
Epoch 5/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 532534.4375 
Epoch 6/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 141285.2812
Epoch 7/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 31964.1348
Epoch 8/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 2498.5256
Epoch 9/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [4]:
import os
import math
import tensorflow as tf

input_size: int = len(povHistory[0, 'board_state'])
output_size: int = len(povHistory[0, 'player_action'])

from os import path, makedirs
save_dir = path.join("data","ai","layer_sweeper")
makedirs(save_dir, exist_ok=True)

def create_agent_with_layers(layers, width, input_size, output_size):
    # create input layer with input size given
    brain_input = tf.keras.layers.Input(shape=(input_size,), name='keras_tensor')

    # for each layer add a layer with width neurons
    x = brain_input
    for i in range(layers):
        x = tf.keras.layers.Dense(width, activation='sigmoid')(x)

    # last layer with output size neurons, to actually give an answer on what move to do
    brain_output = tf.keras.layers.Dense(output_size, activation='sigmoid')(x)

    # create/compile model
    model = tf.keras.Model(inputs=brain_input, outputs=brain_output)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
        loss=tf.keras.losses.CategoricalCrossentropy()
    )

    return model

# each model has i hidden layers, so it increase by 1 every model
models = [create_agent_with_layers(layers=i, width=122, input_size=input_size, output_size=output_size) for i in range(1, 10)]


#show every model's summary and train them
trained_agents = []
training_epochs = 10
p_random = 0.3
top_n = 3

for i, model in enumerate(models, start=1):
    print(f"\nTraining model with {i} layers...")
    ai_id = f"kha_layers_{i}"
    ckpt_path = path.join(save_dir, f"{ai_id}.keras")
    
    agent = aiPlayers.KerasHexAgent(
        size = game_size,
        player_count = player_count,
        brain = model,
        player_id = None,
        ai_id = f'kha_layers_{i}'
    )
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        filepath=ckpt_path,
        monitor="loss",
        mode="min",
        save_best_only=True
    )
    agent.train(
        game_history = povHistory,
        epochs = training_epochs,
        callbacks = [checkpoint_cb]
    )
    trained_agents.append(agent)
    
    print(f"\nModel with {i} layers:")
    # train each model
    #initialize list of models
    # have them play using autoplayer
    # get 3 biggest agents play eachother
    #set p random to 0.3
    #iterate through every combination of 2, so every agent same intellegince
    # save history into disk
    # take histories and use it to train other agents w same architecture
    model.summary()


Training model with 1 layers...
Epoch 1/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1.5229
Epoch 2/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 1.0169
Epoch 3/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.8516
Epoch 4/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.7504
Epoch 5/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.7040
Epoch 6/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.6602
Epoch 7/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.6314
Epoch 8/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.5976
Epoch 9/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.5648
Epoch 10/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m


Training model with 2 layers...
Epoch 1/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1.5944
Epoch 2/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.1239
Epoch 3/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.9831
Epoch 4/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.8604
Epoch 5/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.7832
Epoch 6/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.6962
Epoch 7/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.6416
Epoch 8/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.6024
Epoch 9/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.5445
Epoch 10/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m


Training model with 3 layers...
Epoch 1/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1.6388
Epoch 2/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.3850
Epoch 3/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.2260
Epoch 4/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.1341
Epoch 5/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.0453
Epoch 6/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.9444
Epoch 7/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.8647
Epoch 8/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.8131
Epoch 9/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.7364
Epoch 10/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m


Training model with 4 layers...
Epoch 1/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 1.6777
Epoch 2/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5571
Epoch 3/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5519
Epoch 4/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.4958
Epoch 5/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.4324
Epoch 6/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.3744
Epoch 7/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.3244
Epoch 8/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.2799
Epoch 9/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.2351
Epoch 10/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m


Training model with 5 layers...
Epoch 1/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 1.7301
Epoch 2/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5437
Epoch 3/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5727
Epoch 4/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5491
Epoch 5/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5243
Epoch 6/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5073
Epoch 7/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.4778
Epoch 8/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.4399
Epoch 9/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.4218
Epoch 10/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m


Training model with 6 layers...
Epoch 1/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1.7306
Epoch 2/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5817
Epoch 3/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5915
Epoch 4/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5326
Epoch 5/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1.5496
Epoch 6/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5203
Epoch 7/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5360
Epoch 8/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5276
Epoch 9/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5299
Epoch 10/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m


Training model with 7 layers...
Epoch 1/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 1.7287
Epoch 2/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6799
Epoch 3/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6512
Epoch 4/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6091
Epoch 5/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5992
Epoch 6/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5870
Epoch 7/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6116
Epoch 8/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.5956
Epoch 9/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6051
Epoch 10/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m


Training model with 8 layers...
Epoch 1/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 1.7136
Epoch 2/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6661
Epoch 3/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6885
Epoch 4/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6833
Epoch 5/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6756
Epoch 6/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6679
Epoch 7/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6614
Epoch 8/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6882
Epoch 9/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6566
Epoch 10/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m


Training model with 9 layers...
Epoch 1/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 1.7355
Epoch 2/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6743
Epoch 3/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6724
Epoch 4/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6516
Epoch 5/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6652
Epoch 6/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6703
Epoch 7/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1.6955
Epoch 8/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.6743
Epoch 9/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1.6473
Epoch 10/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [13]:
# We want to get some baseline data for learning, using a Greendom Agent, which is smarter
#   than a simple random agent
# We store this at `data/histories/greendom_size-5_players-2.json`


# -- Settings
game_size: int = 5
player_count: int = 2

# Select top 3 agents with most layers
top_agents = sorted(trained_agents, 
                   key = lambda x: len(x.brain.layers), 
                   reverse = True)[:3]

# Set randomness probability
for agent in top_agents:
    agent.p_random = p_random
    print(agent)


# Only take the top 2 agents for actual match
agent1, agent2 = top_agents[0], top_agents[1]


histories_dir: str = path.join(
    'data',
    'history',
    'examples'
)
assert path.isdir(
    histories_dir
)

baseline_data_path: str = path.join(
    histories_dir,
    'custom_size-{}_players-{}.json'.format(
        game_size, player_count
    )
)
baseline_data: jable.JyFrame

max_data: int = 20000
existing_data: int = 0
if path.isfile( baseline_data_path ):
    print("# Reading data from {}".format( baseline_data_path ))
    baseline_data: jable.JyFrame = jable.read_file(
        baseline_data_path
    )
    existing_data = len( baseline_data )
#
else:
    print("# Starting new data at {}".format( baseline_data_path ))
    baseline_data: jable.JyFrame = history.new_literalHistory(
        player_count = player_count,
        size = game_size
    )
    baseline_data = history.history_asInt( baseline_data )
    
    # We need to change scores and winner to be shift
    baseline_data.makeColumn_shift( "winner" )
    baseline_data.makeColumn_shift( "scores" )
#/if path.isfile( baseline_data_path )/else

agent1: aiPlayers.KerasHexAgent = aiPlayers.KerasHexAgent(
    size = game_size,
    player_count = player_count,
    player_id = 0,
    p_random = p_random
)

agent2: aiPlayers.KerasHexAgent = aiPlayers.KerasHexAgent(
    size = game_size,
    player_count = player_count,
    player_id = 1,
    p_random = p_random
)
    
play_index: int = 0
if existing_data >= max_data:
    print("# Already have existing_data={}".format( existing_data) )

while existing_data < max_data:
    print(f"\nMatch: {agent1.ai_id} vs {agent2.ai_id}")
        
    literalHistory: jable.JyFrame = autoPlayer.runHexathello_withAgents(
    agents = [agent1, agent2],
    size = game_size,
    logging_level = 0
    )
    
    baseline_data.extend(
    history.history_asInt(
        literalHistory
        )
    )
    
    baseline_data.write_file(
        fp = baseline_data_path
    )
    
    existing_data = len( baseline_data )
    play_index += 1
    
    # Safety Valve
    if play_index > max_data:
        raise Exception("Too large play_index={}".format(play_index))
            
    printing.prettyprint( baseline_data[:50] )


<hexathello.aiPlayers.KerasHexAgent object at 0x1388b7e00>
<hexathello.aiPlayers.KerasHexAgent object at 0x139fe9eb0>
<hexathello.aiPlayers.KerasHexAgent object at 0x13887ab70>
# Reading data from data/history/examples/custom_size-5_players-2.json
# Already have existing_data=20014


In [15]:
# Convert to literal then pov for retraining
literalHistory = history.history_fromInt(baseline_data)
povHistory = history.povHistory_from_literalHistory(literalHistory)

# Then train with:
for i, agent in enumerate(top_agents, start=1):
    print(f"\nRetraining agent {i} ({agent.ai_id})")
    agent.train(
        game_history = literalHistory,
        epochs = training_epochs
    )

    #loop from agent 0 to 8 (9 agents) for i in 0 to 8
        #loop from j to i to 8, grab agent i and agent j
    #could start with top 3

    
    #for saving histories, can save them all in one or seperately as long as I keep track of them


Retraining agent 1 (kha_layers_9)
Epoch 1/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1.6667
Epoch 2/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1.6597
Epoch 3/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1.6929
Epoch 4/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1.6442
Epoch 5/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1.6584 
Epoch 6/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1.6698
Epoch 7/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1.6607
Epoch 8/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1.6598
Epoch 9/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1.6461
Epoch 10/10
[1m626/626[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [17]:
from itertools import permutations


# re-initialize 
# everytime i train make them train based off latest history

#implement some sort of elo rating for ai
# add elo to winner, subtract same amount from loser
#every ai starts at 4,000 elo
#win or loss is +- 20 elo
#have all agents play eachother in every combination
#find a reasonable result, some sort of conclusion

agent1, agent2, agent3, agent4, agent5, agent6, agent7, agent8, agent9 = trained_agents[0], top_agents[1], trained_agents[2], trained_agents[3], trained_agents[4], trained_agents[5], trained_agents[6], trained_agents[7], trained_agents[8]

playing_agents = sorted(trained_agents, 
                   key = lambda x: len(x.brain.layers), 
                   reverse = True)[:]

# Set randomness probability
for agent in playing_agents:
    agent.p_random = p_random
    print(agent)


for i in range(len(playing_agents)):
    for j in range(len(playing_agents)):
        if i == j:
            continue  # Skip self-play
        agent1 = playing_agents[i]
        agent2 = playing_agents[j]
        
        print(f"\nMatch: {agent1.ai_id} vs {agent2.ai_id}")


        literalHistory: jable.JyFrame = autoPlayer.runHexathello_withAgents(
            agents=[agent1, agent2],
            size=game_size,
            logging_level=0
        )

        baseline_data.extend(
            history.history_asInt(literalHistory)
        )

        baseline_data.write_file(
            fp=baseline_data_path
        )

        existing_data = len(baseline_data)
        play_index += 1

        if play_index > max_data:
            raise Exception("Too large play_index={}".format(play_index))

        printing.prettyprint(baseline_data[:50])

<hexathello.aiPlayers.KerasHexAgent object at 0x1388b7e00>
<hexathello.aiPlayers.KerasHexAgent object at 0x139fe9eb0>
<hexathello.aiPlayers.KerasHexAgent object at 0x13887ab70>
<hexathello.aiPlayers.KerasHexAgent object at 0x138857aa0>
<hexathello.aiPlayers.KerasHexAgent object at 0x139a3ed20>
<hexathello.aiPlayers.KerasHexAgent object at 0x137c9c470>
<hexathello.aiPlayers.KerasHexAgent object at 0x1399f7d40>
<hexathello.aiPlayers.KerasHexAgent object at 0x129840590>
<hexathello.aiPlayers.KerasHexAgent object at 0x138866930>

Match: kha_layers_9 vs kha_layers_8


ValueError: Exception encountered when calling Functional.call().

[1mInvalid input shape for input Tensor("data:0", shape=(32,), dtype=float32). Expected shape (None, 122), but input has incompatible shape (32,)[0m

Arguments received by Functional.call():
  • inputs=tf.Tensor(shape=(32,), dtype=float32)
  • training=False
  • mask=None