In [1]:
import os
import pandas as pd
import tensorflow as tf
import numpy as np

import utils
import constants

In [5]:
def _checkpoint_index(path):
    """Return the file number encoded in a checkpoint file name, or None.

    ``train_model`` saves checkpoints as ``<model name>-<file number>.h5``.
    Any path whose base name (extension removed) does not end in
    ``-<integer>`` is reported as ``None`` so callers can ignore it.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    _, dash, number = stem.rpartition("-")
    if not dash:
        return None
    try:
        return int(number)
    except ValueError:
        return None


def train_model(model, data_dir="data/split/", n_files=25, epochs=1, ckpt_dir="ckpt/aunty"):
    """Train ``model`` on files ``chess1`` .. ``chess<n_files>``, saving after each file.

    If ``<ckpt_dir>/<model.name>/`` already holds checkpoints, the most recently
    created one is loaded and training resumes with the file after the number
    encoded in its name.

    Args:
        model: object exposing ``name``, ``fit`` and ``save``; it is fitted with
            inputs keyed ``"board"``/``"misc"`` and targets keyed ``"ai"``/``"aunty"``.
        data_dir: directory holding ``chess<i>.csv`` and, optionally, a
            ``processed/`` sub-directory with ``chess<i>.pkl`` frames.
        n_files: number of the last file to train on (files are 1-based).
        epochs: epochs passed to ``model.fit`` for every file.
        ckpt_dir: root directory for checkpoints.

    Returns:
        The model that was trained. This is the checkpoint-restored instance
        when a checkpoint was found, otherwise the ``model`` argument.
    """
    # makedirs creates missing parents (the default "ckpt/aunty" is nested) and
    # tolerates directories that already exist.
    os.makedirs(utils.path_join(ckpt_dir, model.name), exist_ok=True)

    candidates = [
        utils.path_join(ckpt_dir, model.name, name)
        for name in os.listdir(utils.path_join(ckpt_dir, model.name))
    ]
    # Ignore stray files (e.g. .DS_Store) that do not follow the checkpoint naming.
    checkpoints = [path for path in candidates if _checkpoint_index(path) is not None]

    last_saved = None
    if checkpoints:
        latest_checkpoint = max(checkpoints, key=os.path.getctime)
        print("Restoring from", latest_checkpoint)
        model = tf.keras.models.load_model(latest_checkpoint)
        # Parse the whole number: taking only the first character would resume
        # "name-12.h5" from file 1 instead of file 12.
        n = _checkpoint_index(latest_checkpoint)
        last_saved = latest_checkpoint
    else:
        n = 0

    processed_dir = utils.path_join(data_dir, "processed")
    if os.path.isdir(processed_dir):
        processed_files = {filename.split(".")[0] for filename in os.listdir(processed_dir)}
    else:
        processed_files = set()

    # Files numbered <= n were already covered by the restored checkpoint.
    for file in range(n + 1, n_files + 1):
        filename = f"chess{file}"
        print(f"Training on file {filename}")

        # load data, preferring the pre-processed pickle over the raw .csv
        if filename in processed_files:
            # NOTE: unpickling executes code stored in the file; only use
            # pickles produced by this project.
            df = pd.read_pickle(utils.path_join(data_dir, "processed", filename + ".pkl"))
        else:
            df = pd.read_csv(utils.path_join(data_dir, filename + ".csv"))

            # translate from fen to obs arr
            print("Processing...", end="")
            df.loc[:, "obs"] = df.loc[:, "board"].map(utils.parse_fen)
            print("complete")
            # NOTE(review): this branch only adds an "obs" column, while the
            # code below reads "obs_board" and "obs_misc". Unless the .csv
            # already has those columns this raises KeyError. Confirm what
            # utils.parse_fen returns and split it accordingly.

        # convert to numpy arrays
        x = {
            "board": tf.keras.utils.to_categorical(np.array(df["obs_board"].values.tolist()), num_classes=13),
            "misc": np.array(df["obs_misc"].values.tolist()),
        }
        y = {"ai": df["move"].values, "aunty": df["outcome"].values}

        # train
        model.fit(x, y, batch_size=64, epochs=epochs, validation_split=0.1)

        # save checkpoint between files
        last_saved = utils.path_join(ckpt_dir, model.name, f"{model.name}-{file}.h5")
        model.save(last_saved)

    if last_saved is None:
        # Nothing was restored and no file was in range (e.g. n_files=0).
        print("Training completed. No checkpoint was saved.")
    else:
        print(f"Training completed. Final save file: {last_saved}")
    return model

In [6]:
def evaluate_model(model_names, data="validation", data_dir="data/split/", ckpt_dir="ckpt/aunty", metric="binary_crossentropy"):
    """Evaluate the latest checkpoint of each named model on a held-out file.

    Args:
        model_names: names of the sub-directories of ``ckpt_dir`` to evaluate.
        data: ``"validation"`` (reads ``chess25.csv``), ``"test"`` (reads
            ``chess26.csv``), or a DataFrame with ``"board"`` and ``"outcome"``
            columns to evaluate on directly.
        data_dir: directory containing the .csv files.
        ckpt_dir: root directory for checkpoints.
        metric: key looked up in the dict returned by ``model.evaluate``.

    Returns:
        DataFrame with columns ``["model_name", metric]``, one row per model
        that had a checkpoint. Models without a checkpoint are skipped.

    Raises:
        ValueError: if ``data`` is neither a DataFrame nor one of the two
            recognised split names.
    """
    # Check the DataFrame case first: comparing a frame with == is element-wise.
    if not isinstance(data, pd.DataFrame) and data not in ("validation", "test"):
        raise ValueError(f'data must be "validation", "test" or a DataFrame, got {data!r}')

    # Keep every restored model paired with its own name, so a model that is
    # skipped for lack of a checkpoint cannot shift the names of later ones.
    restored = []
    for model_name in model_names:
        checkpoints = [utils.path_join(ckpt_dir, model_name, name) for name in os.listdir(utils.path_join(ckpt_dir, model_name))]
        if checkpoints:
            latest_checkpoint = max(checkpoints, key=os.path.getctime)
            print("Restoring from", latest_checkpoint)
            restored.append((model_name, tf.keras.models.load_model(latest_checkpoint)))
        else:
            print("No checkpoint available")

    # load data
    if isinstance(data, pd.DataFrame):
        frame = data
    elif data == "validation":
        frame = pd.read_csv(utils.path_join(data_dir, "chess25.csv"))
    else:
        frame = pd.read_csv(utils.path_join(data_dir, "chess26.csv"))

    # convert fen to obs (kept in a separate Series so a caller's frame is not mutated)
    print("Processing...", end="")
    obs = frame["board"].map(utils.parse_fen)
    print("complete")

    # convert to numpy arrays
    # NOTE(review): train_model feeds its model a dict with "board" and "misc"
    # inputs, whereas a single "obs" array is passed here. Confirm this matches
    # the input signature of the checkpoints being evaluated.
    x = np.array(obs.values.tolist())
    y = frame["outcome"].values

    performance = [
        [model_name, model.evaluate(x, y, batch_size=128, return_dict=True)[metric]]
        for model_name, model in restored
    ]

    return pd.DataFrame(performance, columns=["model_name", metric])

In [None]:

# Build and compile the single-output "aunty" network: a convolutional encoder
# over the one-hot board, concatenated with the misc features, feeding a
# sigmoid head trained with binary cross-entropy.
activation = "relu"
board_input = tf.keras.Input(shape=(64, 13), name="board")
# `shape` is documented as a tuple; "(6)" is just the int 6, so spell it "(6,)".
misc_input = tf.keras.Input(shape=(6,), name="misc")

conv_model = tf.keras.models.Sequential([
    # 64 squares x 13 classes -> 8x8 board with 13 channels
    tf.keras.layers.Reshape((8, 8, 13), input_shape=(64, 13)),
    tf.keras.layers.Conv2D(32, 3, padding="same", activation=activation),
    tf.keras.layers.Conv2D(32, 3, padding="same", activation=activation),
    tf.keras.layers.Conv2D(32, 3, padding="same", activation=activation),
    # 2x2 pooling window with stride 1 and "same" padding
    tf.keras.layers.MaxPool2D((2, 2), (1, 1), padding="same"),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256)
], name="conv_model")
conv_output = conv_model(board_input)

dense_inputs = tf.keras.layers.concatenate([conv_output, misc_input])
aunty_model = tf.keras.models.Sequential([
    # 262 = 256 conv_model features + 6 misc features
    tf.keras.layers.Input(shape=(262,)),
    tf.keras.layers.Dense(131, activation=activation),
    tf.keras.layers.Dense(131, activation=activation),
    tf.keras.layers.Dense(1, activation="sigmoid")
], name="aunty")
aunty_output = aunty_model(dense_inputs)

model = tf.keras.Model(inputs=[board_input, misc_input], outputs=aunty_output)
# The loss/metric dict keys refer to the output by the name of the "aunty" sub-model.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss={"aunty": tf.keras.losses.BinaryCrossentropy()},
    metrics={"aunty": tf.keras.metrics.BinaryCrossentropy()},
)

In [86]:
# combine both ai and aunty models into a single model with 2 outputs
board_input = tf.keras.Input(shape=(64, 13), name="board")
# `shape` is documented as a tuple; "(6)" is just the int 6, so spell it "(6,)".
misc_input = tf.keras.Input(shape=(6,), name="misc")
# NOTE(review): obs_board, obs_misc and df are not defined in any cell visible
# in this notebook, so this cell relies on leftover kernel state. Define them
# here (or load them in an earlier cell) so Restart & Run All works.
x = {"board": obs_board, "misc": obs_misc}
y = {"ai": df["move"].values, "aunty": df["outcome"].values}

activation = "relu"
conv_model = tf.keras.models.Sequential([
    # 64 squares x 13 classes -> 8x8 board with 13 channels
    tf.keras.layers.Reshape((8, 8, 13), input_shape=(64, 13)),
    tf.keras.layers.Conv2D(32, 3, padding="same", activation=activation),
    tf.keras.layers.Conv2D(32, 3, padding="same", activation=activation),
    tf.keras.layers.Conv2D(32, 3, padding="same", activation=activation),
    # 2x2 pooling window with stride 1 and "same" padding
    tf.keras.layers.MaxPool2D((2, 2), (1, 1), padding="same"),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256)
], name="conv_model")
conv_output = conv_model(board_input)

# Both heads share the same features: 256 conv_model outputs + 6 misc values = 262.
dense_inputs = tf.keras.layers.concatenate([conv_output, misc_input])
ai_model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(262,)),
    tf.keras.layers.Dense(131, activation=activation),
    tf.keras.layers.Dense(131, activation=activation),
    # one softmax unit per entry counted by constants.LEN_UCI_MOVES
    tf.keras.layers.Dense(constants.LEN_UCI_MOVES, activation="softmax")
], name="ai")
ai_output = ai_model(dense_inputs)

aunty_model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(262,)),
    tf.keras.layers.Dense(131, activation=activation),
    tf.keras.layers.Dense(131, activation=activation),
    tf.keras.layers.Dense(1, activation="sigmoid")
], name="aunty")
aunty_output = aunty_model(dense_inputs)

model = tf.keras.Model(inputs=[board_input, misc_input], outputs=[ai_output, aunty_output])
# The loss/metric dict keys refer to each output by the name of its sub-model.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss={"aunty": tf.keras.losses.BinaryCrossentropy(), "ai": tf.keras.losses.SparseCategoricalCrossentropy()},
    metrics={"aunty": tf.keras.metrics.BinaryCrossentropy(), "ai": tf.keras.metrics.SparseCategoricalAccuracy()},
)

model.fit(x, y, batch_size=64, epochs=1)



<keras.callbacks.History at 0x303317880>