In [1]:
#Loading data logic
import numpy as np
from sklearn.utils import shuffle

#side length of the (square) board; each board string holds dimensions * dimensions cells
dimensions = 6

#read every line up front; the with-block closes the file afterwards
with open("hex_data.csv", "r") as file:
    lines = file.readlines()

#skip the first line (presumably the CSV header - verify against the data file)
data_lines = lines[1:]

#split lines into boards and labels
boards = []
labels = []
for line in data_lines:
    #drop the trailing newline and skip blank lines (e.g. an empty last line),
    #which would otherwise fail with an IndexError on the label lookup below
    line = line.strip()
    if not line:
        continue

    #first column is the board string, second column is the label
    splitted = line.split(',')
    boards.append(splitted[0])
    labels.append(splitted[1].strip())

#convert the labels to a numpy array
labels = np.array(labels, dtype=np.uint32)

print("Number of boards:", len(boards))
print("First board:", boards[0])
print("First label:", labels[0])

#shuffle boards and labels with the same permutation so every board keeps its label;
#the fixed random_state makes the order reproducible
boards, labels = shuffle(boards, labels, random_state=42)

Number of boards: 10000
First board: XEOXOXOXOOOXOOOOOOOEOOXXXXOXXXXOXXXE
First label: 1


In [2]:
#train/test split: the leading share of the (already shuffled) data is used for training

ratio = 0.9
split_point = int(ratio * len(labels))

#everything before the split point is training data, the rest is test data
boards_train, boards_test = boards[:split_point], boards[split_point:]
labels_train, labels_test = labels[:split_point], labels[split_point:]

#report the size of each part
for caption, part in (
    ("Training samples: ", boards_train),
    ("Test samples: ", boards_test),
):
    print(caption, len(part))

#show the first sample of each part as a quick sanity check
for caption, part in (
    ("First training board: ", boards_train),
    ("First training label: ", labels_train),
    ("First test board: ", boards_test),
    ("First test label: ", labels_test),
):
    print(caption, part[0])

Training samples:  9000
Test samples:  1000
First training board:  XEXOOXXOXOEOEEXOOXXOXXXOOEXOXXOEOOXO
First training label:  0
First test board:  OXXOOOEXEEEEOXOXXOOXEXEXOXOXXOOXEOXO
First test label:  0


In [3]:
# setting up training graphs

from GraphTsetlinMachine.graphs import Graphs

#vocabulary of node properties: the three cell states plus one symbol per row and per column index
symbols = ["X", "O", "E"]
for i in range(dimensions):
    symbols.extend((f"Row:{i}", f"Column:{i}"))

#container holding one graph per training board
number_of_training_graphs = len(boards_train)
graphs_train = Graphs(
    number_of_graphs=number_of_training_graphs,
    symbols=symbols,
    hypervector_size=128,
    hypervector_bits=2,
    double_hashing=False
)

#one node per board cell, e.g. 36 for a 6x6 board
number_of_nodes = dimensions ** 2

#every graph has the same number of nodes
for graph_id in range(number_of_training_graphs):
    graphs_train.set_number_of_graph_nodes(graph_id, number_of_nodes)


#function to get neighbors of a node (cell)
def get_neighbors(row, column, dimensions):
    """Return the on-board cells adjacent to the cell at (row, column).

    Six fixed (row, column) offsets are tried; a candidate is kept only when
    both of its coordinates lie in the range 0 .. dimensions - 1.

    Args:
        row: row index of the cell.
        column: column index of the cell.
        dimensions: side length of the square board.

    Returns:
        A list of (row, column) tuples, in the same order as the offsets below.
    """
    #(row offset, column offset) of the six adjacent cells
    offsets = (
        (-1, 0),
        (1, 0),
        (0, -1),
        (0, 1),
        (-1, 1),
        (1, -1),
    )

    candidates = ((row + d_row, column + d_column) for d_row, d_column in offsets)

    #keep only the candidates that are still on the board
    return [
        (r, c)
        for r, c in candidates
        if 0 <= min(r, c) and max(r, c) < dimensions
    ]


#the neighbours of a cell depend only on its position, never on the board contents,
#so the neighbour node ids are computed once per node instead of once per node per graph
neighbor_ids_by_node = [
    [
        neighbor_row * dimensions + neighbor_column
        for neighbor_row, neighbor_column in get_neighbors(
            node_id // dimensions, node_id % dimensions, dimensions
        )
    ]
    for node_id in range(number_of_nodes)
]

graphs_train.prepare_node_configuration()

#add the nodes to each graph; the third argument is the number of outgoing edges of the node
for graph_id in range(len(boards_train)):
    for node_id in range(number_of_nodes):
        graphs_train.add_graph_node(graph_id, node_id, len(neighbor_ids_by_node[node_id]))

graphs_train.prepare_edge_configuration()

#add the edges: one "adjacent_cell" edge from every node to each of its neighbours
for graph_id in range(len(boards_train)):
    for node_id in range(number_of_nodes):
        for neighbor_id in neighbor_ids_by_node[node_id]:
            graphs_train.add_graph_node_edge(graph_id, node_id, neighbor_id, "adjacent_cell")

#add the node properties
for graph_id, board in enumerate(boards_train):
    for node_id in range(number_of_nodes):
        #node ids are laid out row by row
        row, column = divmod(node_id, dimensions)

        #X means cell is occupied by player 0, O is player 1, and E is empty;
        #any other character adds no cell-state property
        cell_value = board[node_id]
        if cell_value in ("X", "O", "E"):
            graphs_train.add_graph_node_property(graph_id, node_id, cell_value)

        #position of the cell on the board
        graphs_train.add_graph_node_property(graph_id, node_id, f"Row:{row}")
        graphs_train.add_graph_node_property(graph_id, node_id, f"Column:{column}")

graphs_train.encode()

KeyboardInterrupt: 

In [19]:
#setting up the test graphs
#it's the same as the training graphs but we need to set them up separately

#init_with=graphs_train is passed instead of symbols/hypervector settings
#(presumably so the test graphs reuse the training graphs' encoding - TODO confirm)
graphs_test = Graphs(
    number_of_graphs=len(boards_test),
    init_with=graphs_train
)

for graph_id in range(len(boards_test)):
    graphs_test.set_number_of_graph_nodes(graph_id, number_of_nodes)

#the neighbours of a cell depend only on its position, never on the board contents,
#so the neighbour node ids are computed once per node instead of once per node per graph
neighbor_ids_by_node = [
    [
        neighbor_row * dimensions + neighbor_column
        for neighbor_row, neighbor_column in get_neighbors(
            node_id // dimensions, node_id % dimensions, dimensions
        )
    ]
    for node_id in range(number_of_nodes)
]

graphs_test.prepare_node_configuration()

#add nodes; the third argument is the number of outgoing edges of the node
for graph_id in range(len(boards_test)):
    for node_id in range(number_of_nodes):
        graphs_test.add_graph_node(graph_id, node_id, len(neighbor_ids_by_node[node_id]))

graphs_test.prepare_edge_configuration()

#add node edges: one "adjacent_cell" edge from every node to each of its neighbours
for graph_id in range(len(boards_test)):
    for node_id in range(number_of_nodes):
        for neighbor_id in neighbor_ids_by_node[node_id]:
            graphs_test.add_graph_node_edge(graph_id, node_id, neighbor_id, "adjacent_cell")

#add node properties
for graph_id, board in enumerate(boards_test):
    for node_id in range(number_of_nodes):
        #node ids are laid out row by row
        row, column = divmod(node_id, dimensions)

        #X, O and E are the three cell states; any other character adds no cell-state property
        cell_value = board[node_id]
        if cell_value in ("X", "O", "E"):
            graphs_test.add_graph_node_property(graph_id, node_id, cell_value)

        #position of the cell on the board
        graphs_test.add_graph_node_property(graph_id, node_id, f"Row:{row}")
        graphs_test.add_graph_node_property(graph_id, node_id, f"Column:{column}")

graphs_test.encode()

In [None]:
#training logic
from GraphTsetlinMachine.tm import MultiClassGraphTsetlinMachine
import plotly.graph_objects as go
from IPython.display import display

#upper bound on the number of training epochs; the training loop below also stops
#early once both accuracies reach its threshold
epochs = 1_000_000_000_000

#hyperparameters of the Graph Tsetlin Machine, collected in one place
tm_settings = dict(
    number_of_clauses=750,
    T=10000,
    s=0.1,
    q=1.0,
    max_included_literals=None,
    boost_true_positive_feedback=1,
    number_of_state_bits=12,
    depth=2,
    message_size=256,
    message_bits=2,
    double_hashing=False,
    grid=(16 * 13 * 4, 1, 1),
    block=(128, 1, 1),
)

tm = MultiClassGraphTsetlinMachine(**tm_settings)

#accuracy history, one entry per finished epoch
train_accuracies, test_accuracies = [], []

#best accuracies seen so far
max_train_accuracy = max_test_accuracy = 0

#bookkeeping for the ">= 0.98 training accuracy, ten times" milestone:
#accuracy_counter counts the qualifying epochs, max_train_epoch stores the epoch at which
#the count reached ten, and not_hit_100 stays True until that epoch has been recorded
accuracy_counter = 0
max_train_epoch = 0
not_hit_100 = True

#three initially empty traces: training accuracy, testing accuracy and a linear fit of the latter
accuracy_traces = [
    go.Scatter(x=[], y=[], mode='lines', name="Training Accuracy"),
    go.Scatter(x=[], y=[], mode='lines', name="Testing Accuracy"),
    go.Scatter(
        x=[], y=[], mode='lines', name="Test Regression",
        line={'dash': 'dash', 'color': 'gray'},
    ),
]

#placeholder status line shown above the plot until the first epoch has finished
status_annotation = {
    'text': "Train: - , Test: -",
    'x': 0.5,
    'y': 1.2,
    'xref': "paper",
    'yref': "paper",
    'showarrow': False,
    'font': {'size': 18},
}

figure_layout = go.Layout(
    title=f"Training and Testing Accuracy, {dimensions}x{dimensions}",
    xaxis={'title': "Epoch"},
    yaxis={'title': "Accuracy"},
    annotations=[status_annotation],
)

fig = go.FigureWidget(data=accuracy_traces, layout=figure_layout)

display(fig)

#train one epoch at a time so both accuracies can be measured and plotted after every pass
for epoch in range(epochs):
    #epoch is 0-based; everything shown in the figure counts epochs from 1
    epoch_number = epoch + 1

    #positional arguments kept exactly as before
    #(presumably epochs=1 and incremental=True - TODO confirm against the library)
    tm.fit(graphs_train, labels_train, 1, True)

    #evaluate on training data and testing data
    train_prediction = tm.predict(graphs_train)
    train_accuracy = (train_prediction == labels_train).mean()
    test_prediction = tm.predict(graphs_test)
    test_accuracy = (test_prediction == labels_test).mean()

    #keep track of the best accuracies seen so far
    max_train_accuracy = max(max_train_accuracy, train_accuracy)
    max_test_accuracy = max(max_test_accuracy, test_accuracy)

    train_accuracies.append(train_accuracy)
    test_accuracies.append(test_accuracy)

    #milestone bookkeeping happens BEFORE the annotation is rebuilt, so the annotation
    #reports the milestone in the same epoch in which it is reached
    if train_accuracy >= 0.98:
        accuracy_counter += 1

    #record, once, the epoch at which training accuracy reached 0.98 for the tenth time
    #(counted cumulatively, not necessarily in consecutive epochs);
    #stored 1-based so it matches the x axis of the plot; 0 means "not reached yet"
    if accuracy_counter >= 10 and not_hit_100:
        max_train_epoch = epoch_number
        not_hit_100 = False

    #compute regression line when more than one point
    x_data = np.arange(1, epoch_number + 1)
    if len(test_accuracies) > 1:
        coeffs = np.polyfit(x_data, test_accuracies, 1)
        test_poly = np.poly1d(coeffs)
        reg_y = test_poly(x_data)
    else:
        #a line cannot be fitted through a single point; show the point itself
        reg_y = [test_accuracies[0]]

    #update the plot; batch_update groups all changes made inside the with-block
    with fig.batch_update():
        fig.data[0].x = x_data
        fig.data[0].y = train_accuracies
        fig.data[1].x = x_data
        fig.data[1].y = test_accuracies
        fig.data[2].x = x_data
        fig.data[2].y = reg_y

        fig.layout.annotations = [
            dict(
                text=f"Train: {train_accuracy:.3f} (max: {max_train_accuracy:.3f}) , Test: {test_accuracy:.3f} (max: {max_test_accuracy:.3f}). >0.98 Train x 10 at epoch {max_train_epoch}",
                x=0.5,
                y=1.2, xref="paper", yref="paper", showarrow=False, font=dict(size=18)
            )
        ]

    #stop as soon as both accuracies are (almost) perfect; checked after the plot update
    #so the final epoch is still drawn
    if test_accuracy >= 0.999 and train_accuracy >= 0.999:
        break

    #print(f"Epoch {epoch + 1} / {epochs}: Train Accuracy: {train_accuracy:.3f}, Test Accuracy: {test_accuracy:.3f}")


Initialization of sparse structure.


FigureWidget({
    'data': [{'mode': 'lines',
              'name': 'Training Accuracy',
              'type': 'scatter',
              'uid': 'e000ddb9-67d6-4b24-9806-6b592c2d8aa4',
              'x': [],
              'y': []},
             {'mode': 'lines',
              'name': 'Testing Accuracy',
              'type': 'scatter',
              'uid': '6e04a7c8-7021-4106-89f0-43b1719d1d43',
              'x': [],
              'y': []},
             {'line': {'color': 'gray', 'dash': 'dash'},
              'mode': 'lines',
              'name': 'Test Regression',
              'type': 'scatter',
              'uid': '4d40ff7c-d686-46d2-888d-7380ed5efd9f',
              'x': [],
              'y': []}],
    'layout': {'annotations': [{'font': {'size': 18},
                                'showarrow': False,
                                'text': 'Train: - , Test: -',
                                'x': 0.5,
                                'xref': 'paper',
                          