In [1]:
import numpy as np

from hexagon import HexagonalGrid

pygame-ce 2.5.2 (SDL 2.30.8, Python 3.13.0)


In [2]:
grid = HexagonalGrid(6)
grid.randomize_grid()

In [3]:
npg = grid.to_numpy()

In [4]:
npg.flatten()

array([0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8)

## Genetic

In [79]:
S = 91
H = 128
MUTATION_SCALE = 0.1


def relu(x: np.ndarray) -> np.ndarray:
    return np.maximum(0, x)


def softmax(x):
    z = x - max(x)
    numerator = np.exp(z)
    denominator = np.sum(numerator)
    softmax = numerator / denominator
    return softmax


class Entity:
    def __init__(
        self,
        weights1: np.ndarray | None = None,
        weights2: np.ndarray | None = None,
        generation: int = 0,
    ):
        self.weights1 = np.random.randn(3 * S, H) if weights1 is None else weights1
        self.weights2 = np.random.randn(H, S) if weights2 is None else weights2
        self.generation = generation
        self.score = 0

    def mutated(self) -> "Entity":
        return Entity(
            self.weights1 + MUTATION_SCALE * np.random.randn(3 * S, H),
            self.weights2 + MUTATION_SCALE * np.random.randn(H, S),
            self.generation + 1,
        )

    def __call__(self, board: np.ndarray) -> np.ndarray:
        return softmax(np.dot(np.dot(board, self.weights1), self.weights2))

### Dummy objective

In [38]:
# learn to pick the first hexagon in the empty grid
EPOCHS = 100

grid = HexagonalGrid(6)
population = [Entity() for _ in range(100)]

for epoch in range(EPOCHS):
    for entity in population:
        prediction = entity(grid.to_numpy().flatten())
        entity.score += prediction[0]

    population.sort(key=lambda x: x.score, reverse=True)
    population = population[:50]

    new_population = []
    for entity in population:
        new_population.append(entity.mutated())

    population += new_population

In [39]:
population.sort(key=lambda x: x.score, reverse=True)

In [42]:
emp_grid = HexagonalGrid(6)
print(f'The probability to pick the first hexagon is {population[0](emp_grid.to_numpy().flatten())[0]:%}')

The probability to pick the first hexagon is 99.990183%


### Learning

In [80]:
from game import Game
from hexagon import State

In [81]:
def play_one_game(player1: Entity, player2: Entity, verbose: bool = False):
    game = Game(6)
    while not game.is_over:
        if game.current_player == State.ONE:
            move = np.argmax(player1(game.grid.to_numpy().flatten()))
        else:
            move = np.argmax(player2(game.grid.to_numpy().flatten()))
        hex_ = game.grid.at(game.grid.coordinates_lst[move])
        move_verdict = game.move(hex_)
        if verbose:
            print(f"{game.current_player} -> {hex_} ({move_verdict})")
        if not move_verdict:
            if game.current_player == State.ONE:
                player1.score -= 1
            else:
                player2.score -= 1
            break

    if game.verdict == State.ONE:
        player1.score += 1
        player2.score -= 1
    elif game.verdict == State.TWO:
        player1.score -= 1
        player2.score += 1

In [82]:
from itertools import product

player1 = [Entity() for _ in range(20)]
player2 = [Entity() for _ in range(20)]

EPOCHS = 100

for epoch in range(EPOCHS):
    for p1, p2 in product(player1, player2):
        play_one_game(p1, p2)
    
    player1.sort(key=lambda x: x.score, reverse=True)
    player2.sort(key=lambda x: x.score, reverse=True)

    player1 = player1[:10]
    player2 = player2[:10]

    new_player1 = [p.mutated() for p in player1]
    new_player2 = [p.mutated() for p in player2]

    player1 += new_player1
    player2 += new_player2

player1.sort(key=lambda x: x.score, reverse=True)
player2.sort(key=lambda x: x.score, reverse=True)

First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be in the center
First move cannot be

In [83]:
best_player1 = player1[0]
best_player2 = player2[0]

print(best_player1.score, best_player2.score)

0 0


In [75]:
game = Game(6)

In [77]:
play_one_game(best_player1, best_player2, verbose=True)

State.TWO -> Hex((-4, 5, -1) ONE) (True)
State.ONE -> Hex((2, -3, 1) TWO) (True)
State.ONE -> Hex((-4, 5, -1) ONE) (False)


## RL

In [None]:
import torch
import torch.nn as nn

CONV_HIDDEN_CHANNELS = 64
LINEAR_HIDDEN_SIZE = 128
FLATTENED_BOARD_SIZE = 91

class GameModel(nn.Module):
    def __init__(self):
        super(GameModel, self).__init__()
        self.conv1 = nn.Conv1d(3, CONV_HIDDEN_CHANNELS, kernel_size=3, padding=1)  # 3 channels for the board
        self.conv2 = nn.Conv1d(CONV_HIDDEN_CHANNELS, CONV_HIDDEN_CHANNELS, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(CONV_HIDDEN_CHANNELS * FLATTENED_BOARD_SIZE, LINEAR_HIDDEN_SIZE)
        self.fc2 = nn.Linear(LINEAR_HIDDEN_SIZE, FLATTENED_BOARD_SIZE)  # Output logits for each cell

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)  # Returns logits for each position on the board


In [None]:
# Instantiate player-specific models and optimizers
player1_model = GameModel()
player2_model = GameModel()
optimizer1 = optim.Adam(player1_model.parameters(), lr=0.001)
optimizer2 = optim.Adam(player2_model.parameters(), lr=0.001)

# Define memory buffers for each player
memory1 = deque(maxlen=10000)
memory2 = deque(maxlen=10000)

# Self-play training loop
for episode in range(total_episodes):
    game_state = initialize_game()
    done = False
    current_player = 1

    while not done:
        if current_player == 1:
            action = choose_action(player1_model, game_state)  # Choose action for player 1
            new_state, reward, done = step(game_state, action)
            memory1.append((game_state, action, reward, new_state, done))
            game_state = new_state
            current_player = 2
        else:
            action = choose_action(player2_model, game_state)  # Choose action for player 2
            new_state, reward, done = step(game_state, action)
            memory2.append((game_state, action, -reward, new_state, done))  # Negative reward for player 1's loss
            game_state = new_state
            current_player = 1

        # Training steps (periodically, or every few episodes)
        if len(memory1) > batch_size and current_player == 1:
            batch1 = random.sample(memory1, batch_size)
            train_on_batch(player1_model, batch1, optimizer1)

        if len(memory2) > batch_size and current_player == 2:
            batch2 = random.sample(memory2, batch_size)
            train_on_batch(player2_model, batch2, optimizer2)