In [1]:
import numpy as np
import torch
from numpy.typing import NDArray


def list_to_tensor_numpy(grid_list: list[NDArray[np.float64]]) -> torch.Tensor:
    """Combine a list of equally shaped arrays into one float32 tensor.

    Args:
        grid_list: Arrays to batch together; ``np.stack`` requires a
            non-empty list whose arrays all share one shape.

    Returns:
        A new ``torch.float32`` tensor whose leading dimension indexes the
        entries of ``grid_list``.
    """
    # Stack along a new leading axis so entry i of the list becomes batch[i].
    batch = np.stack(grid_list, axis=0)
    return torch.tensor(batch, dtype=torch.float32)

In [None]:
from simulator.game.connect import Config  # type: ignore[import]

from alphazero_implementation.models.games.connect4 import BasicNN

# Game configuration built from positional arguments (6, 7, 4).
# NOTE(review): presumably height, width and connect length — confirm
# against simulator.game.connect.Config.
game_config = Config(6, 7, 4)


# Network sized from the game configuration; max_actions is set to the
# board width.
model = BasicNN(
    height=game_config.height,
    width=game_config.width,
    max_actions=game_config.width,
    num_players=game_config.num_players,
)

initial_state = game_config.sample_initial_state()


# Dummy batch: the initial grid repeated three times.
state_inputs = list_to_tensor_numpy([initial_state.grid for _ in range(3)])

# One (policy, value) placeholder pair per sample: an all-zero policy of
# length `width` and the value pair [1.0, -1.0].
tuples = [([0.0] * game_config.width, [1.0, -1.0]) for _ in range(3)]

# Flatten each pair into a single row (policy followed by value). Built from
# `tuples` so the two cannot drift apart, and with torch.tensor instead of
# the legacy torch.FloatTensor constructor.
target_values = torch.tensor(
    [policy + value for policy, value in tuples], dtype=torch.float32
)

print(target_values)

# Call the module itself rather than .forward() so that any registered
# forward hooks are run.
output_values = model(state_inputs)
output_values


In [None]:
import lightning as L
from torch.utils.data import DataLoader, TensorDataset

# Lightning trainer used for a short fit on placeholder targets.
trainer = L.Trainer(max_epochs=10, log_every_n_steps=10, enable_progress_bar=True)

# TensorDataset requires every tensor to share the same first dimension, so
# the number of target rows is taken from the input batch instead of
# repeating a literal that must be kept in sync by hand.
num_samples = state_inputs.shape[0]

# All-zero placeholder targets: one policy entry per column and one value
# entry per player.
policies: list[list[float]] = [
    [0.0] * game_config.width for _ in range(num_samples)
]
values: list[list[float]] = [
    [0.0] * game_config.num_players for _ in range(num_samples)
]

# torch.tensor with an explicit dtype replaces the legacy torch.FloatTensor
# constructor; the resulting tensors are the same float32 data.
policy_targets = torch.tensor(policies, dtype=torch.float32)
value_targets = torch.tensor(values, dtype=torch.float32)

dataset = TensorDataset(state_inputs, policy_targets, value_targets)

dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

trainer.fit(model, dataloader)


In [None]:
from alphazero_implementation.core.training import Trainer

# Settings passed to the project Trainer constructor.
episodes_per_iter = 20
simulations_per_episode = 200

# Settings passed to Trainer.train().
num_iterations = 50
epochs_per_iter = 1

# Rebinds `trainer`: from here on the name refers to the project's Trainer,
# not the Lightning trainer created in the previous cell.
trainer = Trainer(
    simulations_per_episode=simulations_per_episode,
    episodes_per_iter=episodes_per_iter,
    model=model,
)

# Run training starting from `initial_state`.
trainer.train(
    initial_state=initial_state,
    num_iterations=num_iterations,
    epochs_per_iter=epochs_per_iter,
    save_every_n_iterations=10,
    buffer_size=1000,
)
