In [None]:
from utils.caro_prepare_training import CaroPrepareTraining

# Board dimensions. Both names are reused by later cells when the network is
# constructed, so they stay as separate module-level variables.
number_of_rows = number_of_columns = 10

# Build the training set for a board of this size from 10 games.
# NOTE(review): the helper lives in utils.caro_prepare_training and is not
# visible here; presumably X holds the network inputs and Y_policy / Y_value
# the matching targets, with `data` being auxiliary output — confirm there.
X, Y_policy, Y_value, data = CaroPrepareTraining.prepare_training_data(
    number_of_rows,
    number_of_columns,
    num_games=10,
)


In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

from caro_net import CaroNet


# Convert the prepared inputs and both targets to float32 tensors (same order
# as the original: inputs, policy targets, value targets).
X_tensor, Y_policy_tensor, Y_value_tensor = (
    torch.tensor(raw, dtype=torch.float32) for raw in (X, Y_policy, Y_value)
)

# Bundle the three tensors so each sample yields (input, policy, value), then
# serve them in reshuffled mini-batches of 32.
dataset = TensorDataset(X_tensor, Y_policy_tensor, Y_value_tensor)
loader = DataLoader(dataset, shuffle=True, batch_size=32)

# Network sized for the board, optimised with Adam at a 1e-3 learning rate.
model = CaroNet(number_of_rows, number_of_columns)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Prefer CUDA when it is available, otherwise stay on the CPU. The `.to` call
# is left as the final expression, so the notebook still displays the model.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

In [None]:
import time
import torch.nn.functional as F

import os

# Number of full passes over the training loader.
num_epochs = 20

# Loop-invariant: number of mini-batches per epoch, used to average the loss.
num_batches = len(loader)

for epoch in range(num_epochs):
  model.train()
  total_loss = 0.0
  for X_batch, Yp_batch, Yv_batch in loader:
    # Move the mini-batch to the same device as the model.
    X_batch = X_batch.to(device)
    Yp_batch = Yp_batch.to(device)
    Yv_batch = Yv_batch.to(device)

    policy_pred, value_pred = model(X_batch)

    # Policy term: negative sum over dim 1 of target * prediction, averaged
    # over the batch.
    # NOTE(review): this is a cross-entropy only if CaroNet returns
    # log-probabilities for the policy head — confirm in caro_net.
    policy_loss = -(Yp_batch * policy_pred).sum(dim=1).mean()

    # F.mse_loss broadcasts when the two shapes differ (e.g. (B, 1) against
    # (B,)), which silently produces a wrong loss. CaroNet's output shape is
    # not visible here, so align the prediction with the target explicitly;
    # this is a no-op when the shapes already agree.
    value_pred = value_pred.reshape(Yv_batch.shape)
    value_loss = F.mse_loss(value_pred, Yv_batch)

    loss = policy_loss + value_loss

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    total_loss += loss.item()

  print(f"Epoch {epoch+1}/{num_epochs} - Loss: {total_loss/num_batches:.4f}")

# torch.save does not create missing directories, so make sure the target
# folder exists before writing the checkpoint.
weights_dir = "trained"
os.makedirs(weights_dir, exist_ok=True)

# Keep the timestamped path in a variable and report it: the file name changes
# on every run and is needed to reload these exact weights later.
weights_path = f"{weights_dir}/caronet_weights_{time.time()}.pth"
torch.save(model.state_dict(), weights_path)
print("✅ Training finished!")
print(f"Weights saved to {weights_path}")

In [None]:
# Rebuild an untrained network and restore previously saved weights into it.
# NOTE(review): this file name is the timestamped checkpoint of one specific
# earlier run; a fresh training run writes a different name, so update the
# path when reloading newer weights.
model = CaroNet(number_of_rows, number_of_columns)

# map_location="cpu" remaps the stored tensors onto the CPU, so a checkpoint
# saved from a CUDA model also loads on a machine without a GPU. The freshly
# built model lives on the CPU, so the resulting state is the same as before.
model.load_state_dict(
    torch.load("trained/caronet_weights_1760758201.407465.pth", map_location="cpu")
)

# Switch to inference mode; as the last expression this also displays the model.
model.eval()