# In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.utils.data import TensorDataset, DataLoader, random_split
from ray import tune
from ray.tune.schedulers import PopulationBasedTraining
from ray.air.integrations.wandb import WandbLoggerCallback, setup_wandb

file_path = 'path_to_your_file/random_random_10k_games.txt'

# Load and parse the data.
# Each non-empty line is expected to look like "<int>,<int>,... || <int>":
# comma-separated integer features, the separator ' || ', then the label.
data_list = []
labels_list = []
with open(file_path, 'r') as file:
    for line in file:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing on the tuple unpacking below.
            continue
        features, label = line.split(' || ')
        data_list.append([int(x) for x in features.split(',')])
        labels_list.append(int(label))

# Convert lists to NumPy arrays.
# Labels must be 64-bit integers: nn.CrossEntropyLoss expects int64 class
# indices. `np.long` is not usable here (removed in NumPy 1.24, and in
# NumPy 2.x it maps to the platform C long, which can be 32-bit).
data_np = np.array(data_list, dtype=np.float32)
labels_np = np.array(labels_list, dtype=np.int64)

# Convert NumPy arrays to PyTorch tensors
data_tensor = torch.from_numpy(data_np)
labels_tensor = torch.from_numpy(labels_np)

# Create a TensorDataset
dataset = TensorDataset(data_tensor, labels_tensor)

# Split the dataset into training (80%) and testing (remaining) sets
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create DataLoaders for the training and testing sets
batch_size = 64  # You can adjust the batch size
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define the custom network structure
class CustomNet(nn.Module):
    """Fully connected classifier with a configurable stack of hidden layers.

    The network is ``Linear -> ReLU`` for every entry of ``hidden_layers``,
    followed by a final ``Linear`` that produces ``num_classes`` raw scores
    (no softmax is applied).

    Args:
        input_size: Number of input features per sample.
        hidden_layers: Sequence of hidden layer widths, in order. May be
            empty, in which case the model is a single linear layer.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, input_size, hidden_layers, num_classes):
        super().__init__()
        # widths[i] -> widths[i + 1] describes each hidden Linear layer; the
        # last width feeds the output layer. With no hidden layers this is
        # just [input_size], so the zip below yields nothing.
        widths = [input_size, *hidden_layers]
        layers = []
        for in_features, out_features in zip(widths, widths[1:]):
            layers += [nn.Linear(in_features, out_features), nn.ReLU()]
        layers.append(nn.Linear(widths[-1], num_classes))
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw class scores for the input batch ``x``."""
        return self.layers(x)


# Training function
def train_model(config, train_loader, test_loader):
    """Train a ``CustomNet`` for one Ray Tune trial and report its accuracy.

    After every epoch the model is evaluated on ``test_loader``; the epoch
    index, mean training loss and accuracy are logged to Weights & Biases,
    and the accuracy is reported to Ray Tune.

    Args:
        config: Trial configuration. Reads the keys ``"input_size"``,
            ``"hidden_layers"``, ``"num_classes"``, ``"lr"`` and
            ``"num_epochs"``.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        test_loader: Iterable of ``(inputs, labels)`` evaluation batches.
    """
    wandb = setup_wandb(config, project="project_name")
    net = CustomNet(config["input_size"], config["hidden_layers"], config["num_classes"])
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=config["lr"])

    for epoch in range(config["num_epochs"]):
        net.train()  # evaluate_model() switches to eval mode, so reset each epoch
        loss_sum = 0.0
        sample_count = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight by batch size so a smaller final batch does not skew
            # the epoch mean.
            batch_len = labels.size(0)
            loss_sum += loss.item() * batch_len
            sample_count += batch_len

        # Log the mean loss over the epoch rather than the last mini-batch
        # only; NaN marks an epoch in which the loader produced no samples.
        epoch_loss = loss_sum / sample_count if sample_count else float("nan")

        accuracy = evaluate_model(net, test_loader)
        wandb.log({"epoch": epoch, "loss": epoch_loss, "accuracy": accuracy})
        tune.report(accuracy=accuracy)

# Evaluation function
def evaluate_model(model, test_loader):
    """Return the classification accuracy of ``model`` on ``test_loader``.

    The model is switched to evaluation mode and left in that mode; callers
    that continue training must call ``model.train()`` again.

    Args:
        model: Module mapping a batch of inputs to per-class scores, with the
            classes along dimension 1.
        test_loader: Iterable of ``(inputs, labels)`` batches.

    Returns:
        Fraction of samples whose highest-scoring class equals the label, as
        a float in ``[0, 1]``. Returns ``0.0`` when the loader yields no
        samples, instead of dividing by zero.
    """
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        return 0.0
    return correct / total

# Mutation function for PBT
def mutate_layers(config):
    """Return a randomly perturbed copy of the hidden-layer layout.

    With probability 0.5 (and only if more than one layer exists) the last
    hidden layer is dropped; otherwise a new layer of width 32, 64 or 128 is
    appended. The input ``config`` and its ``"hidden_layers"`` list are left
    untouched.

    Args:
        config: Mapping that contains a ``"hidden_layers"`` list of widths.

    Returns:
        A dict with the single key ``"hidden_layers"`` holding the new list.
    """
    # Example mutation logic - can be adjusted
    # Work on a copy: the original aliased config["hidden_layers"], so the
    # caller's configuration was silently modified in place.
    new_layers = list(config["hidden_layers"])
    if np.random.rand() < 0.5 and len(new_layers) > 1:
        new_layers.pop()
    else:
        # Store a plain int rather than a NumPy scalar so the config stays
        # made of built-in types.
        new_layers.append(int(np.random.choice([32, 64, 128])))
    return {"hidden_layers": new_layers}

# PBT Setup
# - `metric`/`mode` tell PBT how to rank trials; they are set here (and only
#   here) because Ray rejects them being given to both the scheduler and
#   tune.run().
# - Entries of `hyperparam_mutations` must be lists, search spaces or
#   zero-argument callables, so the config-dependent layer mutation is
#   applied through `custom_explore_fn`, which receives the perturbed config
#   and must return the full config.
# NOTE(review): PBT exploits by cloning checkpoints of better trials, and
# train_model does not appear to save any - confirm whether checkpointing
# needs to be added for perturbations to take effect.
scheduler = PopulationBasedTraining(
    time_attr="training_iteration",
    metric="accuracy",
    mode="max",
    perturbation_interval=5,
    hyperparam_mutations={
        "lr": tune.loguniform(1e-4, 1e-1),
    },
    # mutate_layers returns only the "hidden_layers" entry; merge it into the
    # rest of the config so no other key is lost.
    custom_explore_fn=lambda config: {**config, **mutate_layers(config)},
)

# Run the PBT
analysis = tune.run(
    # tune.with_parameters binds the DataLoaders to train_model by keyword
    # and ships them to the trials via Ray's object store instead of
    # capturing them in a closure.
    tune.with_parameters(
        train_model, train_loader=train_loader, test_loader=test_loader
    ),
    name="pbt_test",
    scheduler=scheduler,
    num_samples=4,
    config={
        "lr": tune.loguniform(1e-4, 1e-1),
        "num_epochs": 10,
        # Take the feature width from the loaded data so the first Linear
        # layer always matches the inputs.
        "input_size": data_tensor.shape[1],
        "num_classes": 2,  # Update the number of classes if different
        "hidden_layers": [128],
    },
    # Credentials are intentionally not hard-coded: provide them through the
    # WANDB_API_KEY environment variable or `wandb login`. The key that was
    # previously committed here should be treated as leaked and rotated.
    callbacks=[WandbLoggerCallback(project="is-project")],
)

best_config = analysis.get_best_config(metric="accuracy", mode="max")
print("Best config:", best_config)