In [1]:
import sys
import os
import yaml
import json

sys.path.append('../../')

In [2]:
YAML_FILE = '../../configs/model_configs/lstm_tic_tac_toe_single_agent.yml'
from src.models.lstm_model import create_model

In [3]:
with open(YAML_FILE, 'r') as f:
    config = yaml.safe_load(f)
print(config)

{'name': 'lstm_tic_tac_toe_single_agent_1k_model', 'model': {'type': 'LSTM', 'embedding_dim': 128, 'hidden_size': 256, 'num_layers': 2}, 'training': {'batch_size': 64, 'learning_rate': 0.001, 'num_epochs': 3}, 'data': {'game': 'tic-tac-toe', 'sequence_length': 20, 'max_event_length': 10, 'path': '/games/tic-tac-toe/1k_single_agent.csv'}}


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence
from src.data.datasets.event_dataset import EventDataset

In [5]:
def collate_fn(batch):
    batch_input, batch_target, sequences, targets = zip(*batch)
    batch_input = [item for sublist in batch_input for item in sublist]
    batch_input = pad_sequence(batch_input, batch_first=True)
    batch_target = pad_sequence([torch.tensor(t) for t in batch_target], batch_first=True, padding_value=0)
    return batch_input, batch_target, sequences, targets

def train_model(config):
    dataset = EventDataset(config['data']['path'], config['data']['sequence_length'])
    dataloader = DataLoader(dataset, batch_size=config['training']['batch_size'], shuffle=True, collate_fn=collate_fn)

    model = create_model(config, dataset.vocab_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config['training']['learning_rate'])

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(config['training']['num_epochs']):
        model.train()
        total_loss = 0
        for batch_input, batch_target, _, _ in dataloader:
            batch_input, batch_target = batch_input.to(device), batch_target.to(device)
            
            optimizer.zero_grad()
            output = model(batch_input)
            # Ensure output and target tensors are of the same shape
            output = output.view(-1, dataset.vocab_size)
            batch_target = batch_target.view(-1)
            loss = criterion(output, batch_target)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{config['training']['num_epochs']}, Loss: {total_loss/len(dataloader):.4f}")

    torch.save(model.state_dict(), f'../../results/models/{config["name"]}.pth')
    print(f"\nModel saved to ../../results/models/{config["name"]}.pth")

def evaluate_model(config):
    dataset = EventDataset(config['data']['path'], config['data']['sequence_length'])
    dataloader = DataLoader(dataset, batch_size=config['training']['batch_size'], shuffle=False, collate_fn=collate_fn)

    model = create_model(config, dataset.vocab_size)
    model.load_state_dict(torch.load(f'../../results/models/{config["name"]}.pth'))
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    correct = 0
    total = 0

    with torch.no_grad():
        for batch_input, batch_target, _, _ in dataloader:
            batch_input, batch_target = batch_input.to(device), batch_target.to(device)
            
            outputs = model(batch_input)
            _, predicted = torch.max(outputs.data, 1)
            total += batch_target.size(0)
            correct += (predicted == batch_target).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy on the test set: {accuracy:.2f}%')

def evaluate_custom_log(config, lower, upper):
    dataset = EventDataset(config['data']['path'], config['data']['sequence_length'])
    
    model = create_model(config, dataset.vocab_size)
    model.load_state_dict(torch.load(f'../../results/models/{config["name"]}.pth'))
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    results = []

    with torch.no_grad():
        for idx in range(lower, min(upper + 1, len(dataset))):
            input_tensor, target_tensor, input_sequence, target_event = dataset[idx]
            input_tensor = pad_sequence(input_tensor, batch_first=True).unsqueeze(0).to(device)
            
            output = model(input_tensor)
            predicted = output.argmax(dim=-1)
            
            input_events = [dataset.decode_event(event) for event in input_tensor[0]]
            actual_next_event = dataset.decode_event(target_tensor)
            predicted_next_event = dataset.decode_event(predicted[0])
            
            results.append({
                'input': input_events,
                'actual': actual_next_event,
                'predicted': predicted_next_event
            })

    return results

def display_evaluation_results(results):
    for i, result in enumerate(results, 1):
        print(f"\nSample {i}:")
        print("Input sequence:")
        for event in result['input']:
            print(f"  Cycle: {event['cycle']}, Event: {event['event_type']}, Agent: {event['agent_id']}, Context: {event['context']}")
        print("Actual next event:")
        print(f"  Cycle: {result['actual']['cycle']}, Event: {result['actual']['event_type']}, Agent: {result['actual']['agent_id']}, Context: {result['actual']['context']}")
        print("Predicted next event:")
        print(f"  Cycle: {result['predicted']['cycle']}, Event: {result['predicted']['event_type']}, Agent: {result['predicted']['agent_id']}, Context: {result['predicted']['context']}")
        print("-" * 50)

In [6]:
train_model(config=config)

  input_tensor = [torch.tensor(seq) for seq in input_seq]


ValueError: Expected input batch_size (1280) to match target batch_size (448).

In [None]:
evaluate_model(config=config)

In [None]:
results = evaluate_custom_log(config, 20, 40)

In [None]:
display_evaluation_results(results)