In [1]:
import sys
import os
import yaml
import json

sys.path.append('../../')

from constants import ROOT_DIR


In [2]:
YAML_FILE = '../../configs/model_configs/lstm_tic_tac_toe_single_agent.yml'
from src.models.lstm_model import create_model

In [3]:
with open(YAML_FILE, 'r') as f:
    config = yaml.safe_load(f)
print(config)

{'name': 'lstm_tic_tac_toe_single_agent_1k_model', 'model': {'type': 'LSTM', 'embedding_dim': 128, 'hidden_size': 256, 'num_layers': 2}, 'training': {'batch_size': 64, 'learning_rate': 0.001, 'num_epochs': 10}, 'data': {'game': 'tic-tac-toe', 'sequence_length': 20, 'max_event_length': 10, 'path': '/games/tic-tac-toe/1k_single_agent.csv'}}


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence
from src.data.datasets.event_dataset import EventDataset

In [5]:
def collate_fn(batch):
    batch_input, batch_target, sequences, targets = zip(*batch)
    
    # Pad batch_target to the maximum length in the batch
    max_length = max(len(target) for target in batch_target)
    batch_target = [torch.nn.functional.pad(target, (0, max_length - len(target)), value=0) for target in batch_target]
    
    batch_input = [item for sublist in batch_input for item in sublist]
    batch_input = torch.nn.utils.rnn.pad_sequence(batch_input, batch_first=True, padding_value=0)
    batch_target = torch.stack(batch_target)
    
    return batch_input, batch_target, sequences, targets

def train_model(config):
    dataset = EventDataset(config['data']['path'], config['data']['sequence_length'])
    dataloader = DataLoader(dataset, batch_size=config['training']['batch_size'], shuffle=True, collate_fn=collate_fn)

    model = create_model(config, dataset.vocab_size)
    criterion = nn.CrossEntropyLoss(ignore_index=0)  # Ignore padding index
    optimizer = optim.Adam(model.parameters(), lr=config['training']['learning_rate'])

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    print(f"Vocabulary size: {dataset.vocab_size}")
    print(f"Device: {device}")

    for epoch in range(config['training']['num_epochs']):
        model.train()
        total_loss = 0
        for batch_idx, (batch_input, batch_target, _, _) in enumerate(dataloader):
            # print(f"\nBatch {batch_idx + 1}:")
            # print(f"Input shape: {batch_input.shape}")
            # print(f"Target shape: {batch_target.shape}")

            batch_input, batch_target = batch_input.to(device), batch_target.to(device)
            
            optimizer.zero_grad()
            output = model(batch_input)
            # print(f"Raw output shape: {output.shape}")
            
            # Reshape output and target tensors
            batch_size, seq_len, vocab_size = output.shape
            output = output.contiguous().view(-1, vocab_size)
            batch_target = batch_target.view(-1)
            
            # print(f"Reshaped output shape: {output.shape}")
            # print(f"Reshaped target shape: {batch_target.shape}")
            
            # Ensure output and target have the same batch size
            min_length = min(output.size(0), batch_target.size(0))
            output = output[:min_length]
            batch_target = batch_target[:min_length]
            
            # print(f"Final output shape: {output.shape}")
            # print(f"Final target shape: {batch_target.shape}")

            try:
                loss = criterion(output, batch_target)
                # print(f"Loss: {loss.item()}")

                loss.backward()
                optimizer.step()

                total_loss += loss.item()
            except RuntimeError as e:
                print(f"Runtime error during training: {e}")
                continue

            # if batch_idx % 10 == 0:
            #     print(f"Epoch {epoch+1}, Batch {batch_idx+1}/{len(dataloader)}, Loss: {loss.item():.4f}")

        print(f"Epoch {epoch+1}/{config['training']['num_epochs']}, Average Loss: {total_loss/len(dataloader):.4f}")

    torch.save(model.state_dict(), f'../../results/models/{config["name"]}.pth')
    print(f"\nModel saved to ../../results/models/{config['name']}.pth")
     
def evaluate_model(config):
    dataset = EventDataset(config['data']['path'], config['data']['sequence_length'])
    dataloader = DataLoader(dataset, batch_size=config['training']['batch_size'], shuffle=False, collate_fn=collate_fn)

    model = create_model(config, dataset.vocab_size)
    model.load_state_dict(torch.load(f'../../results/models/{config["name"]}.pth'))
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    correct = 0
    total = 0

    with torch.no_grad():
        for batch_input, batch_target, _, _ in dataloader:
            batch_input, batch_target = batch_input.to(device), batch_target.to(device)
            
            outputs = model(batch_input)
            
            # Reshape outputs and target
            batch_size, seq_len, vocab_size = outputs.shape
            outputs = outputs.contiguous().view(-1, vocab_size)
            batch_target = batch_target.contiguous().view(-1)
            
            # Ensure outputs and target have the same batch size
            min_length = min(outputs.size(0), batch_target.size(0))
            outputs = outputs[:min_length]
            batch_target = batch_target[:min_length]
            
            _, predicted = torch.max(outputs, 1)
            
            # Ignore padded elements (assuming 0 is the padding index)
            mask = batch_target != 0
            predicted = predicted[mask]
            batch_target = batch_target[mask]
            
            total += batch_target.size(0)
            correct += (predicted == batch_target).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy on the test set: {accuracy:.2f}%')
    
def evaluate_custom_log(config, lower, upper):
    dataset = EventDataset(config['data']['path'], config['data']['sequence_length'])
    
    model = create_model(config, dataset.vocab_size)
    model.load_state_dict(torch.load(f'../../results/models/{config["name"]}.pth'))
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    results = []

    with torch.no_grad():
        for idx in range(lower, min(upper + 1, len(dataset))):
            input_tensor, target_tensor, input_sequence, target_event = dataset[idx]
            
            # Ensure input_tensor is 3D: (batch_size, sequence_length, input_size)
            input_tensor = pad_sequence(input_tensor, batch_first=True).unsqueeze(0).to(device)
            
            # Print shape for debugging
            print(f"Input tensor shape: {input_tensor.shape}")
            
            output = model(input_tensor)
            
            # Assuming output shape is (batch_size, sequence_length, vocab_size)
            predicted = output.argmax(dim=-1)
            
            input_events = [dataset.decode_event(event) for event in input_tensor[0]]
            actual_next_event = dataset.decode_event(target_tensor)
            predicted_next_event = dataset.decode_event(predicted[0, -1])  # Take the last prediction
            
            results.append({
                'input': input_events,
                'actual': actual_next_event,
                'predicted': predicted_next_event
            })

    return results

def display_evaluation_results(results):
    for i, result in enumerate(results, 1):
        print(f"\nSample {i}:")
        print("Input sequence:")
        for event in result['input']:
            print(f"  Cycle: {event['cycle']}, Event: {event['event_type']}, Agent: {event['agent_id']}, Context: {event['context']}")
        print("Actual next event:")
        print(f"  Cycle: {result['actual']['cycle']}, Event: {result['actual']['event_type']}, Agent: {result['actual']['agent_id']}, Context: {result['actual']['context']}")
        print("Predicted next event:")
        print(f"  Cycle: {result['predicted']['cycle']}, Event: {result['predicted']['event_type']}, Agent: {result['predicted']['agent_id']}, Context: {result['predicted']['context']}")
        print("-" * 50)

In [6]:
train_model(config=config)

  from .autonotebook import tqdm as notebook_tqdm


Vocabulary size: 552
Device: cuda


  input_tensor = [torch.tensor(seq).clone().detach() for seq in input_seq]


Epoch 1/10, Average Loss: 2.8432
Epoch 2/10, Average Loss: 2.6632
Epoch 3/10, Average Loss: 2.6642
Epoch 4/10, Average Loss: 2.6644
Epoch 5/10, Average Loss: 2.6656
Epoch 6/10, Average Loss: 2.6641
Epoch 7/10, Average Loss: 2.6637
Epoch 8/10, Average Loss: 2.6632
Epoch 9/10, Average Loss: 2.6627
Epoch 10/10, Average Loss: 2.6627

Model saved to ../../results/models/lstm_tic_tac_toe_single_agent_1k_model.pth


In [7]:
evaluate_model(config=config)

  input_tensor = [torch.tensor(seq).clone().detach() for seq in input_seq]


Accuracy on the test set: 20.00%


In [8]:
def evaluate_custom_log(config, lower, upper):
    dataset = EventDataset(config['data']['path'], config['data']['sequence_length'])
    
    model = create_model(config, dataset.vocab_size)
    model.load_state_dict(torch.load(f'{ROOT_DIR}/results/models/{config["name"]}.pth'))
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    results = []

    with torch.no_grad():
        for idx in range(lower, min(upper + 1, len(dataset))):
            input_tensor, target_tensor, input_sequence, target_event = dataset[idx]
            
            print(f"Original input_tensor shape: {[t.shape for t in input_tensor]}")
            
            # Pad the input_tensor sequences
            input_tensor_padded = pad_sequence(input_tensor, batch_first=True)
            print(f"Padded input_tensor shape: {input_tensor_padded.shape}")
            
            # Add batch dimension
            input_tensor_padded = input_tensor_padded.unsqueeze(0).to(device)
            print(f"Final input_tensor shape: {input_tensor_padded.shape}")
            
            # Get model prediction
            output = model(input_tensor_padded)
            print(f"Model output shape: {output.shape}")
            
            # Take the last time step prediction
            predicted = output[0, -1].argmax(dim=-1)
            print(f"Predicted shape: {predicted.shape}")
            
            # Decode events
            input_events = [dataset.decode_event(event) for event in input_tensor]
            actual_next_event = dataset.decode_event(target_tensor)
            
            # Create a tensor with the same length as target_tensor
            predicted_tensor = torch.full_like(target_tensor, fill_value=dataset.vocab['<PAD>'])
            predicted_tensor[:len(predicted)] = predicted
            
            predicted_next_event = dataset.decode_event(predicted_tensor.cpu())
            
            results.append({
                'input': input_events,
                'actual': actual_next_event,
                'predicted': predicted_next_event
            })

    return results

# Usage
results = evaluate_custom_log(config, 20, 40)
display_evaluation_results(results)

  input_tensor = [torch.tensor(seq).clone().detach() for seq in input_seq]


Original input_tensor shape: [torch.Size([5]), torch.Size([5]), torch.Size([5]), torch.Size([5]), torch.Size([5]), torch.Size([5]), torch.Size([5]), torch.Size([7]), torch.Size([3]), torch.Size([5]), torch.Size([5]), torch.Size([5]), torch.Size([5]), torch.Size([5]), torch.Size([5]), torch.Size([5]), torch.Size([7]), torch.Size([3]), torch.Size([5]), torch.Size([5])]
Padded input_tensor shape: torch.Size([20, 7])
Final input_tensor shape: torch.Size([1, 20, 7])


ValueError: LSTM: Expected input to be 2D or 3D, got 4D instead