In [3]:
import json
import sys
import os
# Add the parent directory to sys.path
sys.path.append(os.path.abspath('..'))
from src.tile_definitions import TILE_MAPPING

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau

# Dataset Class

## Level 1

In [2]:
class Level1GameDataset(Dataset):
    def __init__(self, json_file):
        with open(json_file, "r") as f:
            self.data = json.load(f)
            
        print(f"Loaded {len(self.data)} gameplay samples from {json_file}")
        
        # Action to integer mapping
        self.action_mapping = {"UP": 0, "DOWN": 1, "LEFT": 2, "RIGHT": 3}

        # Create reverse mapping (tile_type -> ID) from TILE_MAPPING
        self.tile_type_to_id = {}
        for tile_id, (tile_type, _, _, _, _) in TILE_MAPPING.items():
            self.tile_type_to_id[tile_type] = tile_id
            
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        sample = self.data[idx]
        state = sample["state"]
        action = self.action_mapping[sample["action"]]
        
        # Extract state components
        position = torch.tensor(state["position"], dtype=torch.float32)
        chips_collected = torch.tensor([state["player_collected_chips"]], dtype=torch.float32)
        total_chips_collected = torch.tensor([state["total_collected_chips"]], dtype=torch.float32)
        socket_unlocked = torch.tensor([int(state["socket_unlocked"])], dtype=torch.float32)
        nearest_chip = torch.tensor(state["nearest_chip"], dtype=torch.float32)
        exit_location = torch.tensor(state["exit_position"], dtype=torch.float32)
        
        # Process the full grid
        full_grid = []
        for row in state["full_grid"]:
            processed_row = []
            for tile_type in row:
                # Map to integer using the updated tile definitions
                processed_row.append(self.tile_type_to_id.get(tile_type, 1))
            full_grid.append(processed_row)
        
        full_grid_tensor = torch.tensor(full_grid, dtype=torch.float32)
        
        local_grid = []
        for row in state["local_grid"]:
            processed_row = []
            for tile_type in row:
                # Map to integer using the updated tile definitions
                processed_row.append(self.tile_type_to_id.get(tile_type, 1))
            local_grid.append(processed_row)
        
        local_grid_tensor = torch.tensor(local_grid, dtype=torch.float32)
        
        # Additional state information
        alive = torch.tensor([float(state.get("alive", True))], dtype=torch.float32)
        remaining_chips = torch.tensor([state.get("remaining_chips", 0)], dtype=torch.float32)
        other_player_pos = torch.tensor(state.get("other_player_position", [-1, -1]), dtype=torch.float32)
        
        # Concatenate all state information into a single vector
        state_vector = torch.cat([
            position,
            chips_collected, 
            total_chips_collected,
            socket_unlocked,
            nearest_chip,
            exit_location,
            full_grid_tensor.flatten(),
            local_grid_tensor.flatten(),
            alive,
            remaining_chips,
            other_player_pos
        ])
        
        return state_vector, torch.tensor(action, dtype=torch.long)

## Level 2

In [22]:
class Level2GameDataset(Dataset):
    def __init__(self, json_file):
        """
        Initialize the Level 2 Game Dataset with enhanced state representation
        
        Args:
            json_file (str): Path to the JSON file containing gameplay data
            transform (callable, optional): Optional transform to be applied on a sample
        """
        # Load data
        with open(json_file, "r") as f:
            self.data = json.load(f)
        
        # Action to integer mapping
        self.action_mapping = {"UP": 0, "DOWN": 1, "LEFT": 2, "RIGHT": 3}
        
        
        # Process the TILE_MAPPING to build our encoding dictionary
        self.tile_type_to_id = {}
        for tile_id, (tile_type, _, _, _, _) in TILE_MAPPING.items():
            self.tile_type_to_id[tile_type] = tile_id
        
        # Map for key and boot types
        self.key_mapping = {"RED": 0, "BLUE": 1, "YELLOW": 2, "GREEN": 3}
        self.boot_mapping = {"WATER": 0, "FIRE": 1, "FORCE": 2}
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        """
        Get sample by index
        
        Returns:
            tuple: (state_vector, action)
        """
        sample = self.data[idx]
        state = sample["state"]
        action = self.action_mapping[sample["action"]]
        
        # Extract base state information (similar to Level 1)
        position = torch.tensor(state["position"], dtype=torch.float32)
        chips_collected = torch.tensor([state["player_collected_chips"]], dtype=torch.float32)
        total_chips_collected = torch.tensor([state["total_collected_chips"]], dtype=torch.float32)
        socket_unlocked = torch.tensor([int(state["socket_unlocked"])], dtype=torch.float32)
        nearest_chip = torch.tensor(state["nearest_chip"], dtype=torch.float32)
        nearest_key = torch.tensor(state["nearest_key"], dtype=torch.float32)
        nearest_boot = torch.tensor(state["nearest_boot"], dtype=torch.float32)
        exit_position = torch.tensor(state["exit_position"], dtype=torch.float32)
        
        full_grid = []
        for row in state["full_grid"]:
            processed_row = []
            for tile_type in row:
                # Map to integer using the updated tile definitions
                processed_row.append(self.tile_type_to_id.get(tile_type, 1))
            full_grid.append(processed_row)
        
        full_grid_tensor = torch.tensor(full_grid, dtype=torch.float32).flatten()
        
        # Extract local grid (decode type strings into integers)
        # Level 2 may have a different local_grid structure
        if isinstance(state["local_grid"], list) and all(isinstance(item, int) for item in state["local_grid"]):
            # Already flattened encoded grid with integers
            local_grid_tensor = torch.tensor(state["local_grid"], dtype=torch.float32)
        else:
            # Handle traditional 2D grid of tile types
            local_grid = []
            for row in state["local_grid"]:
                grid_row = []
                for tile_type in row:
                    # Default to WALL if type not in mapping
                    tile_value = self.tile_type_to_id.get(tile_type, 1)
                    grid_row.append(tile_value)
                local_grid.append(grid_row)
            local_grid_tensor = torch.tensor(local_grid, dtype=torch.float32).flatten()
            
        # Enhanced state features for Level 2
        # Player state
        is_sliding = torch.tensor([int(state["is_sliding"])], dtype=torch.float32)
        is_being_forced = torch.tensor([int(state["is_being_forced"])], dtype=torch.float32)
        alive = torch.tensor([int(state["alive"])], dtype=torch.float32)
        
        # Game state
        remaining_chips = torch.tensor([state["remaining_chips"]], dtype=torch.float32)
        
        # Multiplayer state
        other_player_position = torch.tensor(state["other_player_position"] if state["other_player_position"] else [-1, -1], 
                                           dtype=torch.float32)
        player_id = torch.tensor([state["player_id"]], dtype=torch.float32)
        
        # Keys and boots (encode as one-hot vectors)
        keys_tensor = torch.zeros(4, dtype=torch.float32)  # RED, BLUE, YELLOW, GREEN
        if state["collected_keys"]:
            for key in state["collected_keys"]:
                if key in self.key_mapping:
                    keys_tensor[self.key_mapping[key]] = 1
                    
        boots_tensor = torch.zeros(3, dtype=torch.float32)  # WATER, FIRE, FORCE
        if state["collected_boots"]:
            for boot in state["collected_boots"]:
                if boot in self.boot_mapping:
                    boots_tensor[self.boot_mapping[boot]] = 1
    
        # Time information
        time_elapsed = torch.tensor([state["time_elapsed"]/1000] if "time_elapsed" in state else [0], 
                                  dtype=torch.float32)  # Normalize to seconds
        
        # Goal position information
        goal_pos = torch.tensor(state["goal_pos"] if "goal_pos" in state else [-1, -1], 
                              dtype=torch.float32)
        
        # Other player information
        other_player_chips = torch.tensor([state["other_player_collected_chips"]] 
                                       if "other_player_collected_chips" in state else [0], 
                                       dtype=torch.float32)
        
        # Construct the full state vector by concatenating all features
        state_vector = torch.cat([
            # Base state (Level 1 compatible)
            position, 
            chips_collected,
            total_chips_collected, 
            socket_unlocked,
            nearest_chip,
            nearest_key,
            nearest_boot,
            exit_position,
            full_grid_tensor,
            local_grid_tensor,
            
            # Enhanced state (Level 2)
            is_sliding,
            is_being_forced,
            alive,
            remaining_chips,
            other_player_position,
            player_id,
            keys_tensor,
            boots_tensor,
            time_elapsed,
            goal_pos,
            other_player_chips
        ])
            
        return state_vector, torch.tensor(action, dtype=torch.long)
    
    def get_input_size(self):
        """Return the size of the state vector (useful for model initialization)"""
        # Get a sample to determine vector size
        sample_vector, _ = self[0]
        return len(sample_vector)

## Load Dataset and Test

In [76]:
dataset = Level2GameDataset("../data/human_play_data_level1.json")

# The rest of your code remains unchanged
sample_vector, sample_action = dataset[0]
print(f"🔢 Sample Vector: {sample_vector.size()}")
print(f"📊 Loaded {len(dataset)} samples.")

# Train your model as usual
input_size = len(sample_vector)

🔢 Sample Vector: torch.Size([209])
📊 Loaded 3842 samples.


In [77]:
# Split into train and validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
sub_train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# Create dataloaders
train_loader = DataLoader(dataset, batch_size=64, shuffle=False, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=True)

# Behavior Cloning Model

In [48]:
import torch.nn as nn
import torch.optim as optim

## Level 1 Model

In [27]:
# FNC Network for behavior cloning
class BehaviorCloningModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(BehaviorCloningModel, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),                    
            nn.Linear(128, output_size)
        )

    def forward(self, x):
        return self.fc(x)

## Level 2 Model

In [69]:
# FNC Network for behavior cloning
class BehaviorCloningModelLv2(nn.Module):
    def __init__(self, input_size, output_size):
        super(BehaviorCloningModelLv2, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, output_size),
        )

    def forward(self, x):
        return self.fc(x)

In [70]:
input_size = len(dataset[0][0])  # Size of State Vector
output_size = 4  # Possible Actions (UP, DOWN, LEFT, RIGHT)
model = BehaviorCloningModelLv2(input_size, output_size)
print(f"input_size {input_size}, output_size: {output_size}")
print(f"hidden_size {model.fc}")

input_size 209, output_size: 4
hidden_size Sequential(
  (0): Linear(in_features=209, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=128, bias=True)
  (3): ReLU()
  (4): Linear(in_features=128, out_features=128, bias=True)
  (5): ReLU()
  (6): Linear(in_features=128, out_features=4, bias=True)
)


## Training Loop

In [78]:
training_config = {
    "epochs": 300,
    "model_name": "BehaviorCloningModelLv2",
    "input_size": input_size,
    "output_size": output_size,
    "optimizer": "Adam",
    "learning_rate": 0.001,
    "batch_size": 64,
    "criterion": "CrossEntropyLoss",
    "activation": "ReLU",
    "scheduler": "ReduceLROnPlateau"
}

In [79]:
import wandb
wandb.login(key="294ac5de6babc54da53b9aadb344b3bb173b314d")
# change name for each run
wandb.init(project="bc_surrogate_partner_lv2", name="level1_run_9.4", config=training_config)
wandb.config.update(training_config)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/neo/.netrc


In [80]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=training_config["learning_rate"])
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=20, verbose=True)



In [81]:
# Training loop
for epoch in range(training_config["epochs"]):
    # Training phase
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0
    
    for state_vectors, actions in train_loader:
        optimizer.zero_grad()
        outputs = model(state_vectors)
        loss = criterion(outputs, actions)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total += actions.size(0)
        train_correct += (predicted == actions).sum().item()
    
    train_loss = train_loss / len(train_loader)
    train_accuracy = 100 * train_correct / train_total
    
    # Validation phase
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    
    with torch.no_grad():
        for state_vectors, actions in val_loader:
            outputs = model(state_vectors)
            loss = criterion(outputs, actions)
            
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += actions.size(0)
            val_correct += (predicted == actions).sum().item()
    
    val_loss = val_loss / len(val_loader)
    val_accuracy = 100 * val_correct / val_total
    
    # Update learning rate
    scheduler.step(val_loss)
    
    # Log metrics
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "train_accuracy": train_accuracy,
        "val_loss": val_loss,
        "val_accuracy": val_accuracy,
        "learning_rate": optimizer.param_groups[0]["lr"]
    })
    
    # Print progress
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{training_config['epochs']}, "
              f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%, "
              f"LR: {optimizer.param_groups[0]['lr']:.6f}")

# Close wandb
wandb.finish()

Epoch 10/300, Train Loss: 0.2340, Train Acc: 89.48%, Val Loss: 0.2782, Val Acc: 87.26%, LR: 0.001000
Epoch 20/300, Train Loss: 0.2101, Train Acc: 90.08%, Val Loss: 0.3185, Val Acc: 85.96%, LR: 0.001000
Epoch 30/300, Train Loss: 0.2260, Train Acc: 89.56%, Val Loss: 0.2562, Val Acc: 88.04%, LR: 0.001000
Epoch 40/300, Train Loss: 0.1693, Train Acc: 91.67%, Val Loss: 0.1973, Val Acc: 89.86%, LR: 0.000500
Epoch 50/300, Train Loss: 0.1565, Train Acc: 92.01%, Val Loss: 0.2149, Val Acc: 89.60%, LR: 0.000500
Epoch 60/300, Train Loss: 0.1667, Train Acc: 91.80%, Val Loss: 0.2068, Val Acc: 89.86%, LR: 0.000500
Epoch 70/300, Train Loss: 0.1461, Train Acc: 92.37%, Val Loss: 0.1284, Val Acc: 93.63%, LR: 0.000250
Epoch 80/300, Train Loss: 0.1438, Train Acc: 92.45%, Val Loss: 0.1402, Val Acc: 92.85%, LR: 0.000250
Epoch 90/300, Train Loss: 0.1419, Train Acc: 92.37%, Val Loss: 0.1164, Val Acc: 93.24%, LR: 0.000250
Epoch 100/300, Train Loss: 0.1416, Train Acc: 92.24%, Val Loss: 0.1291, Val Acc: 92.20%, LR

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇██
learning_rate,█████▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▅▅▄▄▅▄▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇█▇██████████
train_loss,█▇▇▇▆▃▃▅▄▃▃▃▃▃▃▃▂▂▂▂▂▂▁▁▂▂▁▁▂▁▂▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▅▄▅▆▆▆▇▇▇██▇███▇██████████████████████
val_loss,█▂▃▄▂▂▂▂▂▂▂▁▁▁▁▁▂▂▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁

0,1
epoch,300.0
learning_rate,3e-05
train_accuracy,93.33333
train_loss,0.11939
val_accuracy,94.40832
val_loss,0.09524


In [82]:
# Model save directory
model_path = "../model/lv2_bc_model_9.4.pth"
# Save model
torch.save(model.state_dict(), model_path)
print("Model saved to " + model_path)

Model saved to ../model/lv2_bc_model_9.4.pth


In [107]:
def predict_action(model, state):
    model.eval()
    with torch.no_grad():
        state_vector = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
        output = model(state_vector)
        action_idx = torch.argmax(output).item()
    
    action_mapping = {0: "UP", 1: "DOWN", 2: "LEFT", 3: "RIGHT"}
    return action_mapping[action_idx]

test_state, _ = dataset[5]  # Check the first sample in the dataset
predicted_action = predict_action(model, test_state)
print(f"AI Predicted Action: {predicted_action}")

AI Predicted Action: DOWN


  state_vector = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
