In [1]:
import json
import sys
import os
# Add the parent directory to sys.path
sys.path.append(os.path.abspath('..'))
from src.tile_definitions import TILE_MAPPING

import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau

# Dataset Class

In [2]:
class Level2GameDataset(Dataset):
    def __init__(self, json_file, normalize_features=True):
        # Load the data
        with open(json_file, "r") as f:
            self.data = json.load(f)
        
        print(f"Loaded {len(self.data)} gameplay samples from {json_file}")
        
        # Action to integer mapping
        self.action_mapping = {"UP": 0, "DOWN": 1, "LEFT": 2, "RIGHT": 3, "FORCED": 4, "SLIDE": 5}
        
        # Create reverse mapping (tile_type -> ID) from TILE_MAPPING
        self.tile_type_to_id = {}
        for tile_id, (tile_type, _, _, _, _) in TILE_MAPPING.items():
            self.tile_type_to_id[tile_type] = tile_id
        
        # Feature normalization option
        self.normalize_features = normalize_features
        
        # Calculate dataset statistics if normalizing
        if self.normalize_features:
            self.calculate_normalization_stats()
            
    def calculate_normalization_stats(self):
        """Calculate statistics for feature normalization"""
        # Extract grid dimensions from first sample
        first_sample = self.data[0]["state"]["full_grid"]
        self.grid_height = len(first_sample)
        self.grid_width = len(first_sample[0])
        
        print(f"Grid dimensions: {self.grid_height}x{self.grid_width}")
        
        # Initialize normalization ranges
        self.position_max = max(self.grid_width, self.grid_height)
        self.chip_max = 2  # Reasonable maximum for chips
        
        print("Normalization enabled: Features will be scaled to [0,1]")

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        sample = self.data[idx]
        state = sample["state"]
        action = self.action_mapping[sample["action"]]
        
        # Extract state components
        position = torch.tensor(state["position"], dtype=torch.float32)
        chips_collected = torch.tensor([state["player_collected_chips"]], dtype=torch.float32)
        total_chips_collected = torch.tensor([state["total_collected_chips"]], dtype=torch.float32)
        socket_unlocked = torch.tensor([int(state["socket_unlocked"])], dtype=torch.float32)
        nearest_chip = torch.tensor(state["nearest_chip"], dtype=torch.float32)
        exit_location = torch.tensor(state["exit_position"], dtype=torch.float32)
        
        # Process key and boot information
        # Convert dictionary form to binary features
        key_features = torch.zeros(4)  # RED, BLUE, GREEN, YELLOW
        boot_features = torch.zeros(3)  # WATER, FIRE, FORCE
        
        # Process keys if available
        if "collected_keys" in state and state["collected_keys"]:
            keys_dict = state["collected_keys"]
            if "RED" in keys_dict and keys_dict["RED"]:
                key_features[0] = 1.0
            if "BLUE" in keys_dict and keys_dict["BLUE"]:
                key_features[1] = 1.0
            if "GREEN" in keys_dict and keys_dict["GREEN"]:
                key_features[2] = 1.0
            if "YELLOW" in keys_dict and keys_dict["YELLOW"]:
                key_features[3] = 1.0
        
        # Process boots if available
        if "collected_boots" in state and state["collected_boots"]:
            boots_dict = state["collected_boots"]
            if "WATER" in boots_dict and boots_dict["WATER"]:
                boot_features[0] = 1.0
            if "FIRE" in boots_dict and boots_dict["FIRE"]:
                boot_features[1] = 1.0
            if "FORCE" in boots_dict and boots_dict["FORCE"]:
                boot_features[2] = 1.0
        
        # Process the full grid
        full_grid = []
        for row in state["full_grid"]:
            processed_row = []
            for tile_type in row:
                # Map to integer using the updated tile definitions
                tile_id = self.tile_type_to_id.get(tile_type, 1)
                processed_row.append(tile_id)
            full_grid.append(processed_row)
        
        full_grid_tensor = torch.tensor(full_grid, dtype=torch.float32)
        
        # Process the local grid
        local_grid = []
        for row in state["local_grid"]:
            processed_row = []
            for tile_type in row:
                # Map to integer using the updated tile definitions
                tile_id = self.tile_type_to_id.get(tile_type, 1)
                processed_row.append(tile_id)
            local_grid.append(processed_row)
        
        local_grid_tensor = torch.tensor(local_grid, dtype=torch.float32)
        
        if self.normalize_features:
            max_tile_id = max(self.tile_type_to_id.values())
            full_grid_tensor = full_grid_tensor / max_tile_id
            local_grid_tensor = local_grid_tensor / max_tile_id
        
        # Additional state information
        is_sliding = torch.tensor([float(state.get("is_sliding", False))], dtype=torch.float32)
        is_being_forced = torch.tensor([float(state.get("is_being_forced", False))], dtype=torch.float32)
        alive = torch.tensor([float(state.get("alive", True))], dtype=torch.float32)
        remaining_chips = torch.tensor([state.get("remaining_chips", 0)], dtype=torch.float32)
        other_player_pos = torch.tensor(state.get("other_player_position", [-1, -1]), dtype=torch.float32)
        
        # Normalize position-based features if enabled
        if self.normalize_features:
            position = position / self.position_max
            nearest_chip = nearest_chip / self.position_max
            exit_location = exit_location / self.position_max
            
            if not (other_player_pos[0] == -1 and other_player_pos[1] == -1):
                other_player_pos = other_player_pos / self.position_max
                
            chips_collected = chips_collected / self.chip_max
            total_chips_collected = total_chips_collected / self.chip_max
            remaining_chips = remaining_chips / self.chip_max
            
        # Concatenate all state information into a single vector
        state_vector = torch.cat([
            position,                    # 2
            chips_collected,             # 1
            total_chips_collected,       # 1
            socket_unlocked,             # 1
            nearest_chip,                # 2
            exit_location,               # 2
            key_features,                # 4 (RED, BLUE, GREEN, YELLOW)
            boot_features,               # 3 (WATER, FIRE, FORCE)
            full_grid_tensor.flatten(),  # grid_height * grid_width
            local_grid_tensor.flatten(),
            is_sliding,                  # 1
            is_being_forced,             # 1
            alive,                       # 1
            remaining_chips,             # 1
            other_player_pos,            # 2
        ])
        
        return state_vector, torch.tensor(action, dtype=torch.long)
    
    def get_input_size(self):
        """Return the input size for the BC model"""
        # Get dimensions from first item in dataset
        sample_vector, _ = self[0]
        return sample_vector.size(0)

## Load Dataset and Test

In [3]:
# Dataset path
level2_data_path = "../data/human_play_data_level1.json"  # Update with your level 2 data path

# Initialize dataset
dataset = Level2GameDataset(level2_data_path, normalize_features=True)
input_size = dataset.get_input_size()
output_size = 6  # Four possible actions: UP, DOWN, LEFT, RIGHT
print(dataset.grid_height, dataset.grid_width, dataset.position_max)

Loaded 2020 gameplay samples from ../data/human_play_data_level1.json
Grid dimensions: 13x13
Normalization enabled: Features will be scaled to [0,1]
13 13 13


In [4]:
# Split into train and validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Behavior Cloning Model

In [5]:
import torch.nn as nn
import torch.optim as optim

In [6]:
# FNC Network for behavior cloning
class BehaviorCloningModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(BehaviorCloningModel, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),  
            nn.Linear(256, 256),
            nn.ReLU(),                            
            nn.Linear(256, output_size)
        )

    def forward(self, x):
        return self.fc(x)

In [7]:
model = BehaviorCloningModel(input_size, output_size)

In [8]:
print(f"input_size {input_size}, output_size: {output_size}")

input_size 200, output_size: 6


## Training Loop

In [10]:
training_config = {
    "epochs": 300,
    "model_name": "BehaviorCloningModel",
    "input_size": input_size,
    "output_size": output_size,
    "optimizer": "Adam",
    "learning_rate": 0.001,
    "batch_size": 32,
    "criterion": "CrossEntropyLoss",
    "activation": "ReLU",
    "scheduler": "ReduceLROnPlateau"
}

In [11]:
import wandb
wandb.login(key="294ac5de6babc54da53b9aadb344b3bb173b314d")
# change name for each run
wandb.init(project="bc_surrogate_partner_lv2", name="level2_run_1.1", config=training_config)
wandb.config.update(training_config)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/neo/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mknocknocknik[0m ([33mknocknocknik-pitts[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [12]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=training_config["learning_rate"])
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=20, verbose=True)



In [13]:
# Training loop
for epoch in range(training_config["epochs"]):
    # Training phase
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0
    
    for state_vectors, actions in train_loader:
        optimizer.zero_grad()
        outputs = model(state_vectors)
        loss = criterion(outputs, actions)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total += actions.size(0)
        train_correct += (predicted == actions).sum().item()
    
    train_loss = train_loss / len(train_loader)
    train_accuracy = 100 * train_correct / train_total
    
    # Validation phase
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    
    with torch.no_grad():
        for state_vectors, actions in val_loader:
            outputs = model(state_vectors)
            loss = criterion(outputs, actions)
            
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += actions.size(0)
            val_correct += (predicted == actions).sum().item()
    
    val_loss = val_loss / len(val_loader)
    val_accuracy = 100 * val_correct / val_total
    
    # Update learning rate
    scheduler.step(val_loss)
    
    # Log metrics
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "train_accuracy": train_accuracy,
        "val_loss": val_loss,
        "val_accuracy": val_accuracy,
        "learning_rate": optimizer.param_groups[0]["lr"]
    })
    
    # Print progress
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{training_config['epochs']}, "
              f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%, "
              f"LR: {optimizer.param_groups[0]['lr']:.6f}")

# Close wandb
wandb.finish()

Epoch 10/300, Train Loss: 0.9444, Train Acc: 60.89%, Val Loss: 0.8953, Val Acc: 64.36%, LR: 0.001000
Epoch 20/300, Train Loss: 0.8135, Train Acc: 66.65%, Val Loss: 0.8114, Val Acc: 67.57%, LR: 0.001000
Epoch 30/300, Train Loss: 0.6923, Train Acc: 72.03%, Val Loss: 0.8120, Val Acc: 68.07%, LR: 0.001000
Epoch 40/300, Train Loss: 0.6218, Train Acc: 74.57%, Val Loss: 0.8597, Val Acc: 67.82%, LR: 0.001000
Epoch 50/300, Train Loss: 0.5183, Train Acc: 79.21%, Val Loss: 0.8122, Val Acc: 70.05%, LR: 0.001000
Epoch 60/300, Train Loss: 0.4689, Train Acc: 81.25%, Val Loss: 0.9022, Val Acc: 67.57%, LR: 0.001000
Epoch 70/300, Train Loss: 0.3569, Train Acc: 85.15%, Val Loss: 0.8402, Val Acc: 72.77%, LR: 0.000500
Epoch 80/300, Train Loss: 0.2938, Train Acc: 87.31%, Val Loss: 0.9632, Val Acc: 73.27%, LR: 0.000500
Epoch 90/300, Train Loss: 0.2705, Train Acc: 88.43%, Val Loss: 1.0776, Val Acc: 74.50%, LR: 0.000250
Epoch 100/300, Train Loss: 0.2257, Train Acc: 90.22%, Val Loss: 1.1038, Val Acc: 75.00%, LR

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█████
learning_rate,██████████▄▄▄▄▄▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▃▃▄▄▄▄▅▅▆▆▇▇▇▇▇▇███████████████████████
train_loss,██▇▆▆▆▆▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▃▃▄▃▅▅▄▅▅▆█▅▆▇███▇█▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
val_loss,▅▃▃▂▁▁▁▁▁▁▁▁▁▂▂▃▃▃▄▄▅▅▆▆▆▇██████████████

0,1
epoch,300.0
learning_rate,0.0
train_accuracy,94.30693
train_loss,0.1412
val_accuracy,73.51485
val_loss,1.55322


In [14]:
# Model save directory
model_path = "../model/lv2_bc_model_1.1.pth"
# Save model
torch.save(model.state_dict(), model_path)

In [15]:
def predict_action(model, state):
    model.eval()
    with torch.no_grad():
        state_vector = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
        output = model(state_vector)
        action_idx = torch.argmax(output).item()
    
    action_mapping = {0: "UP", 1: "DOWN", 2: "LEFT", 3: "RIGHT", 4:"FORCED", 5:"SLIDE"}
    return action_mapping[action_idx]

test_state, _ = dataset[0]  # Check the first sample in the dataset
predicted_action = predict_action(model, test_state)
print(f"AI Predicted Action: {predicted_action}")

AI Predicted Action: LEFT


  state_vector = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
