In [1]:
import json
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau

# Dataset Class

In [2]:
class GameDataset(Dataset):
    def __init__(self, json_file):
        with open(json_file, "r") as f:
            self.data = json.load(f)
        
        # Action to integer mapping
        self.action_mapping = {"UP": 0, "DOWN": 1, "LEFT": 2, "RIGHT": 3}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        state = sample["state"]
        action = self.action_mapping[sample["action"]]

        # Where is the agent?
        position = torch.tensor(state["position"], dtype=torch.float32)

        # How many chips have been collected?
        chips_collected = torch.tensor([state["chips_collected"]], dtype=torch.float32)
        
        total_chips_collected = torch.tensor([state["total_collected_chips"]], dtype=torch.float32)

        # Is the socket unlocked?
        socket_unlocked = torch.tensor([int(state["socket_unlocked"])], dtype=torch.float32)

        # Whre is the nearest chip?
        nearest_chip = torch.tensor(state["nearest_chip"], dtype=torch.float32)
        
        # Where is the exit?
        exit_location = torch.tensor(state["exit_position"], dtype=torch.float32)

        # Adjacent tile information
        tile_mapping = {"WALL": 0, "FLOOR": 1, "CHIP": 2, "EXIT": 3, "SOCKET": 4, "WATER": 5, "FIRE": 6, "HINT":7}
        local_grid = torch.tensor([[tile_mapping[tile] for tile in row] for row in state["local_grid"]], dtype=torch.float32)

        # Concatenate all the state information into a single tensor -> State Vector
        state_vector = torch.cat([position, chips_collected, total_chips_collected, socket_unlocked, nearest_chip, exit_location, local_grid.flatten()])
        
        return state_vector, torch.tensor(action, dtype=torch.long)

## Load Dataset and Test

In [17]:
dataset = GameDataset("../data/human_play_data.json")
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
sample_vector, sample_action = dataset[0]  
print(f"🔢 Sample Vector: {sample_vector.size()}")  # 🔥 샘플 벡터 크기 확인!
print(f"📊 Loaded {len(dataset)} samples.")

🔢 Sample Vector: torch.Size([34])
📊 Loaded 1311 samples.


# Behavior Cloning Model

In [4]:
import torch.nn as nn
import torch.optim as optim

In [18]:
# FNC Network for behavior cloning
class BehaviorCloningModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(BehaviorCloningModel, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),                          
            nn.Linear(64, output_size)
        )

    def forward(self, x):
        return self.fc(x)

In [19]:
input_size = len(dataset[0][0])  # Size of State Vector
print(dataset[0])
output_size = 4  # Possible Actions (UP, DOWN, LEFT, RIGHT)
model = BehaviorCloningModel(input_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

(tensor([10.,  6.,  0.,  0.,  0.,  9.,  3.,  6.,  6.,  0.,  0.,  0.,  1.,  0.,
         1.,  1.,  1.,  1.,  0.,  1.,  1.,  1.,  1.,  0.,  1.,  1.,  1.,  1.,
         0.,  1.,  1.,  1.,  1.,  0.]), tensor(1))


In [20]:
print(f"input_size {input_size}, output_size: {output_size}")

input_size 34, output_size: 4


## Training Loop

In [None]:
# traning configuration
training_config = {
    "epochs": 5000,
    "model_name": "BehaviorCloningModel",
    "intput_size": input_size,
    "output_size": output_size,
    "optimizer": "Adam",
    "learning_rate": 0.01,
    "batch_size": 128,
    "criterion": "CrossEntropyLoss",
    "activation": "ReLU",
    "scheudler": "StepLR"
}

In [22]:
import wandb
#wandb.login(key="294ac5de6babc54da53b9aadb344b3bb173b314d")
# change name for each run
wandb.init(project="bc_surrogate_partner", name="run_7.0", id="3x3_64_FCN-3Layer_5000ep", resume="never")
wandb.config.update(training_config)

In [23]:
# 🔥 훈련 루프
num_epochs = training_config["epochs"]
scheduler = StepLR(optimizer, step_size=200, gamma=0.9)

for epoch in range(num_epochs):
    total_loss = 0

    for state_vector, action in dataloader:
        optimizer.zero_grad()

        # predict
        outputs = model(state_vector)

        # loss calculation
        loss = criterion(outputs, action)
        total_loss += loss.item()

        # back propagation
        loss.backward()
        optimizer.step()
    
    scheduler.step()
    
    if (epoch + 1) % 100 == 0:
        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, LR: {scheduler.get_last_lr()[0]:.4f}")
        wandb.log({"epoch": epoch + 1, "loss": avg_loss, "learning_rate": scheduler.get_last_lr()[0]})
        
# close wandb
wandb.finish()

Epoch 100/5000, Loss: 0.4296, LR: 0.0010
Epoch 200/5000, Loss: 0.3316, LR: 0.0009
Epoch 300/5000, Loss: 0.2873, LR: 0.0009
Epoch 400/5000, Loss: 0.2792, LR: 0.0008
Epoch 500/5000, Loss: 0.2730, LR: 0.0008
Epoch 600/5000, Loss: 0.2593, LR: 0.0007
Epoch 700/5000, Loss: 0.2583, LR: 0.0007
Epoch 800/5000, Loss: 0.2526, LR: 0.0007
Epoch 900/5000, Loss: 0.2508, LR: 0.0007
Epoch 1000/5000, Loss: 0.2386, LR: 0.0006
Epoch 1100/5000, Loss: 0.2420, LR: 0.0006
Epoch 1200/5000, Loss: 0.2430, LR: 0.0005
Epoch 1300/5000, Loss: 0.2362, LR: 0.0005
Epoch 1400/5000, Loss: 0.2392, LR: 0.0005
Epoch 1500/5000, Loss: 0.2410, LR: 0.0005
Epoch 1600/5000, Loss: 0.2365, LR: 0.0004
Epoch 1700/5000, Loss: 0.2367, LR: 0.0004
Epoch 1800/5000, Loss: 0.2340, LR: 0.0004
Epoch 1900/5000, Loss: 0.2403, LR: 0.0004
Epoch 2000/5000, Loss: 0.2346, LR: 0.0003
Epoch 2100/5000, Loss: 0.2344, LR: 0.0003
Epoch 2200/5000, Loss: 0.2345, LR: 0.0003
Epoch 2300/5000, Loss: 0.2315, LR: 0.0003
Epoch 2400/5000, Loss: 0.2305, LR: 0.0003
E

KeyboardInterrupt: 

In [None]:
# Model save directory
model_path = "../model/lv1_bc_model.pth"
# Save model
torch.save(model.state_dict(), model_path)

📁 Model saved at ../model/lv1_bc_model_4.0.pth


In [None]:
def predict_action(model, state):
    model.eval()
    with torch.no_grad():
        state_vector = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
        output = model(state_vector)
        action_idx = torch.argmax(output).item()
    
    action_mapping = {0: "UP", 1: "DOWN", 2: "LEFT", 3: "RIGHT"}
    return action_mapping[action_idx]

test_state, _ = dataset[0]  # Check the first sample in the dataset
predicted_action = predict_action(model, test_state)
print(f"AI Predicted Action: {predicted_action}")

🤖 AI Predicted Action: DOWN


  state_vector = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
