Temporal Innovator:
- Retrocausality project

In [None]:
#%pip install mesa
#pip install ipykernel
#pip install ipywidgets --upgrade

In [1]:
from mesa import Agent, Model
from mesa.space import MultiGrid
import random

Initial setup:

In [2]:
class TimeAgent(Agent):
    def __init__(self, model):
        super().__init__(model) # Mesa 3: only model is passed
        self.model = model  # Explicitly store model reference if needed
        self.positions = [] # Will be populated with initial position in TimeModel

    def step(self):
        attempts = 0
        max_attempts = 10  # Prevent infinite loops
        while attempts < max_attempts:
            x, y = self.pos
            move = random.choice([(0, 1), (0, -1), (1, 0), (-1, 0)]) # Move randomly: up, down, left, right
            new_pos = (x + move[0], y + move[1])
            if (0 <= new_pos[0] < self.model.grid.width) and (0 <= new_pos[1] < self.model.grid.height):
                if new_pos not in self.model.occupied_positions or new_pos == self.pos:
                    self.model.grid.move_agent(self, new_pos)
                    self.model.occupied_positions.discard(self.pos) # Remove old position
                    self.model.occupied_positions.add(new_pos) # Add new position
                    self.positions.append(new_pos) # Record new position
                    break
            attempts += 1
        # If no valid move is found after max_attempts, agent stays put (position unchanged)
        else:
            self.positions.append(self.pos)  # Explicitly stay put

          
class TimeModel(Model):
    def __init__(self):
        super().__init__()
        self.grid = MultiGrid(10, 10, False)  # 10x10 grid, torus disabled
        self.schedule = []  # Manual agent list
        self.random = random.Random()
        self.step_count = 0  # Track step number
        self.occupied_positions = set()  # Track occupied positions
        
        # Create 5 agents with unique random starting positions
        available_positions = [(x, y) for x in range(10) for y in range(10)]  # All 10x10 positions
        self.random.shuffle(available_positions)  # Randomize order
        for i in range(5):
            agent = TimeAgent(self)
            start_pos = available_positions[i]  # Take a unique position
            self.grid.place_agent(agent, start_pos)
            agent.pos = start_pos  # Explicitly set pos (Mesa 3 compatibility)
            
            agent.positions.append(start_pos)  # Record initial position
            self.occupied_positions.add(start_pos)
            self.schedule.append(agent)    

    def step(self):
        # Reset occupied positions for this step (will be rebuilt)
        self.occupied_positions.clear()
        for agent in self.schedule:
            self.occupied_positions.add(agent.pos)
        
        random.shuffle(self.schedule)  # Random activation
        for agent in self.schedule:
            agent.step()
        self.step_count += 1  # Increment before printing    
        # self.print_positions()
        

    # Return the list of position histories for all agents
    def get_positions(self):
        sorted_agents = sorted(self.schedule, key=lambda a: a.unique_id)
        return [agent.positions for agent in sorted_agents]
    
    # Print history positions of all agents
    def print_positions(self):
        sorted_agents = sorted(self.schedule, key=lambda a: a.unique_id)
        for agent in sorted_agents:
            print(f"Agent {agent.unique_id}: {agent.positions}")


model = TimeModel()

for _ in range(5):
    model.step()
model.print_positions()

Agent 1: [(6, 0), (5, 0), (5, 1), (5, 2), (5, 3), (4, 3)]
Agent 2: [(7, 4), (7, 5), (6, 5), (7, 5), (7, 4), (6, 4)]
Agent 3: [(6, 8), (6, 9), (7, 9), (6, 9), (6, 8), (6, 7)]
Agent 4: [(7, 6), (8, 6), (7, 6), (6, 6), (7, 6), (7, 7)]
Agent 5: [(5, 1), (4, 1), (4, 0), (5, 0), (5, 1), (6, 1)]


In [1]:
import torch
print(torch.__version__)

2.6.0+cpu


Tranning

In [9]:
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
import pickle
import random
from mesa import Agent, Model
from mesa.space import MultiGrid

# TimeAgent with New Rule
class TimeAgent(Agent):
    def __init__(self, model, use_rule=True):
        super().__init__(model)
        self.model = model
        self.positions = []
        self.use_rule = use_rule

    def step(self):
        global trained_model
        x, y = self.pos
        moves = [(0, 1), (-1, 0), (0, -1), (1, 0)]  # Up, Left, Down, Right
        
        avoid_move = None
        if len(self.positions) >= 5 and trained_model is not None and self.use_rule:
            seq = self.positions[-5:]
            others = [a.pos for a in self.model.schedule if a != self]
            # Prepare input: 5 steps, each with (x, y) + 10 features
            input_data = []
            for pos in seq:
                step_features = list(pos)  # [x, y]
                # Add 5 other agents' relative positions (10 values)
                for i in range(min(5, len(others))):
                    ox, oy = others[i]
                    step_features.extend([ox - pos[0], oy - pos[1]])
                while len(step_features) < 12:  # Pad to 12
                    step_features.extend([0, 0])
                input_data.append(step_features[:12])
            input_data = torch.tensor([input_data], dtype=torch.float32) / 9.0
            with torch.no_grad():
                pred_dir = trained_model(input_data).argmax().item()
            avoid_move = list(reverse_map.keys())[pred_dir]

        attempts = 0
        max_attempts = 10
        move_idx = random.randint(0, 3)

        if self.use_rule:
            while attempts < max_attempts:
                move = moves[move_idx]
                if move == avoid_move and attempts < max_attempts - 1:
                    move_idx = (move_idx + 1) % 4
                    attempts += 1
                    continue
                new_pos = (x + move[0], y + move[1])
                if (0 <= new_pos[0] < self.model.grid.width) and (0 <= new_pos[1] < self.model.grid.height):
                    if new_pos not in self.model.occupied_positions or new_pos == self.pos:
                        self.model.grid.move_agent(self, new_pos)
                        self.model.occupied_positions.discard(self.pos)
                        self.model.occupied_positions.add(new_pos)
                        self.positions.append(new_pos)
                        break
                left_idx = (move_idx + 1) % 4
                left_pos = (x + moves[left_idx][0], y + moves[left_idx][1])
                if (0 <= left_pos[0] < self.model.grid.width) and (0 <= left_pos[1] < self.model.grid.height) and left_pos not in self.model.occupied_positions:
                    self.model.grid.move_agent(self, left_pos)
                    self.model.occupied_positions.discard(self.pos)
                    self.model.occupied_positions.add(left_pos)
                    self.positions.append(left_pos)
                    break
                opp_idx = (move_idx + 2) % 4
                opp_pos = (x + moves[opp_idx][0], y + moves[opp_idx][1])
                if (0 <= opp_pos[0] < self.model.grid.width) and (0 <= opp_pos[1] < self.model.grid.height) and opp_pos not in self.model.occupied_positions:
                    self.model.grid.move_agent(self, opp_pos)
                    self.model.occupied_positions.discard(self.pos)
                    self.model.occupied_positions.add(opp_pos)
                    self.positions.append(opp_pos)
                    break
                self.positions.append(self.pos)
                break
        else:
            while attempts < max_attempts:
                move = random.choice(moves)
                new_pos = (x + move[0], y + move[1])
                if (0 <= new_pos[0] < self.model.grid.width) and (0 <= new_pos[1] < self.model.grid.height):
                    if new_pos not in self.model.occupied_positions or new_pos == self.pos:
                        self.model.grid.move_agent(self, new_pos)
                        self.model.occupied_positions.discard(self.pos)
                        self.model.occupied_positions.add(new_pos)
                        self.positions.append(new_pos)
                        break
                attempts += 1
            else:
                self.positions.append(self.pos)

# TimeModel
class TimeModel(Model):
    def __init__(self, use_rule=True):
        super().__init__()
        self.grid = MultiGrid(10, 10, False)
        self.schedule = []
        self.random = random.Random()
        self.step_count = 0
        self.occupied_positions = set()
        self.use_rule = use_rule
        
        available_positions = [(x, y) for x in range(10) for y in range(10)]
        self.random.shuffle(available_positions)
        for i in range(30):
            agent = TimeAgent(self, use_rule=self.use_rule)
            start_pos = available_positions[i]
            self.grid.place_agent(agent, start_pos)
            agent.pos = start_pos
            agent.positions.append(start_pos)
            self.occupied_positions.add(start_pos)
            self.schedule.append(agent)

    def step(self):
        self.occupied_positions.clear()
        for agent in self.schedule:
            self.occupied_positions.add(agent.pos)
        random.shuffle(self.schedule)
        for agent in self.schedule:
            agent.step()
        self.step_count += 1

    def get_positions(self):
        sorted_agents = sorted(self.schedule, key=lambda a: a.unique_id)
        return [(agent.positions, [a.pos for a in sorted_agents if a != agent]) for agent in sorted_agents]

# TCN
class TCN(nn.Module):
    def __init__(self, input_size=12, output_size=5, num_channels=[64, 64, 64], kernel_size=5, dropout=0.2):
        super(TCN, self).__init__()
        layers = []
        for i in range(len(num_channels)):
            dilation = 2 ** i
            in_channels = input_size if i == 0 else num_channels[i-1]
            out_channels = num_channels[i]
            padding = (kernel_size - 1) * dilation
            layers.append(nn.Conv1d(in_channels, out_channels, kernel_size, padding=padding, dilation=dilation))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            if padding > 0:
                layers.append(nn.ConstantPad1d((-padding, 0), 0))
        self.tcn = nn.Sequential(*layers)
        self.fc = nn.Linear(num_channels[-1], output_size)

    def forward(self, x):
        x = x.transpose(1, 2)  # [batch_size, input_size, seq_len]
        out = self.tcn(x)
        out = out[:, :, -1]  # Last time step
        return out

# Data collection
def collect_and_save_data(num_runs=2000, filename="abm_data_with_rule.pkl", use_rule=True):
    all_data = []
    for run in range(num_runs):
        model = TimeModel(use_rule=use_rule)
        positions_history = []
        for _ in range(10):
            model.step()
            positions_history.append(model.get_positions())
        all_data.append(positions_history)
        if (run + 1) % 100 == 0:
            print(f"Completed {run + 1}/{num_runs} runs")
    with open(filename, 'wb') as f:
        pickle.dump(all_data, f)
    print(f"Saved {len(all_data)} runs to {filename}")
    return all_data

# Prepare data (Fixed)
def prepare_training_data(data, seq_len=5):
    X, y = [], []
    direction_map = {(0, 1): 0, (0, -1): 1, (1, 0): 2, (-1, 0): 3, (0, 0): 4}
    for run_data in data:
        for step_idx in range(len(run_data) - seq_len):
            step_data = run_data[step_idx:step_idx + seq_len]
            for positions, others in step_data[-1]:
                if len(positions) < seq_len + 1:
                    continue
                seq = positions[-seq_len - 1:-1]  # 5 steps
                seq_data = []
                for pos in seq:
                    step_features = list(pos)  # [x, y]
                    # Add 5 other agents' relative positions (10 values)
                    for i in range(min(5, len(others))):
                        ox, oy = others[i]
                        step_features.extend([ox - pos[0], oy - pos[1]])
                    while len(step_features) < 12:  # Pad to 12
                        step_features.extend([0, 0])
                    seq_data.append(step_features[:12])
                X.append(seq_data)
                # Target
                x1, y1 = positions[-2]
                x2, y2 = positions[-1]
                direction = (x2 - x1, y2 - y1)
                y.append(direction_map[direction])
    X = torch.tensor(X, dtype=torch.float32) / 9.0
    y = torch.tensor(y, dtype=torch.long)
    return X, y

# Training
def train_tcn(X, y, epochs=300, batch_size=32, learning_rate=0.001, patience=20):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    val_dataset = torch.utils.data.TensorDataset(X_val, y_val)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

    model = TCN(input_size=12, output_size=5, num_channels=[64, 64, 64], kernel_size=5, dropout=0.2)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    best_val_loss = float('inf')
    epochs_no_improve = 0
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        train_correct = 0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_correct += (outputs.argmax(dim=1) == batch_y).sum().item()

        model.eval()
        val_loss = 0
        val_correct = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                outputs = model(batch_X)
                val_loss += criterion(outputs, batch_y).item()
                val_correct += (outputs.argmax(dim=1) == batch_y).sum().item()

        scheduler.step()
        
        train_loss_avg = train_loss / len(train_loader)
        val_loss_avg = val_loss / len(val_loader)
        train_acc = train_correct / len(X_train)
        val_acc = val_correct / len(X_val)
        
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss_avg:.4f}, Val Loss: {val_loss_avg:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, LR: {scheduler.get_last_lr()[0]:.6f}")
        
        if val_loss_avg < best_val_loss:
            best_val_loss = val_loss_avg
            epochs_no_improve = 0
            best_model_state = model.state_dict()
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs")
                break
    
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    torch.save(model.state_dict(), "tcn_model.pth")
    print("Model saved to tcn_model.pth")
    return model

# Global variables
trained_model = None
direction_map = {0: "Up (0, 1)", 1: "Down (0, -1)", 2: "Right (1, 0)", 3: "Left (-1, 0)", 4: "No move (0, 0)"}
reverse_map = {(0, 1): 0, (0, -1): 1, (1, 0): 2, (-1, 0): 3, (0, 0): 4}

# Train with rule
sequences_with_rule = collect_and_save_data(num_runs=2000, filename="abm_data_with_rule.pkl", use_rule=True)
X, y = prepare_training_data(sequences_with_rule)
print(f"Loaded {len(sequences_with_rule)} runs with rule")
print(f"Training data shape: X={X.shape}, y={y.shape}")

print("\nTraining TCN (Direction Prediction with Rule)...")
trained_model = train_tcn(X, y, epochs=300, batch_size=32, learning_rate=0.001, patience=20)

# Compare with vs. without rule
def compare_rule_effects(runs=100, steps=10):
    no_rule_model = TimeModel(use_rule=False)
    no_rule_moves = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    for _ in range(runs):
        for _ in range(steps):
            no_rule_model.step()
        for agent in no_rule_model.schedule:
            for i in range(len(agent.positions) - 1):
                dx, dy = agent.positions[i + 1][0] - agent.positions[i][0], agent.positions[i + 1][1] - agent.positions[i][1]
                move_idx = reverse_map.get((dx, dy), 4)
                no_rule_moves[move_idx] += 1

    with_rule_model = TimeModel(use_rule=True)
    with_rule_moves = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    for _ in range(runs):
        for _ in range(steps):
            with_rule_model.step()
        for agent in with_rule_model.schedule:
            for i in range(len(agent.positions) - 1):
                dx, dy = agent.positions[i + 1][0] - agent.positions[i][0], agent.positions[i + 1][1] - agent.positions[i][1]
                move_idx = reverse_map.get((dx, dy), 4)
                with_rule_moves[move_idx] += 1

    total_no_rule = sum(no_rule_moves.values())
    total_with_rule = sum(with_rule_moves.values())
    print("\nNo Rule Move Frequencies:", {direction_map[k]: v/total_no_rule for k, v in no_rule_moves.items()})
    print("With Rule Move Frequencies:", {direction_map[k]: v/total_with_rule for k, v in with_rule_moves.items()})

# Run comparison
compare_rule_effects(runs=100, steps=10)

Completed 100/2000 runs
Completed 200/2000 runs
Completed 300/2000 runs
Completed 400/2000 runs
Completed 500/2000 runs
Completed 600/2000 runs
Completed 700/2000 runs
Completed 800/2000 runs
Completed 900/2000 runs
Completed 1000/2000 runs
Completed 1100/2000 runs
Completed 1200/2000 runs
Completed 1300/2000 runs
Completed 1400/2000 runs
Completed 1500/2000 runs
Completed 1600/2000 runs
Completed 1700/2000 runs
Completed 1800/2000 runs
Completed 1900/2000 runs
Completed 2000/2000 runs
Saved 2000 runs to abm_data_with_rule.pkl
Loaded 2000 runs with rule
Training data shape: X=torch.Size([300000, 5, 12]), y=torch.Size([300000])

Training TCN (Direction Prediction with Rule)...
Epoch [10/300], Train Loss: 3.7390, Val Loss: 3.6636, Train Acc: 0.2126, Val Acc: 0.2430, LR: 0.001000
Epoch [20/300], Train Loss: 3.7366, Val Loss: 3.6650, Train Acc: 0.2127, Val Acc: 0.2430, LR: 0.001000
Epoch [30/300], Train Loss: 3.7356, Val Loss: 3.6635, Train Acc: 0.2131, Val Acc: 0.2432, LR: 0.001000
Epoch 

Second one

In [None]:
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
import pickle
import random
from mesa import Agent, Model
from mesa.space import MultiGrid

# TimeAgent with Simpler Rule
class TimeAgent(Agent):
    def __init__(self, model, use_rule=True):
        super().__init__(model)
        self.model = model
        self.positions = []
        self.use_rule = use_rule

    def step(self):
        global trained_model
        x, y = self.pos
        moves = [(0, 1), (-1, 0), (0, -1), (1, 0)]
        
        avoid_move = None
        if len(self.positions) >= 5 and trained_model is not None and self.use_rule:
            seq = self.positions[-5:]
            others = [a.pos for a in self.model.schedule if a != self]
            input_data = []
            for pos in seq:
                step_features = list(pos)
                for ox, oy in others:  # All 29
                    step_features.extend([ox - pos[0], oy - pos[1]])
                while len(step_features) < 60:
                    step_features.extend([0, 0])
                input_data.append(step_features[:60])
            input_data = torch.tensor([input_data], dtype=torch.float32) / 9.0
            with torch.no_grad():
                pred_dir = trained_model(input_data).argmax().item()
            avoid_move = list(reverse_map.keys())[pred_dir]

        attempts = 0
        max_attempts = 10
        move_idx = random.randint(0, 3)

        if self.use_rule:
            while attempts < max_attempts:
                move = moves[move_idx]
                if move == avoid_move and attempts < max_attempts - 1:
                    move_idx = (move_idx + 1) % 4
                    attempts += 1
                    continue
                new_pos = (x + move[0], y + move[1])
                if (0 <= new_pos[0] < self.model.grid.width) and (0 <= new_pos[1] < self.model.grid.height):
                    if new_pos not in self.model.occupied_positions or new_pos == self.pos:
                        self.model.grid.move_agent(self, new_pos)
                        self.model.occupied_positions.discard(self.pos)
                        self.model.occupied_positions.add(new_pos)
                        self.positions.append(new_pos)
                        break
                move_idx = (move_idx + 1) % 4
                attempts += 1
            else:
                self.positions.append(self.pos)
        else:
            while attempts < max_attempts:
                move = random.choice(moves)
                new_pos = (x + move[0], y + move[1])
                if (0 <= new_pos[0] < self.model.grid.width) and (0 <= new_pos[1] < self.model.grid.height):
                    if new_pos not in self.model.occupied_positions or new_pos == self.pos:
                        self.model.grid.move_agent(self, new_pos)
                        self.model.occupied_positions.discard(self.pos)
                        self.model.occupied_positions.add(new_pos)
                        self.positions.append(new_pos)
                        break
                attempts += 1
            else:
                self.positions.append(self.pos)

# TimeModel
class TimeModel(Model):
    def __init__(self, use_rule=True):
        super().__init__()
        self.grid = MultiGrid(10, 10, False)
        self.schedule = []
        self.random = random.Random()
        self.step_count = 0
        self.occupied_positions = set()
        self.use_rule = use_rule
        
        available_positions = [(x, y) for x in range(10) for y in range(10)]
        self.random.shuffle(available_positions)
        for i in range(30):
            agent = TimeAgent(self, use_rule=self.use_rule)
            start_pos = available_positions[i]
            self.grid.place_agent(agent, start_pos)
            agent.pos = start_pos
            agent.positions.append(start_pos)
            self.occupied_positions.add(start_pos)
            self.schedule.append(agent)

    def step(self):
        self.occupied_positions.clear()
        for agent in self.schedule:
            self.occupied_positions.add(agent.pos)
        random.shuffle(self.schedule)
        for agent in self.schedule:
            agent.step()
        self.step_count += 1

    def get_positions(self):
        sorted_agents = sorted(self.schedule, key=lambda a: a.unique_id)
        return [(agent.positions, [a.pos for a in sorted_agents if a != agent]) for agent in sorted_agents]

# TCN
class TCN(nn.Module):
    def __init__(self, input_size=60, output_size=5, num_channels=[128, 128, 128], kernel_size=7, dropout=0.2):
        super(TCN, self).__init__()
        layers = []
        for i in range(len(num_channels)):
            dilation = 2 ** i
            in_channels = input_size if i == 0 else num_channels[i-1]
            out_channels = num_channels[i]
            padding = (kernel_size - 1) * dilation
            layers.append(nn.Conv1d(in_channels, out_channels, kernel_size, padding=padding, dilation=dilation))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            if padding > 0:
                layers.append(nn.ConstantPad1d((-padding, 0), 0))
        self.tcn = nn.Sequential(*layers)
        self.fc = nn.Linear(num_channels[-1], output_size)

    def forward(self, x):
        x = x.transpose(1, 2)
        out = self.tcn(x)
        out = out[:, :, -1]
        return out

# Data collection
def collect_and_save_data(num_runs=2000, filename="abm_data_with_rule_2.pkl", use_rule=True):
    all_data = []
    for run in range(num_runs):
        model = TimeModel(use_rule=use_rule)
        positions_history = []
        for _ in range(10):
            model.step()
            positions_history.append(model.get_positions())
        all_data.append(positions_history)
        if (run + 1) % 100 == 0:
            print(f"Completed {run + 1}/{num_runs} runs")
    with open(filename, 'wb') as f:
        pickle.dump(all_data, f)
    print(f"Saved {len(all_data)} runs to {filename}")
    return all_data

# Prepare data
def prepare_training_data(data, seq_len=5):
    X, y = [], []
    direction_map = {(0, 1): 0, (0, -1): 1, (1, 0): 2, (-1, 0): 3, (0, 0): 4}
    for run_data in data:
        for step_idx in range(len(run_data) - seq_len):
            step_data = run_data[step_idx:step_idx + seq_len]
            for positions, others in step_data[-1]:
                if len(positions) < seq_len + 1:
                    continue
                seq = positions[-seq_len - 1:-1]
                seq_data = []
                for pos in seq:
                    step_features = list(pos)
                    for ox, oy in others:
                        step_features.extend([ox - pos[0], oy - pos[1]])
                    while len(step_features) < 60:
                        step_features.extend([0, 0])
                    seq_data.append(step_features[:60])
                X.append(seq_data)
                x1, y1 = positions[-2]
                x2, y2 = positions[-1]
                direction = (x2 - x1, y2 - y1)
                y.append(direction_map[direction])
    X = torch.tensor(X, dtype=torch.float32) / 9.0
    y = torch.tensor(y, dtype=torch.long)
    return X, y

# Training
def train_tcn(X, y, epochs=300, batch_size=32, learning_rate=0.001, patience=20):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    val_dataset = torch.utils.data.TensorDataset(X_val, y_val)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

    model = TCN(input_size=60, output_size=5, num_channels=[128, 128, 128], kernel_size=7, dropout=0.2)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    best_val_loss = float('inf')
    epochs_no_improve = 0
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        train_correct = 0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_correct += (outputs.argmax(dim=1) == batch_y).sum().item()

        model.eval()
        val_loss = 0
        val_correct = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                outputs = model(batch_X)
                val_loss += criterion(outputs, batch_y).item()
                val_correct += (outputs.argmax(dim=1) == batch_y).sum().item()

        scheduler.step()
        
        train_loss_avg = train_loss / len(train_loader)
        val_loss_avg = val_loss / len(val_loader)
        train_acc = train_correct / len(X_train)
        val_acc = val_correct / len(X_val)
        
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss_avg:.4f}, Val Loss: {val_loss_avg:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, LR: {scheduler.get_last_lr()[0]:.6f}")
        
        if val_loss_avg < best_val_loss:
            best_val_loss = val_loss_avg
            epochs_no_improve = 0
            best_model_state = model.state_dict()
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs")
                break
    
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    torch.save(model.state_dict(), "tcn_model_2.pth")
    print("Model saved to tcn_model_2.pth")
    return model

# Global variables
trained_model = None
direction_map = {0: "Up (0, 1)", 1: "Down (0, -1)", 2: "Right (1, 0)", 3: "Left (-1, 0)", 4: "No move (0, 0)"}
reverse_map = {(0, 1): 0, (0, -1): 1, (1, 0): 2, (-1, 0): 3, (0, 0): 4}

# Train with rule
sequences_with_rule = collect_and_save_data(num_runs=2000, filename="abm_data_with_rule_2.pkl", use_rule=True)
X, y = prepare_training_data(sequences_with_rule)
print(f"Loaded {len(sequences_with_rule)} runs with rule")
print(f"Training data shape: X={X.shape}, y={y.shape}")

print("\nTraining TCN (Direction Prediction with Rule)...")
trained_model = train_tcn(X, y, epochs=300, batch_size=32, learning_rate=0.001, patience=20)

# Compare with vs. without rule
def compare_rule_effects(runs=100, steps=10):
    no_rule_model = TimeModel(use_rule=False)
    no_rule_moves = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    for _ in range(runs):
        for _ in range(steps):
            no_rule_model.step()
        for agent in no_rule_model.schedule:
            for i in range(len(agent.positions) - 1):
                dx, dy = agent.positions[i + 1][0] - agent.positions[i][0], agent.positions[i + 1][1] - agent.positions[i][1]
                move_idx = reverse_map.get((dx, dy), 4)
                no_rule_moves[move_idx] += 1

    with_rule_model = TimeModel(use_rule=True)
    with_rule_moves = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    for _ in range(runs):
        for _ in range(steps):
            with_rule_model.step()
        for agent in with_rule_model.schedule:
            for i in range(len(agent.positions) - 1):
                dx, dy = agent.positions[i + 1][0] - agent.positions[i][0], agent.positions[i + 1][1] - agent.positions[i][1]
                move_idx = reverse_map.get((dx, dy), 4)
                with_rule_moves[move_idx] += 1

    total_no_rule = sum(no_rule_moves.values())
    total_with_rule = sum(with_rule_moves.values())
    print("\nNo Rule Move Frequencies:", {direction_map[k]: v/total_no_rule for k, v in no_rule_moves.items()})
    print("With Rule Move Frequencies:", {direction_map[k]: v/total_with_rule for k, v in with_rule_moves.items()})

# Run comparison
compare_rule_effects(runs=100, steps=10)

Completed 100/2000 runs
Completed 200/2000 runs
Completed 300/2000 runs
Completed 400/2000 runs
Completed 500/2000 runs
Completed 600/2000 runs
Completed 700/2000 runs
Completed 800/2000 runs
Completed 900/2000 runs
Completed 1000/2000 runs
Completed 1100/2000 runs
Completed 1200/2000 runs
Completed 1300/2000 runs
Completed 1400/2000 runs
Completed 1500/2000 runs
Completed 1600/2000 runs
Completed 1700/2000 runs
Completed 1800/2000 runs
Completed 1900/2000 runs
Completed 2000/2000 runs
Saved 2000 runs to abm_data_with_rule_2.pkl
Loaded 2000 runs with rule
Training data shape: X=torch.Size([300000, 5, 60]), y=torch.Size([300000])

Training TCN (Direction Prediction with Rule)...
Epoch [10/300], Train Loss: 2.3149, Val Loss: 1.7476, Train Acc: 0.3431, Val Acc: 0.3700, LR: 0.001000
Epoch [20/300], Train Loss: 2.2849, Val Loss: 1.7241, Train Acc: 0.3629, Val Acc: 0.3876, LR: 0.001000
Epoch [30/300], Train Loss: 2.2738, Val Loss: 1.7110, Train Acc: 0.3703, Val Acc: 0.3972, LR: 0.001000
Epoc

Add rule: center bias

In [1]:
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
import pickle
import random
from mesa import Agent, Model
from mesa.space import MultiGrid

# TimeAgent with Center Bias
class TimeAgent(Agent):
    def __init__(self, model, use_rule=True):
        super().__init__(model)
        self.model = model
        self.positions = []
        self.use_rule = use_rule

    def step(self):
        global trained_model
        x, y = self.pos
        moves = [(0, 1), (-1, 0), (0, -1), (1, 0)]
        
        avoid_move = None
        if len(self.positions) >= 5 and trained_model is not None and self.use_rule:
            seq = self.positions[-5:]
            others = [a.pos for a in self.model.schedule if a != self]
            input_data = []
            for pos in seq:
                step_features = list(pos)
                for ox, oy in others:
                    step_features.extend([ox - pos[0], oy - pos[1]])
                while len(step_features) < 60:
                    step_features.extend([0, 0])
                input_data.append(step_features[:60])
            input_data = torch.tensor([input_data], dtype=torch.float32) / 9.0
            with torch.no_grad():
                pred_dir = trained_model(input_data).argmax().item()
            avoid_move = list(reverse_map.keys())[pred_dir]

        attempts = 0
        max_attempts = 10
        move_idx = random.randint(0, 3)

        if self.use_rule:
            while attempts < max_attempts:
                move = moves[move_idx]
                if move == avoid_move and attempts < max_attempts - 1:
                    move_idx = (move_idx + 1) % 4
                    attempts += 1
                    continue
                new_pos = (x + move[0], y + move[1])
                if (0 <= new_pos[0] < self.model.grid.width) and (0 <= new_pos[1] < self.model.grid.height):
                    if new_pos not in self.model.occupied_positions or new_pos == self.pos:
                        self.model.grid.move_agent(self, new_pos)
                        self.model.occupied_positions.discard(self.pos)
                        self.model.occupied_positions.add(new_pos)
                        self.positions.append(new_pos)
                        break

                # Turn left if occupied, else bias toward center (5, 5)
                center_x, center_y = 5, 5
                if x < center_x and (1, 0) not in self.model.occupied_positions:
                    move_idx = 2   # Right
                elif x > center_x and (-1, 0) not in self.model.occupied_positions:
                    move_idx = 1   # Left
                elif y < center_y and (0, 1) not in self.model.occupied_positions:
                    move_idx = 0   # Up
                elif y > center_y and (0, -1) not in self.model.occupied_positions:
                    move_idx = 3   # Down
                else:
                    move_idx = (move_idx + 1) % 4
                attempts += 1
            else:
                self.positions.append(self.pos)
        else:
            while attempts < max_attempts:
                move = random.choice(moves)
                new_pos = (x + move[0], y + move[1])
                if (0 <= new_pos[0] < self.model.grid.width) and (0 <= new_pos[1] < self.model.grid.height):
                    if new_pos not in self.model.occupied_positions or new_pos == self.pos:
                        self.model.grid.move_agent(self, new_pos)
                        self.model.occupied_positions.discard(self.pos)
                        self.model.occupied_positions.add(new_pos)
                        self.positions.append(new_pos)
                        break
                attempts += 1
            else:
                self.positions.append(self.pos)

# TimeModel
class TimeModel(Model):
    def __init__(self, use_rule=True):
        super().__init__()
        self.grid = MultiGrid(10, 10, False)
        self.schedule = []
        self.random = random.Random()
        self.step_count = 0
        self.occupied_positions = set()
        self.use_rule = use_rule
        
        available_positions = [(x, y) for x in range(10) for y in range(10)]
        self.random.shuffle(available_positions)
        for i in range(30):
            agent = TimeAgent(self, use_rule=self.use_rule)
            start_pos = available_positions[i]
            self.grid.place_agent(agent, start_pos)
            agent.pos = start_pos
            agent.positions.append(start_pos)
            self.occupied_positions.add(start_pos)
            self.schedule.append(agent)

    def step(self):
        self.occupied_positions.clear()
        for agent in self.schedule:
            self.occupied_positions.add(agent.pos)
        random.shuffle(self.schedule)
        for agent in self.schedule:
            agent.step()
        self.step_count += 1

    def get_positions(self):
        sorted_agents = sorted(self.schedule, key=lambda a: a.unique_id)
        return [(agent.positions, [a.pos for a in sorted_agents if a != agent]) for agent in sorted_agents]

# TCN
class TCN(nn.Module):
    def __init__(self, input_size=60, output_size=5, num_channels=[256, 256, 256], kernel_size=9, dropout=0.2):
        super(TCN, self).__init__()
        layers = []
        for i in range(len(num_channels)):
            dilation = 2 ** i
            in_channels = input_size if i == 0 else num_channels[i-1]
            out_channels = num_channels[i]
            padding = (kernel_size - 1) * dilation
            layers.append(nn.Conv1d(in_channels, out_channels, kernel_size, padding=padding, dilation=dilation))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            if padding > 0:
                layers.append(nn.ConstantPad1d((-padding, 0), 0))
        self.tcn = nn.Sequential(*layers)
        self.fc = nn.Linear(num_channels[-1], output_size)

    def forward(self, x):
        x = x.transpose(1, 2)
        out = self.tcn(x)
        out = out[:, :, -1]
        return out

# Data collection (10 steps)
def collect_and_save_data(num_runs=2000, filename="abm_data_with_rule_add.pkl", use_rule=True):
    all_data = []
    for run in range(num_runs):
        model = TimeModel(use_rule=use_rule)
        positions_history = []
        for _ in range(10):
            model.step()
            positions_history.append(model.get_positions())
        all_data.append(positions_history)
        if (run + 1) % 100 == 0:
            print(f"Completed {run + 1}/{num_runs} runs")
    with open(filename, 'wb') as f:
        pickle.dump(all_data, f)
    print(f"Saved {len(all_data)} runs to {filename}")
    return all_data

# Prepare data (Optimized for 10 steps)
def prepare_training_data(data, seq_len=5):
    X, y = [], []
    direction_map = {(0, 1): 0, (0, -1): 1, (1, 0): 2, (-1, 0): 3, (0, 0): 4}
    for run_data in data:
        for step_idx in range(len(run_data) - seq_len + 1):  # Maximize windows: 10 - 5 + 1 = 6
            step_data = run_data[step_idx:step_idx + seq_len]
            for positions, others in step_data[-1]:
                if len(positions) < step_idx + seq_len + 1:  # Ensure enough steps
                    continue
                seq = positions[step_idx:step_idx + seq_len]  # 5-step window
                seq_data = []
                for pos in seq:
                    step_features = list(pos)
                    for ox, oy in others:
                        step_features.extend([ox - pos[0], oy - pos[1]])
                    while len(step_features) < 60:
                        step_features.extend([0, 0])
                    seq_data.append(step_features[:60])
                X.append(seq_data)
                x1, y1 = positions[step_idx + seq_len - 1]  # Last of seq
                x2, y2 = positions[step_idx + seq_len]      # Target
                direction = (x2 - x1, y2 - y1)
                y.append(direction_map[direction])
    X = torch.tensor(X, dtype=torch.float32) / 9.0
    y = torch.tensor(y, dtype=torch.long)
    return X, y

# Training
def train_tcn(X, y, epochs=300, batch_size=32, learning_rate=0.001, patience=20):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    val_dataset = torch.utils.data.TensorDataset(X_val, y_val)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

    model = TCN(input_size=60, output_size=5, num_channels=[256, 256, 256], kernel_size=9, dropout=0.2)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    best_val_loss = float('inf')
    epochs_no_improve = 0
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        train_correct = 0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_correct += (outputs.argmax(dim=1) == batch_y).sum().item()

        model.eval()
        val_loss = 0
        val_correct = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                outputs = model(batch_X)
                val_loss += criterion(outputs, batch_y).item()
                val_correct += (outputs.argmax(dim=1) == batch_y).sum().item()

        scheduler.step()
        
        train_loss_avg = train_loss / len(train_loader)
        val_loss_avg = val_loss / len(val_loader)
        train_acc = train_correct / len(X_train)
        val_acc = val_correct / len(X_val)
        
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss_avg:.4f}, Val Loss: {val_loss_avg:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, LR: {scheduler.get_last_lr()[0]:.6f}")
        
        if val_loss_avg < best_val_loss:
            best_val_loss = val_loss_avg
            epochs_no_improve = 0
            best_model_state = model.state_dict()
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs")
                break
    
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    torch.save(model.state_dict(), "tcn_model_add.pth")
    print("Model saved to tcn_model_add.pth")
    return model

# Global variables
trained_model = None
direction_map = {0: "Up (0, 1)", 1: "Down (0, -1)", 2: "Right (1, 0)", 3: "Left (-1, 0)", 4: "No move (0, 0)"}
reverse_map = {(0, 1): 0, (0, -1): 1, (1, 0): 2, (-1, 0): 3, (0, 0): 4}

# Train with rule
sequences_with_rule = collect_and_save_data(num_runs=2000, filename="abm_data_with_rule_add.pkl", use_rule=True)
X, y = prepare_training_data(sequences_with_rule)
print(f"Loaded {len(sequences_with_rule)} runs with rule")
print(f"Training data shape: X={X.shape}, y={y.shape}")

print("\nTraining TCN (Direction Prediction with Rule)...")
trained_model = train_tcn(X, y, epochs=300, batch_size=32, learning_rate=0.001, patience=20)

# Compare with vs. without rule
def compare_rule_effects(runs=100, steps=10):
    no_rule_model = TimeModel(use_rule=False)
    no_rule_moves = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    for _ in range(runs):
        for _ in range(steps):
            no_rule_model.step()
        for agent in no_rule_model.schedule:
            for i in range(len(agent.positions) - 1):
                dx, dy = agent.positions[i + 1][0] - agent.positions[i][0], agent.positions[i + 1][1] - agent.positions[i][1]
                move_idx = reverse_map.get((dx, dy), 4)
                no_rule_moves[move_idx] += 1

    with_rule_model = TimeModel(use_rule=True)
    with_rule_moves = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    for _ in range(runs):
        for _ in range(steps):
            with_rule_model.step()
        for agent in with_rule_model.schedule:
            for i in range(len(agent.positions) - 1):
                dx, dy = agent.positions[i + 1][0] - agent.positions[i][0], agent.positions[i + 1][1] - agent.positions[i][1]
                move_idx = reverse_map.get((dx, dy), 4)
                with_rule_moves[move_idx] += 1

    total_no_rule = sum(no_rule_moves.values())
    total_with_rule = sum(with_rule_moves.values())
    print("\nNo Rule Move Frequencies:", {direction_map[k]: v/total_no_rule for k, v in no_rule_moves.items()})
    print("With Rule Move Frequencies:", {direction_map[k]: v/total_with_rule for k, v in with_rule_moves.items()})

# Run comparison
compare_rule_effects(runs=100, steps=10)

Completed 100/2000 runs
Completed 200/2000 runs
Completed 300/2000 runs
Completed 400/2000 runs
Completed 500/2000 runs
Completed 600/2000 runs
Completed 700/2000 runs
Completed 800/2000 runs
Completed 900/2000 runs
Completed 1000/2000 runs
Completed 1100/2000 runs
Completed 1200/2000 runs
Completed 1300/2000 runs
Completed 1400/2000 runs
Completed 1500/2000 runs
Completed 1600/2000 runs
Completed 1700/2000 runs
Completed 1800/2000 runs
Completed 1900/2000 runs
Completed 2000/2000 runs
Saved 2000 runs to abm_data_with_rule_add.pkl
Loaded 2000 runs with rule
Training data shape: X=torch.Size([360000, 5, 60]), y=torch.Size([360000])

Training TCN (Direction Prediction with Rule)...
Epoch [10/300], Train Loss: 3.9247, Val Loss: 3.6444, Train Acc: 0.2626, Val Acc: 0.2801, LR: 0.001000


KeyboardInterrupt: 

In [2]:
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
import pickle
import random
from mesa import Agent, Model
from mesa.space import MultiGrid

# TimeAgent with Center Bias
class TimeAgent(Agent):
    def __init__(self, model, use_rule=True):
        super().__init__(model)
        self.model = model
        self.positions = []
        self.use_rule = use_rule

    def step(self):
        global trained_model
        x, y = self.pos
        moves = [(0, 1), (-1, 0), (0, -1), (1, 0)]
        
        avoid_move = None
        if len(self.positions) >= 5 and trained_model is not None and self.use_rule:
            seq = self.positions[-5:]
            others = [a.pos for a in self.model.schedule if a != self]
            input_data = []
            for pos in seq:
                step_features = list(pos)
                for ox, oy in others:
                    step_features.extend([ox - pos[0], oy - pos[1]])
                while len(step_features) < 60:
                    step_features.extend([0, 0])
                input_data.append(step_features[:60])
            input_data = torch.tensor([input_data], dtype=torch.float32) / 9.0
            with torch.no_grad():
                pred_dir = trained_model(input_data).argmax().item()
            avoid_move = list(reverse_map.keys())[pred_dir]

        attempts = 0
        max_attempts = 10
        move_idx = random.randint(0, 3)

        if self.use_rule:
            while attempts < max_attempts:
                move = moves[move_idx]
                if move == avoid_move and attempts < max_attempts - 1:
                    move_idx = (move_idx + 1) % 4
                    attempts += 1
                    continue
                new_pos = (x + move[0], y + move[1])
                if (0 <= new_pos[0] < self.model.grid.width) and (0 <= new_pos[1] < self.model.grid.height):
                    if new_pos not in self.model.occupied_positions or new_pos == self.pos:
                        self.model.grid.move_agent(self, new_pos)
                        self.model.occupied_positions.discard(self.pos)
                        self.model.occupied_positions.add(new_pos)
                        self.positions.append(new_pos)
                        break
                center_x, center_y = 5, 5
                if x < center_x and (1, 0) not in self.model.occupied_positions:
                    move_idx = 2
                elif x > center_x and (-1, 0) not in self.model.occupied_positions:
                    move_idx = 1
                elif y < center_y and (0, 1) not in self.model.occupied_positions:
                    move_idx = 0
                elif y > center_y and (0, -1) not in self.model.occupied_positions:
                    move_idx = 3
                else:
                    move_idx = (move_idx + 1) % 4
                attempts += 1
            else:
                self.positions.append(self.pos)
        else:
            while attempts < max_attempts:
                move = random.choice(moves)
                new_pos = (x + move[0], y + move[1])
                if (0 <= new_pos[0] < self.model.grid.width) and (0 <= new_pos[1] < self.model.grid.height):
                    if new_pos not in self.model.occupied_positions or new_pos == self.pos:
                        self.model.grid.move_agent(self, new_pos)
                        self.model.occupied_positions.discard(self.pos)
                        self.model.occupied_positions.add(new_pos)
                        self.positions.append(new_pos)
                        break
                attempts += 1
            else:
                self.positions.append(self.pos)

# TimeModel
class TimeModel(Model):
    def __init__(self, use_rule=True):
        super().__init__()
        self.grid = MultiGrid(10, 10, False)
        self.schedule = []
        self.random = random.Random()
        self.step_count = 0
        self.occupied_positions = set()
        self.use_rule = use_rule
        
        available_positions = [(x, y) for x in range(10) for y in range(10)]
        self.random.shuffle(available_positions)
        for i in range(30):
            agent = TimeAgent(self, use_rule=self.use_rule)
            start_pos = available_positions[i]
            self.grid.place_agent(agent, start_pos)
            agent.pos = start_pos
            agent.positions.append(start_pos)
            self.occupied_positions.add(start_pos)
            self.schedule.append(agent)

    def step(self):
        self.occupied_positions.clear()
        for agent in self.schedule:
            self.occupied_positions.add(agent.pos)
        random.shuffle(self.schedule)
        for agent in self.schedule:
            agent.step()
        self.step_count += 1

    def get_positions(self):
        sorted_agents = sorted(self.schedule, key=lambda a: a.unique_id)
        return [(agent.positions, [a.pos for a in sorted_agents if a != agent]) for agent in sorted_agents]

# TCN
class TCN(nn.Module):
    def __init__(self, input_size=60, output_size=5, num_channels=[128, 128, 128], kernel_size=7, dropout=0.2):
        super(TCN, self).__init__()
        layers = []
        for i in range(len(num_channels)):
            dilation = 2 ** i
            in_channels = input_size if i == 0 else num_channels[i-1]
            out_channels = num_channels[i]
            padding = (kernel_size - 1) * dilation
            layers.append(nn.Conv1d(in_channels, out_channels, kernel_size, padding=padding, dilation=dilation))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            if padding > 0:
                layers.append(nn.ConstantPad1d((-padding, 0), 0))
        self.tcn = nn.Sequential(*layers)
        self.fc = nn.Linear(num_channels[-1], output_size)

    def forward(self, x):
        x = x.transpose(1, 2)
        out = self.tcn(x)
        out = out[:, :, -1]
        return out

# Data collection (10 steps)
def collect_and_save_data(num_runs=2000, filename="abm_data_with_rule_3.pkl", use_rule=True):
    all_data = []
    for run in range(num_runs):
        model = TimeModel(use_rule=use_rule)
        positions_history = []
        for _ in range(10):
            model.step()
            positions_history.append(model.get_positions())
        all_data.append(positions_history)
        if (run + 1) % 100 == 0:
            print(f"Completed {run + 1}/{num_runs} runs")
    with open(filename, 'wb') as f:
        pickle.dump(all_data, f)
    print(f"Saved {len(all_data)} runs to {filename}")
    return all_data

# Prepare data
def prepare_training_data(data, seq_len=5):
    X, y = [], []
    direction_map = {(0, 1): 0, (0, -1): 1, (1, 0): 2, (-1, 0): 3, (0, 0): 4}
    for run_data in data:
        for step_idx in range(len(run_data) - seq_len + 1):
            step_data = run_data[step_idx:step_idx + seq_len]
            for positions, others in step_data[-1]:
                if len(positions) < step_idx + seq_len + 1:
                    continue
                seq = positions[step_idx:step_idx + seq_len]
                seq_data = []
                for pos in seq:
                    step_features = list(pos)
                    for ox, oy in others:
                        step_features.extend([ox - pos[0], oy - pos[1]])
                    while len(step_features) < 60:
                        step_features.extend([0, 0])
                    seq_data.append(step_features[:60])
                X.append(seq_data)
                x1, y1 = positions[step_idx + seq_len - 1]
                x2, y2 = positions[step_idx + seq_len]
                direction = (x2 - x1, y2 - y1)
                y.append(direction_map[direction])
    X = torch.tensor(X, dtype=torch.float32) / 9.0
    y = torch.tensor(y, dtype=torch.long)
    return X, y

# Training
def train_tcn(X, y, epochs=300, batch_size=64, learning_rate=0.001, patience=20):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    val_dataset = torch.utils.data.TensorDataset(X_val, y_val)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

    model = TCN(input_size=60, output_size=5, num_channels=[128, 128, 128], kernel_size=7, dropout=0.2)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    best_val_loss = float('inf')
    epochs_no_improve = 0
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        train_correct = 0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_correct += (outputs.argmax(dim=1) == batch_y).sum().item()

        model.eval()
        val_loss = 0
        val_correct = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                outputs = model(batch_X)
                val_loss += criterion(outputs, batch_y).item()
                val_correct += (outputs.argmax(dim=1) == batch_y).sum().item()

        scheduler.step()
        
        train_loss_avg = train_loss / len(train_loader)
        val_loss_avg = val_loss / len(val_loader)
        train_acc = train_correct / len(X_train)
        val_acc = val_correct / len(X_val)
        
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss_avg:.4f}, Val Loss: {val_loss_avg:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, LR: {scheduler.get_last_lr()[0]:.6f}")
        
        if val_loss_avg < best_val_loss:
            best_val_loss = val_loss_avg
            epochs_no_improve = 0
            best_model_state = model.state_dict()
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs")
                break
    
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    torch.save(model.state_dict(), "tcn_model_3.pth")
    print("Model saved to tcn_model_3.pth")
    return model


In [3]:
# Global variables
trained_model = None
direction_map = {0: "Up (0, 1)", 1: "Down (0, -1)", 2: "Right (1, 0)", 3: "Left (-1, 0)", 4: "No move (0, 0)"}
reverse_map = {(0, 1): 0, (0, -1): 1, (1, 0): 2, (-1, 0): 3, (0, 0): 4}

# Train with rule
sequences_with_rule = collect_and_save_data(num_runs=2000, filename="abm_data_with_rule_3.pkl", use_rule=True)
X, y = prepare_training_data(sequences_with_rule)
print(f"Loaded {len(sequences_with_rule)} runs with rule")
print(f"Training data shape: X={X.shape}, y={y.shape}")

print("\nTraining TCN (Direction Prediction with Rule)...")
trained_model = train_tcn(X, y, epochs=300, batch_size=64, learning_rate=0.001, patience=20)

# Compare with vs. without rule
def compare_rule_effects(runs=100, steps=10):
    no_rule_model = TimeModel(use_rule=False)
    no_rule_moves = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    for _ in range(runs):
        for _ in range(steps):
            no_rule_model.step()
        for agent in no_rule_model.schedule:
            for i in range(len(agent.positions) - 1):
                dx, dy = agent.positions[i + 1][0] - agent.positions[i][0], agent.positions[i + 1][1] - agent.positions[i][1]
                move_idx = reverse_map.get((dx, dy), 4)
                no_rule_moves[move_idx] += 1

    with_rule_model = TimeModel(use_rule=True)
    with_rule_moves = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    for _ in range(runs):
        for _ in range(steps):
            with_rule_model.step()
        for agent in with_rule_model.schedule:
            for i in range(len(agent.positions) - 1):
                dx, dy = agent.positions[i + 1][0] - agent.positions[i][0], agent.positions[i + 1][1] - agent.positions[i][1]
                move_idx = reverse_map.get((dx, dy), 4)
                with_rule_moves[move_idx] += 1

    total_no_rule = sum(no_rule_moves.values())
    total_with_rule = sum(with_rule_moves.values())
    print("\nNo Rule Move Frequencies:", {direction_map[k]: v/total_no_rule for k, v in no_rule_moves.items()})
    print("With Rule Move Frequencies:", {direction_map[k]: v/total_with_rule for k, v in with_rule_moves.items()})

# Run comparison
compare_rule_effects(runs=100, steps=10)

Completed 100/2000 runs
Completed 200/2000 runs
Completed 300/2000 runs
Completed 400/2000 runs
Completed 500/2000 runs
Completed 600/2000 runs
Completed 700/2000 runs
Completed 800/2000 runs
Completed 900/2000 runs
Completed 1000/2000 runs
Completed 1100/2000 runs
Completed 1200/2000 runs
Completed 1300/2000 runs
Completed 1400/2000 runs
Completed 1500/2000 runs
Completed 1600/2000 runs
Completed 1700/2000 runs
Completed 1800/2000 runs
Completed 1900/2000 runs
Completed 2000/2000 runs
Saved 2000 runs to abm_data_with_rule_3.pkl
Loaded 2000 runs with rule
Training data shape: X=torch.Size([360000, 5, 60]), y=torch.Size([360000])

Training TCN (Direction Prediction with Rule)...
Epoch [10/300], Train Loss: 3.0290, Val Loss: 2.6938, Train Acc: 0.2859, Val Acc: 0.2952, LR: 0.001000
Epoch [20/300], Train Loss: 3.0124, Val Loss: 2.6229, Train Acc: 0.2934, Val Acc: 0.3073, LR: 0.001000
Epoch [30/300], Train Loss: 3.0106, Val Loss: 2.6400, Train Acc: 0.2961, Val Acc: 0.3086, LR: 0.001000
Epoc

Notes:

In [14]:
import torch
import pickle

def collect_and_save_data(num_runs=1000, filename="abm_data.pkl"):
    all_sequences = []
    for run in range(num_runs):
        model = TimeModel()
        for _ in range(5):
            model.step()    
        sequences = model.get_positions()
        all_sequences.extend(sequences)
        if (run + 1) % 100 == 0:  # Print progress every 10 runs
            print(f"Completed {run + 1}/{num_runs} runs")
    
    # Save to file
    with open(filename, 'wb') as f:
        pickle.dump(all_sequences, f)
    print(f"Saved {len(all_sequences)} sequences to {filename}")
    return all_sequences

def load_data(filename="abm_data.pkl"):
    with open(filename, 'rb') as f:
        sequences = pickle.load(f)
    return sequences
'''
def prepare_training_data(sequences, seq_len=5):
    X, y = [], []
    for seq in sequences:
        if len(seq) >= seq_len + 1:  # Ensure enough positions
            X.append(seq[:seq_len])  # Input: first 5 positions
            y.append(seq[seq_len])   # Target: 6th position
    X = torch.tensor(X, dtype=torch.float32)  # Shape: (num_samples, seq_len, 2)
    y = torch.tensor(y, dtype=torch.float32)  # Shape: (num_samples, 2)
    return X, y
'''

# Updated prepare_training_data for classification
def prepare_training_data(sequences, seq_len=5):
    X, y = [], []
    for seq in sequences:
        if len(seq) >= seq_len + 1:
            X.append(seq[:seq_len])
            # Convert (x, y) target to grid index: y * 10 + x
            x, y_coord = seq[seq_len]
            y.append(y_coord * 10 + x)
    X = torch.tensor(X, dtype=torch.float32) / 9.0  # Normalize inputs
    y = torch.tensor(y, dtype=torch.long)  # Long for class indices
    return X, y

# Collect and save
sequences = collect_and_save_data(num_runs=1000)

# Later, load and prepare
# sequences = load_data()
X, y = prepare_training_data(sequences)
print(f"Loaded {len(sequences)} sequences")
print(f"Training data shape: X={X.shape}, y={y.shape}")


# Example: Print a few sequences for verification
for i in range(min(3, len(sequences))):
    print(f"Sample sequence {i+1}: {sequences[i]}")

Completed 100/1000 runs
Completed 200/1000 runs
Completed 300/1000 runs
Completed 400/1000 runs
Completed 500/1000 runs
Completed 600/1000 runs
Completed 700/1000 runs
Completed 800/1000 runs
Completed 900/1000 runs
Completed 1000/1000 runs
Saved 5000 sequences to abm_data.pkl
Loaded 5000 sequences
Training data shape: X=torch.Size([4998, 5, 2]), y=torch.Size([4998])
Sample sequence 1: [(6, 3), (5, 3), (5, 2), (5, 3), (5, 2), (6, 2)]
Sample sequence 2: [(8, 6), (8, 7), (9, 7), (8, 7), (8, 6), (8, 7)]
Sample sequence 3: [(0, 4), (0, 3), (0, 4), (1, 4), (1, 5), (1, 4)]


In [None]:
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

# Define the BiLSTM model
class BiLSTM(nn.Module):
    def __init__(self, input_size=2, hidden_size=32, output_size=100): # 100 classes for 10x10 grid
        super(BiLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size * 2, output_size) # Output: 100 grid positions
                                                          # Output size of LSTM is hidden_size * 2 (forward + backward)

    def forward(self, x):
                               # x shape: (batch_size, seq_len, input_size)
        out, _ = self.lstm(x)  # out: (batch_size, seq_len, hidden_size * 2)
        out = self.fc(out[:, -1, :])  # Raw logits for classification # Take the last time step: (batch_size, output_size)
        return out

# Training function with early stopping
def train_rnn(X, y, epochs=300, batch_size=32, learning_rate=0.001, patience=20):

    # Split into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    val_dataset = torch.utils.data.TensorDataset(X_val, y_val)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

    # Initialize BiLSTM model, loss, and optimizer
    model = BiLSTM(input_size=2, hidden_size=32, output_size=100)
    criterion = nn.CrossEntropyLoss()  # For classification
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    # Early stopping variables
    best_val_loss = float('inf')
    epochs_no_improve = 0
    best_model_state = None

    # Training loop
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        
        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                outputs = model(batch_X)
                val_loss += criterion(outputs, batch_y).item()
        
        scheduler.step()
        train_loss_avg = train_loss / len(train_loader)
        val_loss_avg = val_loss / len(val_loader)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss_avg:.4f}, Val Loss: {val_loss_avg:.4f}, LR: {scheduler.get_last_lr()[0]:.6f}")
        
        # Early stopping check
        if val_loss_avg < best_val_loss:
            best_val_loss = val_loss_avg
            epochs_no_improve = 0
            best_model_state = model.state_dict()  # Save best model
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs")
                break

    # Load best model and save
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    torch.save(model.state_dict(), "bilstm_model.pth")
    print("Model saved to bilstm_model.pth")
    return model

# Assuming X, y, and sequences are from your 1000-run data
X, y = prepare_training_data(sequences)  # Updated function call
print(f"Training data shape: X={X.shape}, y={y.shape}")

print("\nTraining BiLSTM (Classification)...")
trained_model = train_rnn(X, y, epochs=300, batch_size=32, learning_rate=0.001, patience=20)


# Test the model with a few sample sequences
trained_model.eval()
with torch.no_grad():
    sample_X = X[:5] # Already normalized
    predictions = trained_model(sample_X).argmax(dim=1) # Get class index
    
    print("\nSample predictions vs. actual:")
    for i in range(5):
        pred_idx = predictions[i].item()
        pred_x, pred_y = pred_idx % 10, pred_idx // 10  # Convert index back to (x, y)
        print(f"Sequence {i+1}: {sequences[i]}")
        print(f"Predicted next: ({pred_x}, {pred_y}), Actual: {sequences[i][5]}")

        #print(f"Sequence {i+1}: {sequences[i]}")
        #pred = predictions[i].tolist()
        #print(f"Predicted next: ({pred[0]:.2f}, {pred[1]:.2f}), Actual: {sequences[i][5]}")
        
    

Training data shape: X=torch.Size([4998, 5, 2]), y=torch.Size([4998])

Training BiLSTM (Classification)...
Epoch [10/300], Train Loss: 2.9309, Val Loss: 2.9550, LR: 0.001000
Epoch [20/300], Train Loss: 2.6277, Val Loss: 2.6621, LR: 0.001000
Epoch [30/300], Train Loss: 2.4649, Val Loss: 2.5072, LR: 0.001000
Epoch [40/300], Train Loss: 2.3657, Val Loss: 2.4204, LR: 0.001000
Epoch [50/300], Train Loss: 2.2955, Val Loss: 2.3561, LR: 0.000500
Epoch [60/300], Train Loss: 2.2505, Val Loss: 2.3216, LR: 0.000500
Epoch [70/300], Train Loss: 2.2288, Val Loss: 2.3013, LR: 0.000500
Epoch [80/300], Train Loss: 2.2087, Val Loss: 2.2836, LR: 0.000500
Epoch [90/300], Train Loss: 2.1879, Val Loss: 2.2748, LR: 0.000500
Epoch [100/300], Train Loss: 2.1696, Val Loss: 2.2646, LR: 0.000250
Epoch [110/300], Train Loss: 2.1539, Val Loss: 2.2613, LR: 0.000250
Epoch [120/300], Train Loss: 2.1460, Val Loss: 2.2525, LR: 0.000250
Epoch [130/300], Train Loss: 2.1389, Val Loss: 2.2493, LR: 0.000250
Epoch [140/300], T