In [4]:
!pip install torch-geometric

Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: torch-geometric
Successfully installed torch-geometric-2.6.1


In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
def hebbian_update(weight, pre_activation, post_activation, learning_rate):
    delta_w = learning_rate * torch.outer(post_activation, pre_activation)
    return weight + delta_w

def hebbian_update_with_oja(weight, pre_activation, post_activation, learning_rate):
    delta_w = learning_rate * torch.outer(post_activation, pre_activation)
    weight = weight + delta_w
    weight = weight / torch.norm(weight, p=2)  # Normalize to prevent explosion
    return weight




In [118]:
import networkx as nx
import numpy as np

def create_grid_graph(width, height, blocked_positions):
    G = nx.grid_2d_graph(width, height)
    mapping = {node: i for i, node in enumerate(G.nodes())}
    G = nx.relabel_nodes(G, mapping)

    for pos in blocked_positions:
        if pos in mapping:
            G.remove_node(mapping[pos])

    return G, mapping  # Return mapping for agent indexing

width, height = 10, 10
blocked_positions = [(1, 1), (2, 2), (3, 3)]
graph,mapping = create_grid_graph(width, height, blocked_positions)


In [166]:
import random

class Agent:
    def __init__(self, start, goal):
        self.start = start
        self.goal = goal
        self.position = start

def initialize_agents(num_agents, graph, mapping):
    nodes = list(graph.nodes)
    agents = []
    used_positions = set()

    for _ in range(num_agents):
        start = random.choice(nodes)
        while start in used_positions:
            start = random.choice(nodes)
        used_positions.add(start)

        goal = random.choice(nodes)
        while goal in used_positions:
            goal = random.choice(nodes)
        used_positions.add(goal)

        agents.append(Agent(start, goal))

    return agents


num_agents = 5
agents = initialize_agents(num_agents, graph,mapping)


In [200]:
import torch
from torch_geometric.utils import from_scipy_sparse_matrix

def get_edge_index(graph):
    edge_index = torch.tensor(list(graph.edges), dtype=torch.long).t().contiguous()
    return edge_index

edge_index = get_edge_index(graph)
print(edge_index)
# adj_matrix = torch.tensor(adj_matrix)
# edge_index = adj_matrix.nonzero(as_tuple=False).t().contiguous()


tensor([[ 0,  0,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8,  9,
         10, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 20, 20, 21,
         23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 30, 30, 31, 31, 32, 34,
         34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 40, 40, 41, 41, 42, 42, 43, 43,
         44, 44, 45, 45, 46, 46, 47, 47, 48, 48, 49, 50, 50, 51, 51, 52, 52, 53,
         53, 54, 54, 55, 55, 56, 56, 57, 57, 58, 58, 59, 60, 60, 61, 61, 62, 62,
         63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 70, 70, 71, 71, 72,
         72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 80, 80, 81, 81,
         82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 87, 87, 88, 88, 89, 90, 91, 92,
         93, 94, 95, 96, 97, 98],
        [10,  1,  2, 12,  3, 13,  4, 14,  5, 15,  6, 16,  7, 17,  8, 18,  9, 19,
         20, 13, 23, 14, 24, 15, 25, 16, 26, 17, 27, 18, 28, 19, 29, 30, 21, 31,
         24, 34, 25, 35, 26, 36, 27, 37, 28, 38, 29, 39, 40, 31, 41, 32, 42

In [210]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv,SAGEConv
from torch_geometric.data import Data

class GNNPolicy(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GNNPolicy, self).__init__()

        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        print(x.shape)
        print(edge_index.shape)
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

# Example usage
input_dim = 2  # e.g., agent position and goal position
hidden_dim = edge_index.shape[1]
output_dim = 5  # up, down, left, right, stay
policy_net = GNNPolicy(input_dim, hidden_dim, output_dim)


In [154]:
num_nodes = graph.number_of_nodes()

In [216]:
import torch.optim as optim

optimizer = optim.Adam(policy_net.parameters(), lr=0.01)

def select_action(state, policy_net, edge_index):
    state_tensor = torch.tensor(state, dtype=torch.float)
    data = Data(x=state_tensor, edge_index=edge_index)

    print(data.x.shape, data.edge_index.shape)
    print(type(state))
    probs = policy_net(data)
    m = torch.distributions.Categorical(probs[state])  # Get action for this agent
    action = m.sample()
    
    return action.item(), m.log_prob(action)


def update_policy(policy_net, optimizer, log_probs, rewards, gamma=0.99):
    discounted_rewards = []
    R = 0
    for r in rewards[::-1]:
        R = r + gamma * R
        discounted_rewards.insert(0, R)
    discounted_rewards = torch.tensor(discounted_rewards)
    discounted_rewards = (discounted_rewards - discounted_rewards.mean()) / (discounted_rewards.std() + 1e-5)
    policy_loss = []
    for log_prob, reward in zip(log_probs, discounted_rewards):
        policy_loss.append(-log_prob * reward)
    optimizer.zero_grad()
    policy_loss = torch.cat(policy_loss).sum()
    policy_loss.backward()
    optimizer.step()


In [217]:
num_episodes = 1000

for episode in range(num_episodes):
    log_probs = []
    rewards = []
    for agent in agents:
        state = [agent.position, agent.goal]
        action, log_prob = select_action(state, policy_net,edge_index)
        log_probs.append(log_prob)
        # Execute action and observe reward
        reward = 0
        
        # Goal achievement
        if agent.position == agent.goal:
            reward += 10
        
        # Collision penalty
        for other_agent in agents:
            if other_agent != agent and agent.position == other_agent.position:
                reward -= 5
        
        # Step penalty
        reward -= 1
        # Update agent position based on action
        # Compute reward based on new state
        rewards.append(reward)
    update_policy(policy_net, optimizer, log_probs, rewards)


torch.Size([2]) torch.Size([2, 168])
<class 'list'>
torch.Size([2])
torch.Size([2, 168])


IndexError: Dimension out of range (expected to be in range of [-1, 0], but got -2)