Model: GNN (Generic)

In [1]:
import pandas as pd
# dataset contains 23991306 rows x 4 columns
num_nodes = 10000 # Truncated because of limited resource
dataset = pd.read_csv('/mnt/f/Personal/prj-ml-train-collision-avoidance/dataset/dataset.csv').head(num_nodes)

In [2]:
# Preprocess
dataset['train_speed'] = (dataset['train_speed'] - dataset['train_speed'].mean()) / dataset['train_speed'].std()

In [3]:
import networkx as nx

# Create a directed graph
G = nx.DiGraph()

# Add nodes and edges
for index, row in dataset.iterrows():
    obs_y = row['obs_y']
    obs_x = row['obs_x']
    train_speed = row['train_speed']
    action = row['action']
    
    # Add node with features
    G.add_node(index, obs_y=obs_y, obs_x=obs_x, train_speed=train_speed, action=action)
    
    # Add edges (example: connect consecutive rows)
    if index > 0:
        G.add_edge(index - 1, index)

In [4]:
# Define GNN
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

In [None]:
# Prepare the data
import torch
from torch_geometric.data import Data

edges = [(i, i+1) for i in range(num_nodes-1)]  # Example edges

x = torch.randn((num_nodes, 4))  # 4 features per node

# Edge index
edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()


data = Data(x=x, edge_index=edge_index)

data.y = torch.randint(0, 2, (num_nodes,))

In [7]:
# TASK: Train the model
from torch_geometric.loader import DataLoader

# Training:Test Split: 80:20
train_data = data[:int(0.8 * num_nodes)]
test_data = data[int(0.8 * num_nodes):]
train_loader = DataLoader([train_data], batch_size=1, shuffle=True)
test_loader = DataLoader([test_data], batch_size=1, shuffle=False)

# Initialize Model
model = GCN(in_channels=4, hidden_channels=16, out_channels=2)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Train Model
model.train()
for epoch in range(100):
    for batch in train_loader:
        optimizer.zero_grad()
        out = model(batch)
        loss = criterion(out, batch.y)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

TypeError: unhashable type: 'slice'

In [None]:
# Evaluate Model
model.eval()
correct = 0
for batch in test_loader:
    out = model(batch)
    pred = out.argmax(dim=1)
    correct += (pred == batch.y).sum().item()
accuracy = correct / len(test_data.y)
print(f'Accuracy: {accuracy:.4f}')