In [37]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_cluster import random_walk

from torch_geometric.loader import NeighborSampler as RawNeighborSampler
from torch_geometric.nn import SAGEConv
import pandas as pd

In [38]:
# Load the node feature table. The code below treats every CSV row as one
# graph node and every column as one input feature.
street_nodes_df = pd.read_csv("./outputs/nm_street_nodes.csv")

# Build the feature matrix with an explicit float32 dtype. Without it the
# dtype is inferred from the data, so an all-integer table would produce an
# int64 tensor that PyTorch's default float32 layers cannot consume.
# The reshape is a no-op for a non-empty table; it only keeps the tensor
# two-dimensional when the CSV has a header but no rows.
street_nodes_features_tensor = torch.tensor(
    street_nodes_df.values.tolist(), dtype=torch.float
).reshape(street_nodes_df.shape)

# Take the counts from the table shape so an empty table does not raise
# (indexing row 0 of an empty tensor would).
number_of_nodes, number_of_node_features = street_nodes_df.shape

print(street_nodes_features_tensor)
print(number_of_nodes)
print(number_of_node_features)

tensor([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0.]])
8242
6


In [39]:
# Load the edge table; the columns read below are "source_street",
# "target_street" and "distance".
street_edges_df = pd.read_csv("./outputs/nm_street_edges.csv")

source_street_index = street_edges_df["source_street"]
# NOTE(review): "targe" looks like a typo for "target"; the name is kept as-is
# because other cells may reference it.
targe_street_index = street_edges_df["target_street"]
street_distance_weight = street_edges_df["distance"]

# Each endpoint column becomes a single-row tensor of shape [1, num_edges].
street_edges_source_index_tensor = torch.tensor(
    source_street_index.values.tolist()
).unsqueeze(0)
street_edges_target_index_tensor = torch.tensor(
    targe_street_index.values.tolist()
).unsqueeze(0)

# Stack the two rows into a [2, num_edges] tensor: row 0 holds the sources,
# row 1 the targets.
street_edges_index_tensor = torch.cat(
    [street_edges_source_index_tensor, street_edges_target_index_tensor],
    dim=0,
)

# Distances are kept as a single-row tensor of shape [1, num_edges].
street_edges_weight_tensor = torch.tensor(
    street_distance_weight.values.tolist()
).unsqueeze(0)

print(street_edges_index_tensor)
print(street_edges_weight_tensor)

tensor([[   0,    1,    2,  ..., 8106, 8154, 8155],
        [   1,    0,    4,  ..., 8105, 8155, 8154]])
tensor([[30.8780, 30.8780, 29.9170,  ..., 97.1960,  9.6410,  9.6410]])


In [50]:
class NeighborSampler(RawNeighborSampler):
    """Neighbor sampler that adds a positive and a negative node per seed.

    The node list handed to the parent sampler is the concatenation
    ``[seeds, positives, negatives]`` in that order.
    """

    def sample(self, batch):
        """Extend ``batch`` with positive/negative examples, then sample.

        Args:
            batch: Seed node indices; converted with ``torch.tensor``.

        Returns:
            Whatever the parent class's ``sample`` returns for the extended
            node list.
        """
        seeds = torch.tensor(batch)
        num_seeds = seeds.numel()
        src, dst, _ = self.adj_t.coo()

        # Positive example: the node reached by a one-step random walk that
        # starts at each seed (column 1 of the walk matrix).
        walks = random_walk(src, dst, seeds, walk_length=1, coalesced=False)
        positives = walks[:, 1]

        # Negative example: an index drawn uniformly from
        # [0, self.adj_t.size(1)), one per seed.
        negatives = torch.randint(
            0, self.adj_t.size(1), (num_seeds,), dtype=torch.long
        )

        extended = torch.cat([seeds, positives, negatives], dim=0)
        return super().sample(extended)


# Mini-batch loader over the street graph: 128 seed nodes per batch, visited
# in fixed (unshuffled) order, with two sampling sizes of 10.
train_loader = NeighborSampler(
    street_edges_index_tensor,
    num_nodes=number_of_nodes,
    sizes=[10, 10],
    batch_size=128,
    shuffle=False,
)

In [51]:
class SAGE(nn.Module):
    """Stack of ``SAGEConv`` layers with ReLU and dropout between layers.

    Args:
        in_channels: Feature width fed to the first layer.
        hidden_channels: Output width of every layer, and the input width of
            every layer after the first.
        num_layers: Number of ``SAGEConv`` layers to stack.
    """

    def __init__(self, in_channels, hidden_channels, num_layers):
        super().__init__()
        self.num_layers = num_layers
        self.convs = nn.ModuleList([
            SAGEConv(in_channels if depth == 0 else hidden_channels,
                     hidden_channels)
            for depth in range(num_layers)
        ])

    def _between_layers(self, x, depth):
        """Apply ReLU then dropout (p=0.5) unless ``depth`` is the last layer."""
        if depth == self.num_layers - 1:
            return x
        return F.dropout(F.relu(x), p=0.5, training=self.training)

    def forward(self, x, adjs):
        """Run the layers over sampled sub-graphs, one ``adjs`` entry per layer.

        Args:
            x: Feature rows for the sampled nodes.
            adjs: Iterable of ``(edge_index, e_id, size)`` triples; the middle
                element is ignored here.

        Returns:
            The output of the last layer applied.
        """
        for depth, (edge_index, _, size) in enumerate(adjs):
            # The first size[1] rows of x are passed as the target-node
            # features alongside the full x.
            num_targets = size[1]
            hidden = self.convs[depth]((x, x[:num_targets]), edge_index)
            x = self._between_layers(hidden, depth)
        return x

    def full_forward(self, x, edge_index):
        """Run every layer with the same ``edge_index`` (no sampling).

        Args:
            x: Feature rows passed to the first layer.
            edge_index: Edge index passed unchanged to every layer.

        Returns:
            The output of the last layer.
        """
        for depth, conv in enumerate(self.convs):
            x = self._between_layers(conv(x, edge_index), depth)
        return x


# Use the GPU when PyTorch reports one as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Two-layer SAGE model with 64 hidden channels, moved to the chosen device,
# and an Adam optimizer over its parameters.
model = SAGE(number_of_node_features, hidden_channels=64, num_layers=2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Device copies of the full feature matrix and edge index.
x = street_nodes_features_tensor.to(device)
edge_index = street_edges_index_tensor.to(device)

In [54]:
def train():
    """Run one optimisation pass over ``train_loader``.

    The model output for each batch is split into three equal chunks along
    dim 0: anchor, positive and negative embeddings. The loss rewards a high
    anchor-positive dot product and a low anchor-negative dot product.

    Returns:
        The sum over batches of (batch loss * anchor rows in the batch),
        divided by ``number_of_nodes``.
    """
    model.train()

    running_loss = 0
    for _, n_id, sampled_adjs in train_loader:
        # Each entry is moved to the training device before the forward pass.
        device_adjs = [adj.to(device) for adj in sampled_adjs]
        optimizer.zero_grad()

        embeddings = model(x[n_id], device_adjs)
        chunk = embeddings.size(0) // 3
        anchor, positive, negative = embeddings.split(chunk, dim=0)

        pos_score = (anchor * positive).sum(-1)
        neg_score = (anchor * negative).sum(-1)
        pos_loss = F.logsigmoid(pos_score).mean()
        neg_loss = F.logsigmoid(-neg_score).mean()

        loss = -pos_loss - neg_loss
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the number of anchor rows.
        running_loss += float(loss) * anchor.size(0)

    return running_loss / number_of_nodes


@torch.no_grad()
def get_model_embedding():
    """Compute embeddings with ``model.full_forward`` on the whole graph.

    Puts the model in eval mode, runs it on the module-level ``x`` and
    ``edge_index`` with gradients disabled, moves the result to the CPU,
    prints it and returns it.

    Returns:
        The CPU tensor produced by ``model.full_forward(x, edge_index)``.
    """
    model.eval()
    node_embeddings = model.full_forward(x, edge_index)
    node_embeddings = node_embeddings.cpu()
    print(node_embeddings)
    return node_embeddings


# Train for a fixed number of epochs, printing the value returned by train()
# after each one, then compute and print the final embeddings.
NUM_EPOCHS = 50

for epoch in range(1, NUM_EPOCHS + 1):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, ')

get_model_embedding()

Epoch: 001, Loss: 1.1695, 
Epoch: 002, Loss: 1.1907, 
Epoch: 003, Loss: 1.1823, 
Epoch: 004, Loss: 1.1664, 
Epoch: 005, Loss: 1.1785, 
Epoch: 006, Loss: 1.1709, 
Epoch: 007, Loss: 1.1778, 
Epoch: 008, Loss: 1.1856, 
Epoch: 009, Loss: 1.1618, 
Epoch: 010, Loss: 1.1823, 
Epoch: 011, Loss: 1.1788, 
Epoch: 012, Loss: 1.1800, 
Epoch: 013, Loss: 1.1744, 
Epoch: 014, Loss: 1.1658, 
Epoch: 015, Loss: 1.1967, 
Epoch: 016, Loss: 1.1696, 
Epoch: 017, Loss: 1.1800, 
Epoch: 018, Loss: 1.1712, 
Epoch: 019, Loss: 1.1799, 
Epoch: 020, Loss: 1.1765, 
Epoch: 021, Loss: 1.1709, 
Epoch: 022, Loss: 1.1784, 
Epoch: 023, Loss: 1.1800, 
Epoch: 024, Loss: 1.1753, 
Epoch: 025, Loss: 1.1759, 
Epoch: 026, Loss: 1.1705, 
Epoch: 027, Loss: 1.1650, 
Epoch: 028, Loss: 1.1786, 
Epoch: 029, Loss: 1.1818, 
Epoch: 030, Loss: 1.1814, 
Epoch: 031, Loss: 1.1619, 
Epoch: 032, Loss: 1.1895, 
Epoch: 033, Loss: 1.1899, 
Epoch: 034, Loss: 1.1790, 
Epoch: 035, Loss: 1.1645, 
Epoch: 036, Loss: 1.1762, 
Epoch: 037, Loss: 1.1865, 
E