In [712]:
from typing import Optional, Tuple, Union

import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor

from torch_geometric.loader import NeighborSampler as RawNeighborSampler
from torch_geometric.nn import SAGEConv
import pandas as pd

In [713]:
# Load the street-node table and keep only the model features: everything
# before the fifth column is discarded by position, then two further columns
# are discarded by name.
street_nodes_df = (
    pd.read_csv("./outputs/nm_street_nodes.csv")
    .iloc[:, 4:]
    .drop(columns=["street_length", "Average_POI_Distance"])
)

# One row per node, one column per remaining feature.
street_nodes_features_tensor = torch.tensor(street_nodes_df.values.tolist())
number_of_nodes, number_of_node_features = street_nodes_features_tensor.shape

print(street_nodes_features_tensor)
print(number_of_nodes)
print(number_of_node_features)

tensor([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0.]])
8242
6


In [714]:
# Load the street-to-street edge list.
street_edges_df = pd.read_csv("./outputs/nm_street_edges.csv")

source_street_index = street_edges_df["source_street"]
targe_street_index = street_edges_df["target_street"]
street_distance_weight = street_edges_df["distance"]

# Each endpoint column becomes a (1, num_edges) tensor; stacking them along
# dim 0 gives the (2, num_edges) edge index consumed by the sampler below.
street_edges_source_index_tensor = torch.tensor(source_street_index.values.tolist()).unsqueeze(0)
street_edges_target_index_tensor = torch.tensor(targe_street_index.values.tolist()).unsqueeze(0)
street_edges_index_tensor = torch.cat(
    [street_edges_source_index_tensor, street_edges_target_index_tensor],
    dim=0,
)

# Per-edge weight, in the same order as the columns of the edge index.
street_edges_weight_tensor = torch.tensor(street_distance_weight.values.tolist())

print(street_edges_index_tensor)
print(street_edges_weight_tensor)

tensor([[   0,    1,    0,  ..., 8238, 8239, 8240],
        [   1,    0,    2,  ..., 8237, 8240, 8239]])
tensor([30.8780, 30.8780, 27.4470,  ..., 56.6500, 14.9740, 14.9740])


In [715]:
@torch.jit.script
def torch_random_walk(
    row: Tensor,
    col: Tensor,
    start: Tensor,
    walk_length: int,
    coalesced: bool = True,
    num_nodes: Optional[int] = None,
    return_edge_indices: bool = False,
) -> Union[Tensor, Tuple[Tensor, Tensor]]:
    """Run random walks over the graph given by ``(row, col)`` via torch_cluster.

    Args:
        row: Source node index of every edge.
        col: Target node index of every edge.
        start: Node indices the walks start from.
        walk_length: Walk length forwarded unchanged to the torch_cluster op.
        coalesced: If True, edges are first sorted by the key
            ``row * num_nodes + col``.
        num_nodes: Number of nodes. When None it is inferred as the largest
            index found in ``row``, ``col`` or ``start`` plus one.
        return_edge_indices: If True, also return the edge sequence produced
            by the op.

    Returns:
        ``node_seq``, or ``(node_seq, edge_seq)`` when ``return_edge_indices``
        is True.
    """
    if num_nodes is None:
        num_nodes = max(int(row.max()), int(col.max()), int(start.max())) + 1

    if coalesced:
        # Sort edges by (row, col). The permutation is not undone afterwards,
        # so any edge ids returned below presumably refer to this sorted
        # order rather than the caller's original order -- TODO confirm.
        perm = torch.argsort(row * num_nodes + col)
        row, col = row[perm], col[perm]

    # Count how many edges leave each node, then turn the counts into a
    # row-pointer array: rowptr[0] == 0 and rowptr[i + 1] is the cumulative
    # edge count of nodes 0..i.
    # NOTE(review): rowptr only lines up with `col` when edges are grouped by
    # row; with coalesced=False the caller presumably has to pre-sort them.
    deg = row.new_zeros(num_nodes)
    deg.scatter_add_(0, row, torch.ones_like(row))
    rowptr = row.new_zeros(num_nodes + 1)
    torch.cumsum(deg, 0, out=rowptr[1:])

    # The two trailing 1.0 arguments are presumably the node2vec return (p)
    # and in-out (q) parameters -- confirm against the torch_cluster op.
    # NOTE(review): torch.ops.torch_cluster is only available once the
    # torch_cluster extension has been loaded; this file does not import it
    # explicitly.
    node_seq, edge_seq = torch.ops.torch_cluster.random_walk(
        rowptr, col, start, walk_length, 1.0,1.0,
    )

    if return_edge_indices:
        return node_seq, edge_seq
    return node_seq

In [716]:
def custom_pos_sampling(
    edge_weight: Tensor,
    batch: Tensor,
    edge_index: Optional[Tensor] = None,
) -> Tensor:
    """Sample one weighted positive neighbour for every node in ``batch``.

    For each start node, every edge that touches the node (as either
    endpoint) is collected, one of those edges is drawn with probability
    proportional to its weight, and the edge's other endpoint becomes the
    positive sample.

    NOTE(review): because the probability is proportional to the weight, a
    larger weight makes an edge MORE likely to be picked. The original inline
    comment said "take the nearest edge"; if the weights are distances, confirm
    that favouring longer edges is intended.

    Args:
        edge_weight: 1-D tensor holding one weight per column of the edge
            index.
        batch: 1-D tensor of start node ids.
        edge_index: ``(2, num_edges)`` edge index to search. Defaults to the
            notebook-level ``street_edges_index_tensor``.

    Returns:
        An int32 tensor of shape ``(len(batch), 2)`` whose rows are
        ``[start_node, sampled_neighbour]``. An empty batch yields shape
        ``(0, 2)`` so that callers can still slice ``[:, 1]``.

    Raises:
        ValueError: If a start node has no incident edge, or if the weights of
            its incident edges do not sum to a positive finite number.
    """
    if edge_index is None:
        edge_index = street_edges_index_tensor

    walks = []
    for node_id in batch.tolist():
        # Column positions of every edge that has node_id as source or target.
        incident = (edge_index == node_id).nonzero(as_tuple=True)[1]
        if incident.numel() == 0:
            raise ValueError(
                f"node {node_id} has no incident edges; cannot sample a positive neighbour"
            )

        # Normalise in float64: np.random.choice validates the sum of `p`
        # against a float64-level tolerance, which float32 quotients can miss.
        weights = edge_weight.index_select(0, incident).double().numpy()
        total = weights.sum()
        if not np.isfinite(total) or total <= 0:
            raise ValueError(
                f"incident edge weights of node {node_id} must sum to a positive finite value"
            )

        pick = int(np.random.choice(len(weights), p=weights / total))
        chosen_edge = int(incident[pick])

        # Take the endpoint of the chosen edge that is not the start node.
        # For a self-loop both endpoints equal the start node.
        source, target = edge_index[:, chosen_edge].tolist()
        walks.append([node_id, target if source == node_id else source])

    if not walks:
        return torch.empty((0, 2), dtype=torch.int32)
    return torch.from_numpy(np.asarray(walks, dtype=np.int32))

In [717]:
def custom_neg_sampling(
    edge_weight: Tensor,
    batch: Tensor,
    adj_size: int,
    edge_index: Optional[Tensor] = None,
    max_tries: int = 100,
) -> Tensor:
    """Draw one "far away" negative node for every node in ``batch``.

    For each anchor node the mean weight of its incident edges is used as a
    threshold. Candidate nodes are drawn uniformly from ``[0, adj_size)`` and
    the first candidate that is not the anchor, has at least one incident
    edge, and whose smallest incident edge weight is at least the threshold is
    accepted.

    Args:
        edge_weight: 1-D tensor holding one weight per column of the edge
            index.
        batch: 1-D tensor of anchor node ids.
        adj_size: Exclusive upper bound for candidate node ids.
        edge_index: ``(2, num_edges)`` edge index to search. Defaults to the
            notebook-level ``street_edges_index_tensor``.
        max_tries: Maximum number of candidates drawn per anchor (at least one
            is always drawn). If none is accepted, the last candidate drawn is
            returned as a uniform-random fallback, so the loop always ends.

    Returns:
        An int64 tensor of shape ``(len(batch),)`` with one negative node id
        per anchor.
    """
    if edge_index is None:
        edge_index = street_edges_index_tensor

    def _incident_weights(node_id: int) -> Tensor:
        # Weights of every edge that has node_id as source or target.
        columns = (edge_index == node_id).nonzero(as_tuple=True)[1]
        return edge_weight.index_select(0, columns)

    negatives = []
    for node_id in batch.tolist():
        own_weights = _incident_weights(node_id)
        # An anchor without edges has no meaningful average; use 0 so that
        # any connected candidate qualifies.
        threshold = float(own_weights.double().mean()) if own_weights.numel() > 0 else 0.0

        candidate = node_id
        for _ in range(max(1, max_tries)):
            candidate = int(torch.randint(0, adj_size, (1,)))
            if candidate == node_id:
                continue  # an anchor must not be its own negative
            candidate_weights = _incident_weights(candidate)
            if candidate_weights.numel() == 0:
                continue  # no edges -> nothing to compare against
            if float(candidate_weights.min()) >= threshold:
                break
        negatives.append(candidate)

    # np.int64 is spelled out so the node-id dtype is the same on every platform.
    return torch.from_numpy(np.asarray(negatives, dtype=np.int64))

In [718]:
"""
RawNeighborSampler: this module iteratively samples neighbors (at each layer) and constructs bipartite graphs that simulate the actual computation flow of GNNs.

sizes: denotes how many neighbors we want to sample for each node in each layer.

NeighborSampler holds the current :obj:`batch_size`, the IDs :obj:`n_id` of all nodes involved in the computation, and a list of bipartite graph objects via the tuple :obj:`(edge_index, e_id, size)`, where :obj:`edge_index` represents the bipartite edges between source and target nodes, :obj:`e_id` denotes the IDs of the original edges in the full graph, and :obj:`size` holds the shape of the bipartite graph.

The actual computation graphs are then returned in reverse-mode, meaning that we pass messages from a larger set of nodes to a smaller one, until we reach the nodes for which we originally wanted to compute embeddings.
https://www.arangodb.com/2021/08/a-comprehensive-case-study-of-graphsage-using-pytorchgeometric/
"""
class NeighborSampler(RawNeighborSampler):
    """Neighbour sampler that extends every batch with positive and negative nodes.

    Each mini-batch of anchor nodes is expanded to
    ``[anchors, positives, negatives]`` (three equally sized parts, in that
    order) before being handed to the parent sampler.
    """

    def sample(self, batch):
        """Build the sampled computation graph for one mini-batch.

        Args:
            batch: Anchor node ids, either a tensor or something
                ``torch.tensor`` can convert.

        Returns:
            Whatever the parent ``sample`` returns for the expanded batch.
        """
        if not isinstance(batch, Tensor):
            batch = torch.tensor(batch)

        # Positive per anchor: a weighted neighbour, taken from column 1 of
        # the [anchor, neighbour] pairs. The helper returns int32, so cast to
        # long explicitly instead of relying on torch.cat type promotion.
        pos_batch = custom_pos_sampling(street_edges_weight_tensor, batch)[:, 1].to(torch.long)

        # Negative per anchor: a node id drawn uniformly at random.
        neg_batch = torch.randint(0, self.adj_t.size(1), (batch.numel(),), dtype=torch.long)

        expanded = torch.cat([batch, pos_batch, neg_batch], dim=0)
        return super().sample(expanded)


# Mini-batch loader over the street graph: 10 sampled neighbours in each of
# the two layers, 128 anchor nodes per batch, iterated in fixed order.
train_loader = NeighborSampler(
    street_edges_index_tensor,
    sizes=[10, 10],
    batch_size=128,
    shuffle=False,
    num_nodes=number_of_nodes,
)

In [719]:
class SAGE(nn.Module):
    """Stack of ``SAGEConv`` layers with ReLU and dropout between layers.

    The first layer maps ``in_channels`` to ``hidden_channels``; every later
    layer maps ``hidden_channels`` to ``hidden_channels``. The last layer's
    output is returned without activation or dropout.
    """

    def __init__(self, in_channels, hidden_channels, num_layers):
        super().__init__()
        self.num_layers = num_layers
        input_widths = [
            in_channels if layer == 0 else hidden_channels
            for layer in range(num_layers)
        ]
        self.convs = nn.ModuleList(
            [SAGEConv(width, hidden_channels) for width in input_widths]
        )

    def _between_layers(self, h):
        """Apply ReLU followed by dropout (p=0.5, active only in training)."""
        return F.dropout(h.relu(), p=0.5, training=self.training)

    def forward(self, x, adjs):
        """Mini-batch forward pass over a list of ``(edge_index, e_id, size)`` tuples."""
        final_layer = self.num_layers - 1
        for depth, (edge_index, _, size) in enumerate(adjs):
            # Target nodes are always placed first, so the leading size[1]
            # rows are the targets of this layer.
            targets = x[:size[1]]
            x = self.convs[depth]((x, targets), edge_index)
            if depth != final_layer:
                x = self._between_layers(x)
        return x

    def full_forward(self, x, edge_index):
        """Forward pass over the whole graph using a single edge index."""
        final_layer = self.num_layers - 1
        for depth, conv in enumerate(self.convs):
            x = conv(x, edge_index)
            if depth != final_layer:
                x = self._between_layers(x)
        return x


# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Two-layer GraphSAGE encoder with 64-dimensional embeddings, plus its optimizer.
model = SAGE(number_of_node_features, hidden_channels=64, num_layers=2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Full-graph node features and edge index on the training device.
x = street_nodes_features_tensor.to(device)
edge_index = street_edges_index_tensor.to(device)

In [720]:
def train():
    """Run one pass over ``train_loader`` and return the loss averaged per node.

    Every batch's output is split into three equal parts (anchor, positive,
    negative embeddings). The loss pushes anchor/positive dot products up and
    anchor/negative dot products down through a log-sigmoid.
    """
    model.train()

    running_loss = 0
    for _batch_size, n_id, adjs in train_loader:
        # `adjs` holds a list of `(edge_index, e_id, size)` tuples.
        adjs = [adj.to(device) for adj in adjs]
        optimizer.zero_grad()

        embeddings = model(x[n_id], adjs)
        part = embeddings.size(0) // 3
        anchor, positive, negative = embeddings.split(part, dim=0)

        pos_score = (anchor * positive).sum(-1)
        neg_score = (anchor * negative).sum(-1)
        pos_loss = F.logsigmoid(pos_score).mean()
        neg_loss = F.logsigmoid(-neg_score).mean()

        loss = -pos_loss - neg_loss
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the number of anchors in the batch.
        running_loss += float(loss) * anchor.size(0)

    return running_loss / number_of_nodes


@torch.no_grad()
def get_model_embedding():
    """Return the full-graph node embeddings on the CPU, computed in eval mode."""
    model.eval()
    return model.full_forward(x, edge_index).cpu()


# Exclusive upper bound of the epoch counter: epochs run from 1 to 39.
EPOCH_STOP = 40

for epoch in range(1, EPOCH_STOP):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, ')

# Last expression of the cell, so the notebook displays the embeddings.
get_model_embedding()

Epoch: 001, Loss: 1.3446, 
Epoch: 002, Loss: 1.2589, 
Epoch: 003, Loss: 1.1670, 
Epoch: 004, Loss: 1.1220, 
Epoch: 005, Loss: 1.1179, 
Epoch: 006, Loss: 1.0938, 
Epoch: 007, Loss: 1.1030, 
Epoch: 008, Loss: 1.0915, 
Epoch: 009, Loss: 1.1073, 
Epoch: 010, Loss: 1.0714, 
Epoch: 011, Loss: 1.0645, 
Epoch: 012, Loss: 1.0459, 
Epoch: 013, Loss: 1.0515, 
Epoch: 014, Loss: 1.0534, 
Epoch: 015, Loss: 1.0483, 
Epoch: 016, Loss: 1.0549, 
Epoch: 017, Loss: 1.0416, 
Epoch: 018, Loss: 1.0319, 
Epoch: 019, Loss: 1.0331, 
Epoch: 020, Loss: 1.0343, 
Epoch: 021, Loss: 1.0291, 
Epoch: 022, Loss: 1.0208, 
Epoch: 023, Loss: 1.0285, 
Epoch: 024, Loss: 1.0217, 
Epoch: 025, Loss: 1.0421, 
Epoch: 026, Loss: 1.0219, 
Epoch: 027, Loss: 1.0055, 
Epoch: 028, Loss: 1.0281, 
Epoch: 029, Loss: 1.0064, 
Epoch: 030, Loss: 1.0113, 
Epoch: 031, Loss: 1.0130, 
Epoch: 032, Loss: 1.0197, 
Epoch: 033, Loss: 1.0198, 
Epoch: 034, Loss: 1.0145, 
Epoch: 035, Loss: 1.0068, 
Epoch: 036, Loss: 0.9991, 
Epoch: 037, Loss: 1.0268, 
E

tensor([[-0.0155,  0.0659, -0.0317,  ..., -0.0446,  0.1622, -0.0338],
        [-0.0155,  0.0659, -0.0317,  ..., -0.0446,  0.1622, -0.0338],
        [-0.0155,  0.0659, -0.0317,  ..., -0.0446,  0.1622, -0.0338],
        ...,
        [-0.0155,  0.0659, -0.0317,  ..., -0.0446,  0.1622, -0.0338],
        [-0.0155,  0.0659, -0.0317,  ..., -0.0446,  0.1622, -0.0338],
        [-0.1300, -0.2442,  0.1846,  ...,  0.1332, -0.5455,  0.2825]])