Data creation

In [33]:
from math import sqrt
import torch
from torch_geometric.data import Data
from random import randint
from sys import float_info

In [34]:
instances = {}
for k in range(0, 5000):
    nodes = {}
    for i in range(0, 50):
        lat_i = randint(0, 100)
        lon_i = randint(0, 100)
        node_i = (lat_i, lon_i)
        lat_j = randint(0, 100)
        lon_j = randint(0, 100)
        node_j = (lat_j, lon_j)
        nodes[i + 1] = node_i
        nodes[i + 51] = node_j

    dist = {}
    pairs = {}
    for i in range(1, 101):
        for j in range(1, 101):
            if i != j:
                dist[i,j] = sqrt( (nodes[i][0] - nodes[j][0])**2 + (nodes[i][1] - nodes[j][1])**2 )
            else:
                dist[i,j] = float_info.max
    for i in range(1, 101):
        for j in range(1, 101):
            if i not in pairs:
                pairs[i] = j
            if i != j:
                if dist[i,j] < dist[i,pairs[i]]:
                    pairs[i] = j

    nodes[0] = (0,0)
    for i in range(1,101):
        dist[0,i] = sqrt( (nodes[0][0] - nodes[i][0])**2 + (nodes[0][1] - nodes[i][1])**2 )
        dist[i,0] = dist[0,i]
    y = [[0 for _ in range(101)] for _ in range(101)]
    for i in range(101):
        if i > 0:
            y[i][pairs[i]] = 1
                
    instances[k] = {"nodes": nodes, "dist": dist, "y": y}

Short visuals

In [35]:
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import networkx as nx
G = nx.Graph()
G.add_nodes_from([i for i in range(101)])
pos = instances[430]["nodes"]
node_x = [pos[i][0] for i in range(101)]
node_y = [pos[i][1] for i in range(101)]
node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    text=[str(i) for i in range(101)],
    marker=dict(
        showscale=True,
        # colorscale options
        #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
        #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
        #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
        colorscale='YlGnBu',
        reversescale=True,
        color=[],
        size=10,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line_width=2))

fig = go.Figure(data=[node_trace],
             layout=go.Layout(
                title='<br>Ng-Neighborhoods as a graph',
                titlefont_size=16,
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    text="Python code: <a href='https://plotly.com/ipython-notebooks/network-graphs/'> https://plotly.com/ipython-notebooks/network-graphs/</a>",
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002 ) ],
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                height=800, width=800)
                
                )
fig.show()

Create just one dataset for all data

In [59]:
data = torch.device("cpu")  # Change device if needed
num_nodes = 101  # Nodes per set
num_sets = 100  # Total sets
in_features = 2  # Replace with your feature dimension

In [60]:
# Combine all node features into single tensor
all_x = torch.zeros((num_nodes * num_sets, in_features)).to(data)

# Loop through each set and process data
total_edge_index = None  # Initialize to accumulate edges from all sets
total_y = None  # Initialize to accumulate labels

In [61]:
from torch_geometric.nn import knn_graph
from torch_geometric.utils import add_self_loops, negative_sampling

In [69]:
for i in range(num_sets):
    # Sample features for current set (replace with actual data)
    nodes_dict = instances[i]["nodes"]
    new_nodes = []
    for j in range(101):
        new_nodes.append([nodes_dict[j][0], nodes_dict[j][1]])
    set_x = torch.tensor(new_nodes, dtype=torch.float).to(data)
    all_x[i * num_nodes: (i + 1) * num_nodes] = set_x

    # Sample edges (replace with actual edges) for current set
    # Ensure edges are within the current set's node range (0 to num_nodes-1)
    set_edge_index = knn_graph(set_x, 15).to(data)

    # Combine edge indices from all sets (adjust for offset based on set index)
    if total_edge_index is None:
        total_edge_index = set_edge_index + i * num_nodes
    else:
        total_edge_index = torch.cat([total_edge_index, set_edge_index + i * num_nodes], dim=1)

    # Generate labels (1 for positive edges, 0 for negative) for current set
    set_y = torch.tensor([instances[i]["y"][set_edge_index[0][j]][set_edge_index[1][j]] for j in range(len(set_edge_index[0]))] , dtype=torch.int).to(data)

    # Perform negative sampling on the current set's edges
    # set_edge_index, set_y = negative_sampling(set_edge_index, num_nodes, num_neg_samples=set_edge_index.size(1))

    # Accumulate labels from all sets
    if total_y is None:
        total_y = set_y
    else:
        total_y = torch.cat([total_y, set_y], dim=0)

all_edge_weights = torch.ones(total_edge_index.size(1)).to(data)

Model

In [66]:
from torch_geometric.nn import GCNConv, MessagePassing

In [78]:
class LinkPredictionModel(torch.nn.Module):
    def __init__(self, in_features, hidden_features):
        super(LinkPredictionModel, self).__init__()
        self.conv1 = GCNConv(in_features, hidden_features)
        self.conv2 = GCNConv(hidden_features, 1)  # Output dimension is 1 for edge existence

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        return torch.sigmoid(x.squeeze())  # Output edge existence probability

In [80]:
# Model definition and training loop (replace with your training logic)
model = LinkPredictionModel(in_features, 202).to(data)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.BCELoss()  # Binary Cross-Entropy Loss

In [82]:
for epoch in range(10):
    optimizer.zero_grad()
    pred = model(all_x, total_edge_index)
    loss = criterion(pred, total_y)
    loss.backward()
    optimizer.step()

ValueError: Using a target size (torch.Size([303000])) that is different to the input size (torch.Size([10100])) is deprecated. Please ensure they have the same size.