In [None]:
import pandas as pd
import numpy as np
from collections import defaultdict
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

def load_cora():
    with open("/home/lxhq/workspace/my-graphsage/cora/cora.content") as content:
        content = pd.read_csv(content, sep='\t', header=None)
        labels_raw = content.iloc[:, -1]
        labels_map = {i: j for j, i in enumerate(set(labels_raw))}
        labels = np.array([labels_map[i] for i in labels_raw], dtype=np.int32)
        nodes = np.array(content.iloc[:, 1:-1], dtype=np.int32)
        idx = np.array(content.iloc[:, 0], dtype = np.int32)
        idx_map = {i: j for j, i in enumerate(idx)}

    with open("/home/lxhq/workspace/my-graphsage/cora/cora.cites") as cites:
        cites = pd.read_csv(cites, sep = '\t', header = None)
        edges_index = np.array(list(map(idx_map.get, cites.values.flatten())), dtype=np.int32)
        edges_index = edges_index.reshape(cites.shape)
        adj = defaultdict(set)
        for edge in edges_index:
            adj[edge[0]].add(edge[1])
            adj[edge[1]].add(edge[0])
    return nodes, adj, labels

In [None]:

class node_set(Dataset):
    def __init__(self, nodes, labels, device):
        self.nodes = torch.tensor(nodes, device=device, dtype=torch.float)
        self.labels = torch.tensor(labels, device=device, dtype=torch.long)

    def __getitem__(self, index):
        return self.nodes[index], self.labels[index]

    def __len__(self):
        return len(self.nodes)

In [146]:
embedding_feat = 128
epochs = 100
batch = 256
layers = 2
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
features, adj, labels = load_cora()
features = torch.tensor(features, device=device, dtype=torch.float)
nodes = np.array(range(len(features)))
train_dataset = node_set(nodes[1500:], labels[1500:], device)
test_dataset = node_set(nodes[:1000], labels[:1000], device)
train_data_loader = DataLoader(train_dataset, batch_size=batch, shuffle=True, drop_last=False)
test_data_loader = DataLoader(test_dataset, batch_size=batch, shuffle=False, drop_last=False)

In [217]:
def get_layers_input(input, adj, layers):
    node_layers = []
    all_nodes = set()
    for layer in range(layers):
        if layer == 0:
            node_layers.append(input)
            all_nodes.union([i.item() for i in input])
        else:
            temp = []
            for node in node_layers[layer - 1]:
                neighbors = adj[node.item()]
                temp.append(node.item())
                for neighbor in neighbors:
                    if neighbor not in all_nodes:
                        temp.append(neighbor)
                        all_nodes.add(neighbor)
            node_layers.append(torch.tensor(temp))
    return list(reversed(node_layers))
    

In [220]:
for idx, (input, target) in enumerate(train_data_loader):
    input_layers = get_layers_input(input, adj, layers)
    print(len(input_layers[0]))
    print(len(input_layers[1]))

881
256
835
256
931
256
868
256
715
184


In [214]:
a = [torch.tensor(1),torch.tensor(2),3,4]

In [215]:
list(reversed(a))

[4, 3, tensor(2), tensor(1)]

In [221]:
a = {0:[1,2,3], 1:[2,3,4], 2:[4,5,6],3:[5,6,7]}

In [222]:
a

{0: [1, 2, 3], 1: [2, 3, 4], 2: [4, 5, 6], 3: [5, 6, 7]}

In [236]:
a = {1:2,2:3}
a.items()[0]

TypeError: 'dict_items' object is not subscriptable

In [232]:
a[0] = torch.tensor([1,2,3,4,5])

RuntimeError: The expanded size of the tensor (4) must match the existing size (5) at non-singleton dimension 0.  Target sizes: [4].  Tensor sizes: [5]

In [231]:
a

tensor([[1., 2., 3., 4.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])