In [1]:
import sys, os
# Put the parent directory first on the import path; the project packages
# imported in the next cell (`models`, `dataset`, `utils`) presumably live there — confirm.
sys.path.insert(0, '..')

In [2]:
import torch
from models.weighted_node2vec import WeightedNode2Vec
from torch_geometric.nn.models import Node2Vec
from dataset import triplet_dataset
from utils.config import *
from tqdm import tqdm, trange
import gc
import warnings

# Make sure automatic garbage collection is switched on (it already is by
# default in CPython, so this is only a safeguard).
gc.enable()

In [3]:
# Instantiate the Pokec triplet dataset and keep handles on the graph
# structure that the following cells need.
d = triplet_dataset.TripletPokecDataset()
edge_index = d.edge_index
num_nodes = d.n_nodes

# Reclaim whatever temporaries the dataset construction left behind.
gc.collect()

# Last expression of the cell: show the edge list.
edge_index

Using existing file soc-pokec-profiles.txt.gz
Using existing file soc-pokec-relationships.txt.gz


tensor([[      0,       0,       0,  ..., 1632801, 1632801, 1632802],
        [     12,      10,       5,  ..., 1632636, 1632735, 1632404]])

In [4]:
# Shape of the first row of the edge list next to the shape of the whole tensor.
edge_index[0].shape, edge_index.shape

(torch.Size([30622564]), torch.Size([2, 30622564]))

In [5]:
# Build the weighted Node2Vec model over the full edge list.
# NOTE(review): `edge_weights` is fed the first row of `edge_index` (i.e. node
# indices), which looks like a placeholder rather than real edge weights — confirm.
model = WeightedNode2Vec(
    edge_weights=edge_index[0, :],
    num_nodes=num_nodes,
    edge_index=edge_index,
    embedding_dim=16,
    walk_length=5,
    context_size=2,
)
model = model.to(DEVICE)

# Unweighted alternative kept for reference (same hyper-parameters, without
# `edge_weights` / `num_nodes`):
# model = Node2Vec(edge_index=edge_index,
#                  embedding_dim=16, walk_length=5,
#                  context_size=2, ).to(DEVICE)

In [6]:
# Batch loader provided by the model itself, plus an Adam optimiser over
# every model parameter.
loader = model.loader(
    batch_size=128,
    shuffle=True,
    num_workers=4,
)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [7]:
def train():
    """Run one full pass over ``loader`` and return the mean batch loss.

    Uses the notebook-level ``model``, ``loader``, ``optimizer`` and
    ``DEVICE``. For every (positive, negative) pair yielded by the loader the
    gradients are reset, the model loss is computed on ``DEVICE``,
    back-propagated, and an optimiser step is taken.

    Returns:
        Sum of the per-batch loss values divided by ``len(loader)``.
    """
    model.train()
    running_loss = 0
    for walks_pos, walks_neg in loader:
        optimizer.zero_grad()
        batch_loss = model.loss(
            walks_pos.to(DEVICE),
            walks_neg.to(DEVICE),
        )
        batch_loss.backward()
        optimizer.step()
        # .item() converts the loss to a plain Python number for accumulation.
        running_loss += batch_loss.item()
    n_batches = len(loader)
    return running_loss / n_batches


@torch.no_grad()
def test():
    model.eval()
    z = model()
    acc = model.test(z[data.train_mask], data.y[data.train_mask],
                     z[data.test_mask], data.y[data.test_mask],
                     max_iter=10)
    return acc

# Train for a single epoch, reporting the epoch's mean loss on the progress bar.
# Evaluation (`test()`) and the every-10-epochs loss print are currently disabled.
t = tqdm(range(1))
for epoch in t:
    loss = train()
    t.set_description(f'Epoch: {epoch:02d}, Loss: {loss:.4f}')
    t.refresh()

Epoch: 00, Loss: 2.8785: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [02:52<00:00, 172.57s/it]


In [8]:
# Pull a single batch out of the loader for inspection.
batch = next(iter(loader))

In [9]:
# Number of items in the batch; train() unpacks each batch as (pos_rw, neg_rw).
len(batch)

2

In [10]:
# Shape of the first batch element (the one train() names `pos_rw`).
batch[0].shape

torch.Size([512, 2])

In [11]:
# These learned embeddings can be fed to a downstream model as node features;
# that model's input feature size would then equal the embedding dimension.
model.embedding.weight.shape

torch.Size([1632803, 16])

In [12]:
# Symmetrise the model's adjacency and unpack its COO representation.
# `row` and `col` are read by later cells, so these statements must actually
# run for the notebook to survive Restart & Run All.
# NOTE(review): assumes the model exposes `adj` with `to_symmetric()` / `coo()` — confirm.
adj = model.adj.to_symmetric()
row, col, _ = adj.coo()

In [13]:
# Show the row / column index tensors (expected to come from `adj.coo()` in the preceding cell).
row, col

(tensor([      0,       0,       0,  ..., 1632801, 1632801, 1632802]),
 tensor([      1,       2,       3,  ..., 1632636, 1632735, 1632404]))

In [14]:
from scipy import sparse

In [15]:
# numpy is not imported by any visible cell, so bring it into scope here.
import numpy as np

# Build the sparse adjacency matrix `A` in CSR form from the (row, col) index
# tensors, storing an int8 one per listed entry. `A` is read by later cells,
# so this must actually run for the notebook to survive Restart & Run All.
ones = np.ones(row.shape[0], dtype=np.int8)
A = sparse.csr_matrix((ones, (row.numpy(), col.numpy())),
                      shape=(num_nodes, num_nodes))

In [16]:
# Show the summary of the sparse adjacency matrix (expected to be built in the preceding cell).
A

<1632803x1632803 sparse matrix of type '<class 'numpy.int8'>'
	with 44603928 stored elements in Compressed Sparse Row format>

In [17]:
from utils import graph

In [18]:
# Wrap the sparse adjacency `A` in the project's graph structure, flagged as undirected.
G = graph.from_numpy(A, undirected=True)

In [19]:
# Attach the first column of the dataset's `X` as the graph's `attr`.
# NOTE(review): presumably a per-node attribute — confirm what column 0 holds.
G.attr = d.X[:, 0]

In [20]:
# Assign weights on `G` via the project helper.
# NOTE(review): the meaning of exp_=2., p_bndry=.7 and l=2 is defined in
# utils.graph and is not visible here — document or name these constants.
graph.set_weights(G, exp_=2., p_bndry=.7, l=2)

In [None]:
import pickle as pkl

# Persist the graph object to disk. The pickle module has no `save` function:
# the API is `dump(obj, file)` with a file opened in binary mode. The object to
# store is the graph `G`, not the imported `graph` module.
with open("graph.pkl", "wb") as fh:
    pkl.dump(G, fh)