In [1]:
import sys, os
# Hide every CUDA device from this process (an empty device list means no GPU
# is visible), so the notebook runs on CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Put the parent directory first on the module search path so the
# project-local packages imported in the next cell can be resolved.
sys.path.insert(0, '..')

In [2]:
import torch
from models.weighted_node2vec import WeightedNode2Vec
from dataset import triplet_dataset, pokec_data
from utils.config import DEVICE
from tqdm import tqdm, trange
import gc
from utils.link_prediction import GCNLinkPrediction
import residual2vec as rv
import warnings

warnings.filterwarnings("ignore")
gc.enable()

In [3]:
# --- Random-walk settings (passed to the walk-based models below) ---
walk_length = 80    # length of each walk
num_walks = 10      # walks per node
window_length = 5   # context window over a walk

# --- Embedding size ---
dim = 128

# --- Data loading ---
NUM_WORKERS = 4     # presumably the number of loader worker processes

In [4]:
# Build the Pokec data wrapper, then pull out the two things later cells use:
# the edge list and the node count.
d = pokec_data.PokecDataFrame()
edge_index = d.edge_index
num_nodes = d.X.shape[0]  # first dimension of X is taken as the number of nodes

Using existing file soc-pokec-profiles.txt.gz
Using existing file soc-pokec-relationships.txt.gz


In [5]:
# Weighted node2vec model over the Pokec graph, moved to the configured device.
# NOTE(review): embedding_dim=16 matches num_embeddings=16 used for the GCN
# further down — presumably intentional; confirm before changing either.
node_to_vec = WeightedNode2Vec(
    num_nodes=num_nodes,
    group_membership=d.get_grouped_col(),
    weighted_adj="../data/pokec_crosswalk_adj.npz",
    edge_index=edge_index,
    embedding_dim=16,
    walk_length=walk_length,
    context_size=2,
).to(DEVICE)

In [6]:
# Run label; its only visible use is the commented-out torch.save call below.
k = "degree-unbiased-gcn"

# residual2vec SGD trainer using a configuration-model node sampler as the
# noise distribution; walk settings come from the config cell.
model = (
    rv.residual2vec_sgd(
        noise_sampler=rv.ConfigModelNodeSampler(),
        window_length=window_length,
        num_walks=num_walks,
        walk_length=walk_length,
    )
    .fit()
)

In [7]:
# Data loader produced by the node2vec model. The worker count comes from the
# NUM_WORKERS constant in the config cell rather than a second hard-coded 4,
# so tuning that constant actually takes effect here.
loader = node_to_vec.loader(
    batch_size=128,
    shuffle=True,
    num_workers=NUM_WORKERS,
)

# Adam over all node2vec parameters; the optimizer accepts the parameter
# iterable directly, so no intermediate list is needed.
optimizer = torch.optim.Adam(node_to_vec.parameters(), lr=0.01)

In [8]:
# Obtain node embeddings from the node2vec model.
# NOTE(review): epochs=0 requests no training epochs, so X is presumably the
# untrained embedding table — confirm this is intended outside of smoke tests.
# NOTE(review): save="path" looks like a placeholder destination — confirm.
X = node_to_vec.train_and_get_embs(
    loader,
    optimizer,
    epochs=0,
    save="path",
)

0it [00:00, ?it/s]

training node2vec





In [9]:
# Wrap the node2vec embeddings and the edge list in a triplet dataset.
# NOTE: this rebinds `d`, which previously held the PokecDataFrame; re-running
# an earlier cell that calls d.get_grouped_col() after this one will not see
# the original object.
d = triplet_dataset.TripletGraphDataset(
    X=X,
    edge_index=edge_index,
)

# Sampler that yields one shuffled item per batch using a single worker.
dataloader = triplet_dataset.NeighborEdgeSampler(
    d,
    batch_size=1,
    shuffle=True,
    num_workers=1,
    pin_memory=True,
)

num_embeddings 6


In [10]:
# GCN link-prediction model. The output embedding size is taken from the `dim`
# constant in the config cell (128) instead of repeating the literal, so the
# notebook has a single place to change the embedding dimension.
# NOTE(review): num_embeddings=16 mirrors embedding_dim=16 of the node2vec
# model above — presumably the two must stay in sync; confirm.
m = GCNLinkPrediction(
    in_channels=d.num_features,
    embedding_size=dim,
    hidden_channels=64,
    num_layers=5,
    num_embeddings=16,
)
# Training step is currently disabled:
# model.transform(model=m, dataloader=dataloader)

In [11]:
# torch.save(m.state_dict(), k)

In [12]:
# Pull a single batch from the sampler for the manual forward-pass checks
# below. The iterator is deliberately not bound to a name, so it is released
# as soon as the first batch has been fetched.
batch = next(iter(dataloader))

In [13]:
# Move the GCN model to the configured device. Left as the bare last
# expression so the cell output shows the model's layer summary.
m.to(DEVICE)

GCNLinkPrediction(
  (ivectors): Linear(in_features=16, out_features=128, bias=True)
  (ovectors): Linear(in_features=16, out_features=128, bias=True)
  (in_layer): GCNConv(16, 64)
  (out_layer): GCNConv(64, 2048)
  (cnn_0): GCNConv(64, 64)
  (cnn_1): GCNConv(64, 64)
  (cnn_2): GCNConv(64, 64)
  (lin): Linear(in_features=4096, out_features=128, bias=True)
)

In [14]:
# Smoke test: run the model's forward_i pass on the first element of the batch.
# NOTE: this displays the entire output tensor; the following cell shows only
# its shape, which is usually enough.
m.forward_i(batch[0])

tensor([[-0.0664, -0.2907,  0.1268,  0.0493, -0.1212, -0.2206,  0.1029,  0.0301,
         -0.2786, -0.1259,  0.1222, -0.0183,  0.2683, -0.1957, -0.2714,  0.0738,
          0.0171, -0.0170, -0.0682, -0.1558,  0.1045,  0.1526,  0.2013,  0.0237,
          0.0790,  0.2630, -0.0909,  0.2392,  0.0081,  0.0245,  0.0623, -0.3226,
         -0.1003,  0.0490, -0.0263, -0.0681, -0.2912,  0.0926,  0.2797,  0.3329,
          0.0629, -0.1878, -0.1724, -0.1880,  0.2743,  0.2627,  0.1285,  0.1206,
          0.1137,  0.4871,  0.1326,  0.1449,  0.0061,  0.2396,  0.2016,  0.1812,
          0.1311,  0.0474,  0.0892,  0.0007,  0.0693,  0.0049,  0.0377,  0.1527,
          0.1613, -0.1865,  0.0276,  0.0159, -0.2282,  0.3129, -0.0394, -0.0661,
          0.2094,  0.0581,  0.0319,  0.2666,  0.0048, -0.0572,  0.1165,  0.0339,
         -0.4578, -0.0188, -0.0117, -0.2521,  0.3476,  0.2969,  0.0285, -0.0521,
         -0.2121, -0.4158, -0.1480,  0.0520,  0.0593, -0.1396, -0.5546, -0.4005,
         -0.3793,  0.0686,  

In [16]:
# Check the shape of the forward_i output for the single-item batch
# (the recorded output is torch.Size([1, 128])).
m.forward_i(batch[0]).shape

torch.Size([1, 128])