In [1]:
import timeit
import numpy as np
from torch_geometric.loader import TemporalDataLoader

from tgb.nodeproppred.dataset_pyg import PyGNodePropPredDataset
from tgb.nodeproppred.evaluate import Evaluator



In [2]:
class MovingAverage:
    """Keeps one running-average label vector per node.

    The first label seen for a node is stored unchanged. Every later label is
    blended in as ``(stored * (K - 1) + label) / K``, so a larger ``K`` makes
    the stored vector react more slowly to new labels.
    """

    def __init__(self, num_class, K=7):
        """Create an empty forecaster.

        Args:
            num_class: length of the all-zero vector returned for nodes that
                have never been updated.
            K: weighting constant of the running average (see class docstring).
        """
        # Maps node_id -> current running-average vector for that node.
        self.dict = {}
        self.num_class = num_class
        self.K = K

    def update_dict(self, node_id, label):
        """Fold ``label`` into the running average stored for ``node_id``.

        Args:
            node_id: hashable node identifier.
            label: label vector observed for the node (array-like).
        """
        if node_id in self.dict:
            total = self.dict[node_id] * (self.K - 1) + label
            self.dict[node_id] = total / self.K
        else:
            # Store a private copy: callers typically pass a row view of a
            # larger batch array, and keeping that view would both alias the
            # caller's data (later in-place edits would leak into the state)
            # and keep the whole batch array alive.
            self.dict[node_id] = np.array(label)

    def query_dict(self, node_id):
        """Return the current prediction vector for ``node_id``.

        Args:
            node_id: the node to query.

        Returns:
            The stored running-average vector if the node has been updated
            before (the stored object itself, not a copy), otherwise a fresh
            zero vector of length ``num_class``.
        """
        if node_id in self.dict:
            return self.dict[node_id]
        return np.zeros(self.num_class)



In [3]:
def test_n_upate(loader):
    label_t = dataset.get_label_time()  # check when does the first label start
    num_label_ts = 0
    total_score = 0

    for batch in loader:
        batch = batch.to(device)
        src, pos_dst, t, msg = batch.src, batch.dst, batch.t, batch.msg

        query_t = batch.t[-1]
        if query_t > label_t:
            label_tuple = dataset.get_node_label(query_t)
            if label_tuple is None:
                break
            label_ts, label_srcs, labels = (
                label_tuple[0],
                label_tuple[1],
                label_tuple[2],
            )
            label_ts = label_ts.numpy()
            label_srcs = label_srcs.numpy()
            labels = labels.numpy()
            label_t = dataset.get_label_time()

            preds = []

            for i in range(0, label_srcs.shape[0]):
                node_id = label_srcs[i]
                pred_vec = forecaster.query_dict(node_id)
                preds.append(pred_vec)
                forecaster.update_dict(node_id, labels[i])

            np_pred = np.stack(preds, axis=0)
            np_true = labels

            input_dict = {
                "y_true": np_true,
                "y_pred": np_pred,
                "eval_metric": [eval_metric],
            }
            result_dict = evaluator.eval(input_dict)
            score = result_dict[eval_metric]

            total_score += score
            num_label_ts += 1

    metric_dict = {}
    metric_dict[eval_metric] = total_score / num_label_ts
    return metric_dict

In [4]:
# Weighting constant handed to the MovingAverage forecaster (its `K` argument).
K = 7
# Device that the temporal data and every batch are moved to.
device = "cpu"
# Dataset identifier; also passed to the Evaluator further down.
name = "tgbn-reddit"
# NOTE(review): root="datasets" is passed here, but the recorded output below
# reports a dataset directory inside site-packages — presumably tgb resolves
# its own storage location; verify where the files actually live.
dataset = PyGNodePropPredDataset(name=name, root="datasets")

raw file found, skipping download
Dataset directory is  /Users/anastasiiaizotova/opt/anaconda3/envs/env_3_12/lib/python3.10/site-packages/tgb/datasets/tgbn_reddit


12077151it [00:33, 361268.72it/s]


In [5]:
# Number of label classes; used as the length of the forecaster's zero vector.
num_classes = dataset.num_classes
# Full event data of the dataset, moved to the configured device.
data = dataset.get_TemporalData()
data = data.to(device)

In [6]:
# Display the class count as this cell's output.
num_classes

698

In [7]:
# Metric name supplied by the dataset; used as the key into the evaluator's result.
eval_metric = dataset.eval_metric
# Stateful per-node forecaster; the same instance is shared by every
# test_n_upate call below, so evaluation order matters.
forecaster = MovingAverage(num_classes, K=K)
evaluator = Evaluator(name=name)

In [8]:
# Smallest and largest destination node index in the data.
# NOTE(review): none of the cells visible here uses these two values or
# samples negatives — they look like a leftover; confirm before removing.
min_dst_idx, max_dst_idx = int(data.dst.min()), int(data.dst.max())
# Split into train / validation / test with 15% validation and 15% test.
train_data, val_data, test_data = data.train_val_test_split(
    val_ratio=0.15, test_ratio=0.15
)

In [9]:
# Batch size passed to each TemporalDataLoader.
batch_size = 200

In [10]:
# One loader per split, all using the same batch size.
train_loader = TemporalDataLoader(train_data, batch_size=batch_size)
val_loader = TemporalDataLoader(val_data, batch_size=batch_size)
test_loader = TemporalDataLoader(test_data, batch_size=batch_size)

In [11]:
# Evaluate on the training split. This call also updates the shared
# `forecaster`, so it must run before the validation and test cells.
metric_dict = test_n_upate(train_loader)
print(metric_dict)

{'ndcg': 0.5770426324129679}


In [12]:
# Evaluate on the validation split, starting from the forecaster state left by
# the training run; the forecaster keeps being updated here as well.
val_dict = test_n_upate(val_loader)
print(val_dict)

{'ndcg': 0.5740962485683613}


In [13]:
# Evaluate on the test split, starting from the forecaster state left by the
# training and validation runs.
test_dict = test_n_upate(test_loader)
print(test_dict)

{'ndcg': 0.5592638307834997}
