In [1]:
import os
import sys
import re
import time
import json
import pickle
import logging
import math
import random
import argparse
import subprocess

from collections import defaultdict

import numpy as np
import scipy as sp

import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm import tqdm

from sklearn import linear_model
from sklearn import metrics

In [2]:
DATASET_NUM_DIC = {
    'epinions': 131828,
    'slashdot': 82140,
    'bitcoin_alpha': 3783,
    'bitcoin_otc': 5881,
}

### Feature Extraction

In [3]:
class FeaExtra(object):
    def __init__(self, dataset='epinions', k=1, debug=False):
        filename = r'C:\Users\sss\Desktop\SiGAT/experiment-data/{}-train-{}.edgelist'.format(dataset, k)  # 导入训练数据
        if debug:
            filename = './test.edgelists'
        res = self.init_edgelists(filename=filename)
        self.pos_in_edgelists, self.pos_out_edgelists, self.neg_in_edgelists, self.neg_out_edgelists = res

    def init_edgelists(self, filename=r'C:\Users\sss\Desktop\SiGAT/experiment-data/epinions-train-1.edgelist'):
        
        pos_out_edgelists = defaultdict(list)
        neg_out_edgelists = defaultdict(list)
        pos_in_edgelists = defaultdict(list)
        neg_in_edgelists = defaultdict(list)
        
        with open(filename) as f:
            for line in f.readlines():
                x, y, z = line.split()
                x = int(x)
                y = int(y)
                z = int(z)
                
                if z == 1:
                    pos_out_edgelists[x].append(y)  # u->v; u向外指向v的有向边
                    pos_in_edgelists[y].append(x)  # v->u: v被u指向
                else:
                    neg_out_edgelists[x].append(y)
                    neg_in_edgelists[y].append(x)
        return pos_in_edgelists, pos_out_edgelists, neg_in_edgelists, neg_out_edgelists

    def get_pos_indegree(self, v):
        return len(self.pos_in_edgelists[v])

    def get_pos_outdegree(self, v):
        return len(self.pos_out_edgelists[v])

    def get_neg_indegree(self, v):
        return len(self.neg_in_edgelists[v])

    def get_neg_outdegree(self, v):
        return len(self.neg_out_edgelists[v])

    def common_neighbors(self, u, v):
        u_neighbors = self.pos_in_edgelists[u] + self.neg_in_edgelists[u] + \
                      self.pos_out_edgelists[u] + self.neg_out_edgelists[u]
        v_neighbors = self.pos_in_edgelists[v] + self.neg_in_edgelists[v] + \
                      self.pos_out_edgelists[v] + self.neg_out_edgelists[v]
        return len(set(u_neighbors).intersection(set(v_neighbors)))

    def feature_part1(self, u, v):
        d_pos_in_u = self.get_pos_indegree(u)
        d_neg_in_v = self.get_neg_indegree(v)
        d_pos_out_u = self.get_pos_outdegree(u)
        d_neg_out_v = self.get_neg_outdegree(v)

        # d_pos_in_v = self.get_pos_indegree(v)
        # d_neg_in_u = self.get_neg_indegree(u)
        # d_pos_out_v = self.get_pos_outdegree(v)
        # d_neg_out_u = self.get_neg_outdegree(u)

        c_u_v = self.common_neighbors(u, v)
        d_out_u = self.get_neg_outdegree(u) + self.get_pos_outdegree(u)
        d_in_v = self.get_neg_indegree(v) + self.get_pos_indegree(v)
        return d_pos_in_u, d_neg_in_v, d_pos_out_u, d_neg_out_v, c_u_v, d_out_u, d_in_v

    def feature_part2(self, u, v):
        """
        /^ \v /^ \^ /v \v /v ^\
        ++
        /^ \v /^ \^ /v \v /v ^\
        +-
        /^ \v /^ \^ /v \v /v ^\
        -+
        /^ \v /^ \^ /v \v /v ^\
        --
        """
        d1_1 = len(set(self.pos_out_edgelists[u]).intersection(set(self.pos_in_edgelists[v])))  # 集合交集
        d1_2 = len(set(self.pos_out_edgelists[u]).intersection(set(self.neg_in_edgelists[v])))
        d1_3 = len(set(self.neg_out_edgelists[u]).intersection(set(self.pos_in_edgelists[v])))
        d1_4 = len(set(self.neg_out_edgelists[u]).intersection(set(self.neg_in_edgelists[v])))

        d2_1 = len(set(self.pos_out_edgelists[u]).intersection(set(self.pos_out_edgelists[v])))
        d2_2 = len(set(self.pos_out_edgelists[u]).intersection(set(self.neg_out_edgelists[v])))
        d2_3 = len(set(self.neg_out_edgelists[u]).intersection(set(self.pos_out_edgelists[v])))
        d2_4 = len(set(self.neg_out_edgelists[u]).intersection(set(self.neg_out_edgelists[v])))

        d3_1 = len(set(self.pos_in_edgelists[u]).intersection(set(self.pos_out_edgelists[v])))
        d3_2 = len(set(self.pos_in_edgelists[u]).intersection(set(self.neg_out_edgelists[v])))
        d3_3 = len(set(self.neg_in_edgelists[u]).intersection(set(self.pos_out_edgelists[v])))
        d3_4 = len(set(self.neg_in_edgelists[u]).intersection(set(self.neg_out_edgelists[v])))

        d4_1 = len(set(self.pos_in_edgelists[u]).intersection(set(self.pos_in_edgelists[v])))
        d4_2 = len(set(self.pos_in_edgelists[u]).intersection(set(self.neg_in_edgelists[v])))
        d4_3 = len(set(self.neg_in_edgelists[u]).intersection(set(self.pos_in_edgelists[v])))
        d4_4 = len(set(self.neg_in_edgelists[u]).intersection(set(self.neg_in_edgelists[v])))

        return d1_1, d1_2, d1_3, d1_4, d2_1, d2_2, d2_3, d2_4, d3_1, d3_2, d3_3, d3_4, d4_1, d4_2, d4_3, d4_4

    def get_features(self, u, v):
        x11 = self.feature_part1(u, v)
        x12 = self.feature_part2(u, v)
        return x11 + x12

### 参数设置

In [5]:
# Training settings
parser = argparse.ArgumentParser()
parser.add_argument('--devices', type=str, default='cpu', help='Devices')
parser.add_argument('--seed', type=int, default=13, help='Random seed.')
parser.add_argument('--epochs', type=int, default=100, help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.0005, help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=0.0001, help='Weight decay (L2 loss on parameters).')
parser.add_argument('--dataset', default='bitcoin_alpha', help='Dataset')
parser.add_argument('--dim', type=int, default=20, help='Embedding Dimension')
parser.add_argument('--fea_dim', type=int, default=20, help='Feature Embedding Dimension')
parser.add_argument('--batch_size', type=int, default=500, help='Batch Size')
parser.add_argument('--dropout', type=float, default=0.0, help='Dropout k')
parser.add_argument('--k', default=1, help='Folder k')

args = parser.parse_args(args=[])

In [None]:
# 文件输出

OUTPUT_DIR = r'C:\Users\sss\Desktop\SiGAT\embeddings\sigat'
if not os.path.exists(r'C:\Users\sss\Desktop\SiGAT\embeddings'):
    os.mkdir(r'C:\Users\sss\Desktop\SiGAT\embeddings')
    if not os.path.exists(OUTPUT_DIR):
        os.mkdir(OUTPUT_DIR)

In [6]:
# 随机种子

random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

<torch._C.Generator at 0x248ffefd270>

In [7]:
NEG_LOSS_RATIO = 1
INTERVAL_PRINT = 20

NUM_NODE = DATASET_NUM_DIC[args.dataset]  # "bitcoin_alpha"
WEIGHT_DECAY = args.weight_decay
NODE_FEAT_SIZE = args.fea_dim
EMBEDDING_SIZE1 = args.dim
DEVICES = torch.device(args.devices)
LEARNING_RATE = args.lr
BATCH_SIZE = args.batch_size
EPOCHS = args.epochs
DROUPOUT = args.dropout
K = args.k

In [8]:
print(DEVICES)

cpu


### 构建模型

In [9]:
class Encoder(nn.Module):
    # Encode features to embeddings
    def __init__(self, features_lists, feature_dim, embed_dim, adj_lists, aggs):
        super(Encoder, self).__init__()
        self.features_lists = features_lists  # features
        self.feat_dim = feature_dim  # dim
        self.adj_lists = adj_lists  # 38个motifs
        self.aggs = aggs  # 38个motifs对应到的attention layer
        
        self.embed_dim = embed_dim
        for i, agg in enumerate(self.aggs):
            self.add_module('agg_{}'.format(i), agg)  # 模块化的值
            self.aggs[i] = agg.to(DEVICES)
            
        def init_weights(m):
            if type(m) == nn.Linear:
                torch.nn.init.kaiming_normal_(m.weight)
                m.bias.data.fill_(0.01)
                
        self.nonlinear_layer = nn.Sequential(
            nn.Linear(self.feat_dim * (len(adj_lists) + 1), self.feat_dim),
            nn.Tanh(),
            nn.Linear(self.feat_dim, self.embed_dim)
        )
        self.nonlinear_layer.apply(init_weights)
        
    def forward(self, nodes):
        # Generates embeddings for nodes.
        neigh_feats = [agg.forward(nodes, adj) for adj, agg in zip(self.adj_lists, self.aggs)]  # return 38 motifs GAT
        self_feats = self.features_lists[0](torch.LongTensor(nodes).to(DEVICES))
        combined = torch.cat([self_feats] + neigh_feats, 1)
        combined = self.nonlinear_layer(combined)
        return combined        

In [10]:
class SpecialSpmmFunction(torch.autograd.Function):
    # Special function for only sparse region backpropataion layer
    @staticmethod
    def forward(ctx, indices, values, shape, b):
        assert indices.requires_grad == False
        a = torch.sparse_coo_tensor(indices, values, shape, device=DEVICES)  # 稀疏矩阵
        ctx.save_for_backward(a, b)
        ctx.N = shape[0]
        return torch.matmul(a, b)
    
    @staticmethod
    def backward(ctx, grad_output):
        a, b = ctx.saved_tensors
        grad_values = grad_b = None
        if ctx.needs_input_grad[1]:
            grad_a_dense = grad_output.matmul(b.t())
            edge_idx = a._indices()[0, :] * ctx.N + a._indices()[1, :]
            grad_values = grad_a_dense.view(-1)[edge_idx]
        if ctx.needs_input_grad[3]:
            grad_b = a.t().matmul(grad_output)
        return None, grad_values, None, grad_b

In [11]:
class SpecialSpmm(nn.Module):
    def forward(self, indices, values, shape, b):
        return SpecialSpmmFunction.apply(indices, values, shape, b)

In [12]:
class AttentionAggregator(nn.Module):
    def __init__(self, features, in_dim, out_dim, node_num, dropout_rate=DROUPOUT, slope_ratio=0.1):
        super(AttentionAggregator, self).__init__()
        
        self.features = features
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.dropout = nn.Dropout(dropout_rate)
        self.slope_ratio = slope_ratio
        self.a = nn.Parameter(torch.FloatTensor(out_dim * 2, 1))  # attention参数; W1||W2
        nn.init.kaiming_normal_(self.a.data)
        self.speical_spmm = SpecialSpmm()  # 稀疏矩阵的梯度更新
        
        self.out_linear_layer = nn.Linear(self.in_dim, self.out_dim)  # W * h
        self.unique_nodes_dict = np.zeros(node_num, dtype=np.int32)
        
    def forward(self, nodes, adj):
        """
        nodes --- list of nodes in a batch
        adj --- sp.sparse.csr_matrix
        """
        node_pku = np.array(nodes)  # minbatch的节点
        edges = np.array(adj[nodes, :].nonzero()).T  # 选择的邻接矩阵的边; 按照edges的行进行编号的
        edges[:, 0] = node_pku[edges[:, 0]]  # 将node本身的index赋予到edges上
        
        unique_nodes_list = np.unique(np.hstack((np.unique(edges), np.array(nodes))))
        
        batch_node_num = len(unique_nodes_list)
        # this dict can map new i to originial node id; 将新id映射到原始id上
        self.unique_nodes_dict[unique_nodes_list] = np.arange(batch_node_num)  # 对每一个单词给予一个index
        # 将之前的索引转换成新标记的索引（新标记的是从0开始到batch_node_num）
        edges[:, 0] = self.unique_nodes_dict[edges[:, 0]]  # 赋予新的id
        edges[:, 1] = self.unique_nodes_dict[edges[:, 1]]
        
        n2 = torch.LongTensor(unique_nodes_list).to(DEVICES)  # 旧的节点集合
        new_embeddings = self.out_linear_layer(self.features(n2))  # W * h
        
        original_node_edge = np.array([self.unique_nodes_dict[nodes], self.unique_nodes_dict[nodes]]).T  # 自连接边
        edges = np.vstack((edges, original_node_edge))  # 加入自连接边
        edges = torch.LongTensor(edges).to(DEVICES)
        
        edge_h_2 = torch.cat((new_embeddings[edges[:, 0], :], new_embeddings[edges[:, 1], :]), dim=1)  # Whi||Whj
        edges_h = torch.exp(F.leaky_relu(torch.einsum("ij, jl -> il", [edge_h_2, self.a]), self.slope_ratio))  # attention系数
        indices = edges
                    # 稀疏矩阵计算     # 点边关系   # attention系数  #
        row_sum = self.speical_spmm(edges.t(), edges_h[:, 0], torch.Size((batch_node_num, batch_node_num)), torch.ones(size=(batch_node_num, 1)).to(DEVICES))

        results = self.speical_spmm(edges.t(), edges_h[:, 0], torch.Size((batch_node_num, batch_node_num)), new_embeddings)

        output_emb = results.div(row_sum)  # 除法：GAT的输出结果
        
        return output_emb[self.unique_nodes_dict[nodes]]  # 返回minbatch中涉及到的node

In [13]:
class SiGAT(nn.Module):
    def __init__(self, enc):
        super(SiGAT, self).__init__()
        self.enc = enc
        
    def forward(self, nodes):
        embeds = self.enc(nodes)
        return embeds
    
    def criterion(self, nodes, pos_neighbors, neg_neighbors):
        pos_neighbors_list = [set.union(pos_neighbors[i]) for i in nodes]  # node的positive节点邻接
        neg_neighbors_list = [set.union(neg_neighbors[i]) for i in nodes]  # node的negative节点邻接
        unique_nodes_list = list(set.union(*pos_neighbors_list).union(*neg_neighbors_list).union(nodes))  # 节点集合
        unique_nodes_dict = {n: i for i, n in enumerate(unique_nodes_list)}  # node: index
        nodes_embs = self.enc(unique_nodes_list)  # 输出节点embedding
        
        loss_total = 0
        for index, node in enumerate(nodes):
            z1 = nodes_embs[unique_nodes_dict[node], :]  # 该节点的embedding
            pos_neigs = list([unique_nodes_dict[i] for i in pos_neighbors[node]])  # 该节点的positive邻居节点
            neg_neigs = list([unique_nodes_dict[i] for i in neg_neighbors[node]])  # 该节点的negative邻居节点
            pos_num = len(pos_neigs)
            neg_num = len(neg_neigs)

            if pos_num > 0:
                pos_neig_embs = nodes_embs[pos_neigs, :]  # positive邻居节点
                loss_pku = -1 * torch.sum(F.logsigmoid(torch.einsum("nj, j -> n", [pos_neig_embs, z1])))
                loss_total += loss_pku
            tmp_pku = 1 if neg_num == 0 else neg_num
            C = pos_num // tmp_pku  # 正样本是负样本的多少倍
            if C == 0:
                C = 1
            if neg_num > 0:
                neg_neig_embs = nodes_embs[neg_neigs, :]
                loss_pku = -1 * torch.sum(F.logsigmoid(-1 * torch.einsum("nj , j -> n", [neg_neig_embs, z1])))
                loss_total += C * NEG_LOSS_RATIO  * loss_pku
                
        return loss_total

In [14]:
def load_data2(filename='', add_public_foe=True):
    adj_lists1   = defaultdict(set)
    adj_lists1_1 = defaultdict(set)
    adj_lists1_2 = defaultdict(set)
    
    adj_lists2   = defaultdict(set)
    adj_lists2_1 = defaultdict(set)
    
    adj_lists2_2 = defaultdict(set)
    adj_lists3   = defaultdict(set)


    with open(filename) as fp:
        for i, line in enumerate(fp):
            info = line.strip().split()
            person1 = int(info[0])  # src
            person2 = int(info[1])  # dst
            v = int(info[2])  # signed edge
            adj_lists3[person2].add(person1)  # 构建无向图连接的边
            adj_lists3[person1].add(person2)

            if v == 1:  # positive
                adj_lists1[person1].add(person2)  # positive的无向图边
                adj_lists1[person2].add(person1)

                adj_lists1_1[person1].add(person2)  # positive的有向图边; u->v
                adj_lists1_2[person2].add(person1)  # v->u
            else:  # negative
                adj_lists2[person1].add(person2)
                adj_lists2[person2].add(person1)

                adj_lists2_1[person1].add(person2)  # u->v
                adj_lists2_2[person2].add(person1)


    return adj_lists1, adj_lists1_1, adj_lists1_2, adj_lists2, adj_lists2_1, adj_lists2_2, adj_lists3

In [15]:
def read_emb(num_nodes, fpath):
    dim = 0
    embeddings = 0
    with open(fpath) as f:
        for i, line in enumerate(f.readlines()):
            if i == 0:
                dim = int(line.split()[1])
                embeddings = np.random.rand(num_nodes, dim)
            else:
                line_l = line.split()
                node = line_l[0]
                emb = [float(j) for j in line_l[1: ]]
                assert len(emb) == dim
                embeddings[int(node)] = np.array(emb)
                
    return embeddings

### 模型训练

In [61]:
def run( dataset='bitcoin_alpha', k=2):
    num_nodes = DATASET_NUM_DIC[dataset] + 3  # 节点数量

    # adj_lists1, adj_lists2, adj_lists3 = load_data(k, dataset)
    filename = r'C:\Users\sss\Desktop\SiGAT/experiment-data/{}-train-{}.edgelist'.format(dataset, k)
    adj_lists1, adj_lists1_1, adj_lists1_2, adj_lists2, adj_lists2_1, adj_lists2_2, adj_lists3 = load_data2(filename, add_public_foe=False)
    print(k, dataset, 'data load!')
    
    features = nn.Embedding(num_nodes, NODE_FEAT_SIZE)  # 建立节点embedding
    features.weight.requires_grad = True

    features.to(DEVICES)
    # 邻接矩阵集合, balance theory
    adj_lists = [adj_lists1, adj_lists1_1, adj_lists1_2, adj_lists2, adj_lists2_1, adj_lists2_2]


    # 抽取点边关系
    fea_model = FeaExtra(dataset=dataset, k=k)

    adj_additions1 = [defaultdict(set) for _ in range(16)]  # positive三角形关系的list
    adj_additions2 = [defaultdict(set) for _ in range(16)]  # negative三角形关系的list
    a, b = 0, 0
    # 三角形关系统计
    # u->v positive的三角形统计
    for i in adj_lists1_1:
        for j in adj_lists1_1[i]:
            v_list = fea_model.feature_part2(i, j)
            for index, v in enumerate(v_list):
                if v > 0:
                    adj_additions1[index][i].add(j)
                    a += 1
    # u->v negative的三角形统计
    for i in adj_lists2_1:
        for j in adj_lists2_1[i]:
            v_list = fea_model.feature_part2(i, j)
            for index, v in enumerate(v_list):
                if v > 0:
                    adj_additions2[index][i].add(j)
                    b += 1
    assert a > 0, 'positive something wrong'
    assert b > 0, 'negative something wrong'

    # 38 motifs
    adj_lists = adj_lists + adj_additions1 + adj_additions2
    
    #adj_lists = adj_lists + adj_additions1 + adj_additions2 + [adj_lists3]
    ########################

    # 2
    # adj_lists = [adj_lists1, adj_lists2]

    # 6
    # adj_lists = [adj_lists1, adj_lists1_1, adj_lists1_2, adj_lists2, adj_lists2_1, adj_lists2_2]

    # 18
    # adj_lists = adj_lists + adj_additions0

    print(len(adj_lists), 'motifs')

    def func(adj_list):
        edges = []
        for a in adj_list:
            for b in adj_list[a]:
                edges.append((a, b))  # 获取点边关系
        edges = np.array(edges)
        adj = sp.sparse.csr_matrix((np.ones(len(edges)), (edges[:,0], edges[:,1])), shape=(num_nodes, num_nodes))  # 构建稀疏矩阵
        return adj

    adj_lists = list(map(func, adj_lists))  # 每个motifs的邻接矩阵
    features_lists = [features for _ in range(len(adj_lists))]  # 每个motifs特征
    aggs = [AttentionAggregator(features, NODE_FEAT_SIZE, NODE_FEAT_SIZE, num_nodes) for features, adj in
            zip(features_lists, adj_lists)]

    enc1 = Encoder(features_lists, NODE_FEAT_SIZE, EMBEDDING_SIZE1, adj_lists, aggs)  # motifs特征组合在一起

    model = SiGAT(enc1)
    model.to(DEVICES)
    
    # print(model.train())
    
    optimizer = torch.optim.Adam(
        filter(
            lambda p: p.requires_grad,
            list(model.parameters()) + list(enc1.parameters()) + list(features.parameters())
        ),
        lr=LEARNING_RATE,
        weight_decay=WEIGHT_DECAY
    )

    for epoch in range(EPOCHS + 2):
        total_loss = []
        if epoch % INTERVAL_PRINT == 1: # !! 0
            model.eval()
            all_embedding = np.zeros((NUM_NODE, EMBEDDING_SIZE1))
            for i in range(0, NUM_NODE, BATCH_SIZE):
                begin_index = i
                end_index = i + BATCH_SIZE if i + BATCH_SIZE < NUM_NODE else NUM_NODE
                values = np.arange(begin_index, end_index)
                embed = model.forward(values.tolist())
                embed = embed.data.cpu().numpy()
                all_embedding[begin_index: end_index] = embed

            fpath = os.path.join(OUTPUT_DIR, 'embedding-{}-{}-{}.npy'.format(dataset, k, str(epoch)) )
            np.save(fpath, all_embedding)
            model.train()

        time1 = time.time()
        nodes_pku = np.random.permutation(NUM_NODE).tolist()  # 打乱节点
        for batch in range(NUM_NODE // BATCH_SIZE):
            optimizer.zero_grad()
            b_index = batch * BATCH_SIZE
            e_index = (batch + 1) * BATCH_SIZE
            nodes = nodes_pku[b_index:e_index]  # minbatch中的节点

            loss = model.criterion(
                nodes, adj_lists1, adj_lists2
            )
            total_loss.append(loss.data.cpu().numpy())

            loss.backward()
            optimizer.step()
            
        print(f'epoch: {epoch}, loss: {np.sum(total_loss)}, time: {time.time()-time1}')

In [62]:
def main():
    print('NUM_NODE', NUM_NODE)
    print('WEIGHT_DECAY', WEIGHT_DECAY)
    print('NODE_FEAT_SIZE', NODE_FEAT_SIZE)
    print('EMBEDDING_SIZE1', EMBEDDING_SIZE1)
    print('LEARNING_RATE', LEARNING_RATE)
    print('BATCH_SIZE', BATCH_SIZE)
    print('EPOCHS', EPOCHS)
    print('DROUPOUT', DROUPOUT)
    run(dataset=args.dataset, k=K)

In [63]:
main()

NUM_NODE 3783
WEIGHT_DECAY 0.0001
NODE_FEAT_SIZE 20
EMBEDDING_SIZE1 20
LEARNING_RATE 0.0005
BATCH_SIZE 500
EPOCHS 100
DROUPOUT 0.0
1 bitcoin_alpha data load!
38 motifs
SiGAT(
  (enc): Encoder(
    (agg_0): AttentionAggregator(
      (features): Embedding(3786, 20)
      (dropout): Dropout(p=0.0, inplace=False)
      (speical_spmm): SpecialSpmm()
      (out_linear_layer): Linear(in_features=20, out_features=20, bias=True)
    )
    (agg_1): AttentionAggregator(
      (features): Embedding(3786, 20)
      (dropout): Dropout(p=0.0, inplace=False)
      (speical_spmm): SpecialSpmm()
      (out_linear_layer): Linear(in_features=20, out_features=20, bias=True)
    )
    (agg_2): AttentionAggregator(
      (features): Embedding(3786, 20)
      (dropout): Dropout(p=0.0, inplace=False)
      (speical_spmm): SpecialSpmm()
      (out_linear_layer): Linear(in_features=20, out_features=20, bias=True)
    )
    (agg_3): AttentionAggregator(
      (features): Embedding(3786, 20)
      (dropout): Drop

  super(Adam, self).__init__(params, defaults)


epoch: 0, loss: 46128.16796875, time: 12.924446105957031
epoch: 1, loss: 26664.87890625, time: 14.247902870178223
epoch: 2, loss: 19646.0390625, time: 10.90584421157837
epoch: 3, loss: 16484.76171875, time: 10.223539352416992
epoch: 4, loss: 13477.919921875, time: 11.071397542953491


KeyboardInterrupt: 