### EvolveGCN
- EvolveGCN-H: 节点带有特征信息，模型考虑了节点特征的变化
- EvolveGCN-O: 节点信息比较少，更关注图结构的变化

In [1]:
import os
import sys
import yaml
import math
import time
import pprint
import random
import tarfile
import itertools
import argparse
import logging
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.distributed as dist
from torch.nn.parameter import Parameter
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader

import datetime
from sklearn.metrics import average_precision_score
from scipy.sparse import coo_matrix

### Preparation

In [2]:
def pad_with_last_col(matrix,cols):
    """Widen ``matrix`` to ``cols`` columns by repeating its last column.

    A new tensor is returned. When ``cols`` does not exceed the current
    width, no column is appended and the data is returned unchanged.
    """
    missing = cols - matrix.size(1)
    last_col = matrix[:, [-1]]  # list index keeps the column 2-D
    pieces = [matrix]
    pieces += [last_col] * missing  # a non-positive count adds nothing
    return torch.cat(pieces, dim=1)

In [3]:
def pad_with_last_val(vect, k):
    """Extend the 1-D tensor ``vect`` to length ``k`` by repeating its last value.

    Args:
        vect: 1-D tensor with at least one element.
        k: target length; must be >= ``vect.size(0)``.

    Returns:
        A new tensor of length ``k`` living on the same device as ``vect``.
    """
    # Use the exact device of the input (including the GPU index) so the
    # concatenation below never mixes devices on multi-GPU machines.
    pad = torch.ones(k - vect.size(0),
                     dtype=torch.long,
                     device=vect.device) * vect[-1]
    return torch.cat([vect, pad])

In [4]:
def sparse_prepare_tensor(tensor,torch_size, ignore_batch_dim = True):
    """Turn an ``{'idx', 'vals'}`` dict into a float sparse tensor.

    When ``ignore_batch_dim`` is true the leading (batch) entry of both
    fields is selected first via ``sp_ignore_batch_dim``.
    """
    source = sp_ignore_batch_dim(tensor) if ignore_batch_dim else tensor
    return make_sparse_tensor(source, tensor_type='float', torch_size=torch_size)

In [5]:
def sp_ignore_batch_dim(tensor_dict):
    """Replace ``'idx'`` and ``'vals'`` by their first element, in place.

    The same dict object is returned after being modified.
    """
    for field in ('idx', 'vals'):
        tensor_dict[field] = tensor_dict[field][0]
    return tensor_dict

In [6]:
def aggregate_by_time(time_vector,time_win_aggr):
    """Bucket timestamps into windows of width ``time_win_aggr``.

    The smallest timestamp is shifted to zero before the floor division,
    so the first bucket always has index 0.
    """
    shifted = time_vector - time_vector.min()
    return shifted // time_win_aggr

In [7]:
def sort_by_time(data,time_col):
    """Return the rows of ``data`` reordered by ascending ``time_col`` value."""
    order = torch.sort(data[:, time_col])[1]  # second item holds the permutation
    return data[order]

In [8]:
def print_sp_tensor(sp_tensor,size):
    """Print the dense ``size x size`` form of an ``{'idx', 'vals'}`` dict."""
    indices = sp_tensor['idx'].t()
    values = sp_tensor['vals']
    shape = torch.Size([size, size])
    print(torch.sparse.FloatTensor(indices, values, shape).to_dense())

In [9]:
def reset_param(t):
    """Re-initialise ``t`` in place, uniformly in ``[-b, b]`` with ``b = 2 / sqrt(t.size(0))``."""
    bound = 2. / math.sqrt(t.size(0))
    t.data.uniform_(-bound, bound)

In [10]:
def make_sparse_tensor(adj,tensor_type,torch_size):
    """Build a sparse COO tensor from an ``{'idx', 'vals'}`` dict.

    Args:
        adj: dict whose ``'idx'`` is a 2-D index tensor with one row per
            entry (it is transposed before use) and whose ``'vals'`` holds
            the matching values.
        tensor_type: ``'float'`` or ``'long'``; the values are cast to it.
        torch_size: sequence of length 2 giving the shape, or of length 1
            giving the side of a square matrix.

    Returns:
        An (uncoalesced) sparse tensor of the requested dtype and shape.

    Raises:
        NotImplementedError: if ``tensor_type`` is neither ``'float'`` nor
            ``'long'``.
        ValueError: if ``torch_size`` has a length other than 1 or 2.
    """
    if len(torch_size) == 2:
        tensor_size = torch.Size(torch_size)
    elif len(torch_size) == 1:
        # A single number means a square matrix of that side.
        tensor_size = torch.Size(list(torch_size) * 2)
    else:
        tensor_size = None

    if tensor_type == 'float':
        dtype = torch.float
    elif tensor_type == 'long':
        dtype = torch.long
    else:
        raise NotImplementedError('only make floats or long sparse tensors')

    if tensor_size is None:
        raise ValueError('torch_size must have length 1 or 2, got %d' % len(torch_size))

    # torch.sparse_coo_tensor replaces the deprecated torch.sparse.*Tensor
    # constructors and builds the tensor exactly once.
    return torch.sparse_coo_tensor(adj['idx'].t(),
                                   adj['vals'].type(dtype),
                                   tensor_size)

In [11]:
def sp_to_dict(sp_tensor):
    """Unpack a sparse tensor into ``{'idx': one row per entry, 'vals': values}``."""
    indices = sp_tensor._indices()
    values = sp_tensor._values()
    return {'idx': indices.t(), 'vals': values}

In [12]:
class Namespace(object):
    '''
    Attribute-style view of a dictionary: every key of the mapping passed to
    the constructor becomes an attribute, so values read as obj.key instead
    of adict['key'].
    '''
    def __init__(self, adict):
        vars(self).update(adict)

In [13]:
def set_seeds(rank):
    """Seed NumPy, ``random`` and torch (CPU and CUDA) from the clock plus ``rank``.

    The seed is the current Unix time in whole seconds offset by ``rank``,
    so different ranks started in the same second get different seeds.
    """
    seed = int(time.time()) + rank
    seeders = (
        np.random.seed,
        random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

In [14]:
def random_param_value(param, param_min, param_max, type='int'):
    """Return ``param`` unchanged, or draw a random value when it is unset.

    ``param`` counts as unset when it is ``None`` or when its string form is
    ``'none'`` in any letter case (as produced by some config files).

    Args:
        param: the configured value, possibly ``None`` / ``'None'``.
        param_min: lower bound of the sampling range (inclusive).
        param_max: upper bound of the sampling range (inclusive for
            ``'int'`` and ``'logscale'``).
        type: ``'int'`` draws an integer, ``'logscale'`` picks one of 100
            log-spaced points between the bounds, anything else draws a
            uniform float.

    Returns:
        ``param`` itself when set, otherwise the sampled value.
    """
    # The former `str(param) is None` test could never be true; check the
    # value itself instead.
    if param is not None and str(param).lower() != 'none':
        return param

    if type == 'int':
        return random.randrange(param_min, param_max+1)
    if type == 'logscale':
        interval = np.logspace(np.log10(param_min), np.log10(param_max), num=100)
        return np.random.choice(interval,1)[0]
    return random.uniform(param_min, param_max)

In [15]:
def load_data(file):
    """Read a comma-separated numeric file into a 2-D float tensor.

    The first line is treated as a header and skipped; every remaining
    line must consist of comma-separated numbers.
    """
    with open(file) as handle:
        rows = handle.read().splitlines()[1:]
    parsed = [[float(cell) for cell in row.split(',')] for row in rows]
    return torch.tensor(parsed)

In [16]:
def load_data_from_tar(file, tar_archive, replace_unknow=False, starting_line=1, sep=',', type_fn=float, tensor_const=torch.DoubleTensor):
    """Parse one delimited text member of an open tar archive into a tensor.

    The member is decoded as UTF-8, the first ``starting_line`` lines are
    dropped, every remaining line is split on ``sep`` and each field is
    converted with ``type_fn``. The nested list is handed to
    ``tensor_const``.

    With ``replace_unknow`` the text 'unknow' is rewritten to '-1' and the
    resulting '-1n' (left over from 'unknown') is collapsed to '-1'.
    """
    text = tar_archive.extractfile(file).read().decode('utf-8')
    if replace_unknow:
        text = text.replace('unknow', '-1').replace('-1n', '-1')

    rows = text.splitlines()[starting_line:]
    parsed = [[type_fn(cell) for cell in row.split(sep)] for row in rows]
    return tensor_const(parsed)

In [17]:
def create_parser():
    """Create the argument parser, whose only option is ``--config_file``.

    The option defaults to a fixed YAML path and is declared as a readable
    file, so argparse opens it while parsing.
    """
    config_file_options = {
        'default': r'C:\Users\sss\Desktop\EvolveGCN-master\experiments/parameters_example.yaml',
        'type': argparse.FileType(mode='r'),
        'help': 'optional, yaml file containing parameters to be used, overrides command line parameters',
    }
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--config_file', **config_file_options)
    return parser

In [18]:
def _config_flag_is_true(flag):
    """Interpret a config flag that may be a bool or a string such as 'True'."""
    if isinstance(flag, str):
        return flag.lower() == 'true'
    return bool(flag)


def parse_args(parser):
    """Parse the defaults of ``parser`` and merge in the YAML config file.

    The parser is run on an empty argument list (so the process's own argv
    is ignored). When ``config_file`` is set, the YAML document is loaded
    and each of its top-level keys is stored as an attribute of the result,
    replacing ``config_file`` itself. Hyper-parameters left as ``None`` /
    ``'None'`` are then filled in with random values via
    ``random_param_value``.

    Args:
        parser: an ``argparse.ArgumentParser`` exposing ``config_file``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    args = parser.parse_args(args=[])

    if args.config_file:
        config_file = args.config_file
        try:
            # safe_load: plain yaml.load() needs an explicit Loader on
            # PyYAML >= 6 and can construct arbitrary objects.
            data = yaml.safe_load(config_file)
        finally:
            # Release the handle opened by argparse (never close stdin).
            if config_file is not sys.stdin:
                config_file.close()
        delattr(args, 'config_file')
        arg_dict = args.__dict__
        for key, value in data.items():
            arg_dict[key] = value

    args.learning_rate = random_param_value(args.learning_rate, args.learning_rate_min, args.learning_rate_max, type='logscale')
    # args.adj_mat_time_window = random_param_value(args.adj_mat_time_window, args.adj_mat_time_window_min, args.adj_mat_time_window_max, type='int')
    args.num_hist_steps = random_param_value(args.num_hist_steps, args.num_hist_steps_min, args.num_hist_steps_max, type='int')

    gcn = args.gcn_parameters  # same dict object, updated in place
    gcn['feats_per_node'] = random_param_value(gcn['feats_per_node'], gcn['feats_per_node_min'], gcn['feats_per_node_max'], type='int')
    gcn['layer_1_feats'] = random_param_value(gcn['layer_1_feats'], gcn['layer_1_feats_min'], gcn['layer_1_feats_max'], type='int')

    # The flag may arrive as a bool or as a string; the string 'false' must
    # not be treated as true and a bool must not be lower-cased.
    if _config_flag_is_true(gcn['layer_2_feats_same_as_l1']):
        gcn['layer_2_feats'] = gcn['layer_1_feats']
    else:
        # NOTE(review): the layer-2 range reuses the layer_1_feats_min/max
        # bounds, as in the original code - confirm this is intended.
        gcn['layer_2_feats'] = random_param_value(gcn['layer_2_feats'], gcn['layer_1_feats_min'], gcn['layer_1_feats_max'], type='int')
    gcn['lstm_l1_feats'] = random_param_value(gcn['lstm_l1_feats'], gcn['lstm_l1_feats_min'], gcn['lstm_l1_feats_max'], type='int')

    if _config_flag_is_true(gcn['lstm_l2_feats_same_as_l1']):
        gcn['lstm_l2_feats'] = gcn['lstm_l1_feats']
    else:
        # NOTE(review): same remark - the lstm_l1 bounds are used for lstm_l2.
        gcn['lstm_l2_feats'] = random_param_value(gcn['lstm_l2_feats'], gcn['lstm_l1_feats_min'], gcn['lstm_l1_feats_max'], type='int')
    gcn['cls_feats'] = random_param_value(gcn['cls_feats'], gcn['cls_feats_min'], gcn['cls_feats_max'], type='int')

    return args

#### Datasets

In [19]:
class Autonomous_Systems_Dataset():
    """Temporal edge list built from a gzipped tar of per-day snapshot files.

    After construction the instance exposes ``edges`` (dict with ``'idx'``
    rows of ``[source, target, time]`` and all-ones ``'vals'``),
    ``num_nodes``, ``num_non_existing``, ``min_time`` and ``max_time``.
    """
    def __init__(self,args):
        """Load the edges described by ``args.aut_sys_args``.

        ``args.aut_sys_args`` is replaced in place by a ``Namespace`` and
        must provide ``folder``, ``tar_file``, ``steps_accounted`` and
        ``aggr_time``.
        """
        args.aut_sys_args = Namespace(args.aut_sys_args)
        tar_file = os.path.join(args.aut_sys_args.folder, args.aut_sys_args.tar_file)
        # The archive is only needed while the members are parsed, so it is
        # closed as soon as the edge tensors have been built.
        with tarfile.open(tar_file, 'r:gz') as tar_archive:
            self.edges = self.load_edges(args,tar_archive)

    def load_edges(self,args,tar_archive):
        """Read every archive member and return the combined edge dict.

        Each member is parsed as tab-separated integers after skipping its
        first four lines. A time column holding the member's chronological
        rank is appended and every edge is also added in reversed direction.
        """
        files = tar_archive.getnames()
        cont_files2times = self.times_from_names(files)
        edges = []
        cols = Namespace({
            'source': 0,
            'target': 1,
            'time': 2
        })

        for file in files:
            data = load_data_from_tar(file, tar_archive, starting_line=4, sep='\t', type_fn = int, tensor_const = torch.LongTensor)
            time_col = torch.zeros(data.size(0), 1, dtype=torch.long) + cont_files2times[file]
            data = torch.cat([data, time_col], dim = 1)
            # append the reversed edges so that the graph is symmetric
            data = torch.cat([data, data[:, [cols.target, cols.source, cols.time]]])
            edges.append(data)

        edges = torch.cat(edges)
        # relabel node ids to the contiguous range 0..n-1
        _, edges[:,[cols.source,cols.target]] = edges[:,[cols.source,cols.target]].unique(return_inverse = True)

        # use only first X time steps
        indices = edges[:, cols.time] < args.aut_sys_args.steps_accounted
        edges = edges[indices, :]

        # time aggregation
        edges[:, cols.time] = aggregate_by_time(edges[:, cols.time], args.aut_sys_args.aggr_time)
        self.num_nodes = int(edges[:, [cols.source,cols.target]].max() + 1)

        # one scalar id per (source, target) pair, used to count distinct pairs
        ids = edges[:, cols.source] * self.num_nodes + edges[:, cols.target]
        self.num_non_existing = float(self.num_nodes**2 - ids.unique().size(0))
        self.max_time = edges[:,cols.time].max()
        self.min_time = edges[:,cols.time].min()

        return {'idx': edges, 'vals': torch.ones(edges.size(0))}

    def times_from_names(self,files):
        """Map each file name to its chronological rank (0, 1, 2, ...).

        The date is read from characters ``[2:-4]`` of the name, which must
        be formatted as ``YYYYMMDD``. Files sharing a date keep their input
        order and still receive distinct ranks.
        """
        # Imported locally under an alias so the lookup works no matter how
        # the module-level name `datetime` is bound.
        from datetime import datetime as _datetime

        base = _datetime.strptime("19800101", '%Y%m%d')
        files2times = {
            file: (_datetime.strptime(file[2: -4], '%Y%m%d') - base).days
            for file in files
        }

        # sorted() is stable, so equal dates keep their original order
        ordered_files = sorted(files2times, key=files2times.get)
        return {file: new_t for new_t, file in enumerate(ordered_files)}

In [20]:
class bitcoin_dataset():
    """Signed, timestamped edge list turned into per-timestep labelled edges.

    After construction the instance exposes ``edges`` (dict with ``'idx'``
    rows of ``[source, target, time, label]`` and ``'vals'`` holding the
    number of ratings behind each row), ``num_nodes``, ``num_classes``,
    ``min_time`` and ``max_time``.
    """
    def __init__(self,args):
        """Load and preprocess the edges described by ``args.bitcoin_args``.

        ``args.task`` must be 'link_pred' or 'edge_cls'.
        ``args.bitcoin_args`` is replaced in place by a ``Namespace``; its
        ``folder``, ``edges_file`` and ``aggr_time`` fields are read.
        """
        assert args.task in ['link_pred', 'edge_cls'], 'bitcoin only implements link_pred or edge_cls'
        # column layout of the raw edge tensor
        self.ecols = Namespace({
            'FromNodeId': 0,
            'ToNodeId': 1,
            'Weight': 2,
            'TimeStep': 3
        })
        args.bitcoin_args = Namespace(args.bitcoin_args)

        # build edge data structure
        edges = self.load_edges(args.bitcoin_args)

        edges = self.make_contigous_node_ids(edges)
        num_nodes = edges[:, [self.ecols.FromNodeId, self.ecols.ToNodeId]].unique().size(0)

        # bucket raw timestamps into windows of width aggr_time, starting at 0
        timesteps = aggregate_by_time(edges[:, self.ecols.TimeStep], args.bitcoin_args.aggr_time)
        self.max_time = timesteps.max()
        self.min_time = timesteps.min()
        edges[:,self.ecols.TimeStep] = timesteps

        # collapse ratings to +1 (rating > 0) or -1 (rating <= 0)
        edges[:,self.ecols.Weight] = self.cluster_negs_and_positives(edges[:, self.ecols.Weight])


        # add the reversed link to make the graph undirected
        edges = torch.cat([edges,edges[:, [self.ecols.ToNodeId, self.ecols.FromNodeId, self.ecols.Weight, self.ecols.TimeStep]]])

        # separate classes
        sp_indices = edges[:, [self.ecols.FromNodeId, self.ecols.ToNodeId, self.ecols.TimeStep]].t()
        sp_values = edges[:, self.ecols.Weight]

        neg_mask = sp_values == -1

        # coalesce() sums duplicates, so each (src, dst, time) cell ends up
        # holding minus the number of negative ratings
        neg_sp_indices = sp_indices[:,neg_mask]
        neg_sp_values = sp_values[neg_mask]
        neg_sp_edges = torch.sparse.LongTensor(neg_sp_indices ,neg_sp_values, torch.Size([num_nodes, num_nodes, self.max_time + 1])).coalesce()

        pos_mask = sp_values == 1

        # likewise, each cell here holds the number of positive ratings
        pos_sp_indices = sp_indices[:,pos_mask]
        pos_sp_values = sp_values[pos_mask]

        pos_sp_edges = torch.sparse.LongTensor(pos_sp_indices, pos_sp_values, torch.Size([num_nodes, num_nodes, self.max_time + 1])).coalesce()

        # scale the positive counts so both counts fit in one integer:
        # value = 1000 * n_pos + n_neg (only separable while n_neg < 1000)
        pos_sp_edges *= 1000

        # subtracting the (negative-valued) neg_sp_edges adds n_neg, keeping
        # every stored value positive
        sp_edges = (pos_sp_edges - neg_sp_edges).coalesce()

        # split the packed value back into the two counts per edge/timestep
        vals = sp_edges._values()
        neg_vals = vals % 1000
        pos_vals = vals // 1000
        # majority vote: positive minus negative count
        vals = pos_vals - neg_vals
        # label is 1 when positives strictly outnumber negatives, else 0
        new_vals = torch.zeros(vals.size(0),dtype=torch.long)
        new_vals[vals>0] = 1
        new_vals[vals<=0] = 0
        indices_labels = torch.cat([sp_edges._indices().t(),new_vals.view(-1, 1)],dim=1)

        # the weight of an edge (vals) is the total number of ratings between
        # the two nodes at that time step
        vals = pos_vals + neg_vals


        self.edges = {'idx': indices_labels, 'vals': vals}
        self.num_nodes = num_nodes
        self.num_classes = 2

    def cluster_negs_and_positives(self,ratings):
        """Map ratings in place to 1 (rating > 0) or -1 (rating <= 0) and return them."""
        # both masks are computed before any value is overwritten
        pos_indices = ratings > 0
        neg_indices = ratings <= 0
        ratings[pos_indices] = 1
        ratings[neg_indices] = -1
        return ratings

    def prepare_node_feats(self,node_feats):
        """Return the first element of ``node_feats``."""
        node_feats = node_feats[0]
        return node_feats

    def edges_to_sp_dict(self,edges):
        """Split an edge tensor into ``{'idx': [from, to, time], 'vals': weight}``."""
        idx = edges[:, [self.ecols.FromNodeId, self.ecols.ToNodeId, self.ecols.TimeStep]]

        vals = edges[:, self.ecols.Weight]
        return {'idx': idx, 'vals': vals}

    def get_num_nodes(self,edges):
        """Return the largest node id found in ``edges`` plus one."""
        all_ids = edges[:,[self.ecols.FromNodeId,self.ecols.ToNodeId]]
        num_nodes = all_ids.max() + 1
        return num_nodes

    def load_edges(self,bitcoin_args):
        """Read the comma-separated edge file into a long tensor.

        Every line of the file is parsed (no header line is skipped); fields
        are read as floats and then cast to ``torch.long``.
        """
        file = os.path.join(bitcoin_args.folder,bitcoin_args.edges_file)
        with open(file) as f:
            lines = f.read().splitlines()
        edges = [[float(r) for r in row.split(',')] for row in lines]
        edges = torch.tensor(edges,dtype = torch.long)
        return edges

    def make_contigous_node_ids(self,edges):
        """Relabel the node-id columns in place to 0..n-1 and return ``edges``."""
        new_edges = edges[:,[self.ecols.FromNodeId,self.ecols.ToNodeId]]
        # the inverse mapping of unique() gives each id its rank among the sorted ids
        _, new_edges = new_edges.unique(return_inverse=True)
        edges[:,[self.ecols.FromNodeId,self.ecols.ToNodeId]] = new_edges
        return edges

In [21]:
class Elliptic_Temporal_Dataset():
    """Node-labelled temporal graph read from a gzipped tar archive.

    Exposes ``nodes_labels_times``, ``edges``, ``nodes``, ``nodes_feats``,
    ``num_nodes``, ``feats_per_node``, ``min_time`` and ``max_time``.
    """
    def __init__(self,args):
        """Load labels, transactions and node features named in ``args.elliptic_args``.

        ``args.elliptic_args`` is replaced in place by a ``Namespace``.
        """
        args.elliptic_args = Namespace(args.elliptic_args)
        elliptic_args = args.elliptic_args
        archive_path = os.path.join(elliptic_args.folder, elliptic_args.tar_file)
        tar_archive = tarfile.open(archive_path, 'r:gz')

        self.nodes_labels_times = self.load_node_labels(elliptic_args, tar_archive)
        self.edges = self.load_transactions(elliptic_args, tar_archive)
        self.nodes, self.nodes_feats = self.load_node_feats(elliptic_args, tar_archive)

    def load_node_feats(self, elliptic_args, tar_archive):
        """Return ``(full table, float features)``; column 0 of the table is not a feature."""
        nodes = load_data_from_tar(elliptic_args.feats_file, tar_archive, starting_line=0)

        self.num_nodes = len(nodes)
        self.feats_per_node = nodes.size(1) - 1

        return nodes, nodes[:, 1:].float()

    def load_node_labels(self, elliptic_args, tar_archive):
        """Build a tensor of ``[node id, label, time]`` rows for the labelled nodes.

        Rows whose label is negative (the value 'unknow...' is rewritten to
        while loading) are skipped. The time is looked up in the times table
        using the node id as the row index.
        """
        labels = load_data_from_tar(elliptic_args.classes_file, tar_archive, replace_unknow=True).long()
        times = load_data_from_tar(elliptic_args.times_file, tar_archive, replace_unknow=True).long()
        nid_col, label_col = 0, 1  # columns of the labels table
        time_col = 1               # column of the times table

        rows = []
        for row in labels:
            label = row[[label_col]].long()
            if label >= 0:
                nid = row[[nid_col]].long()
                when = times[nid, [time_col]].long()
                rows.append([nid, label, when])

        return torch.tensor(rows)


    def load_transactions(self, elliptic_args, tar_archive):
        """Return the symmetric edge dict (rows ``[source, target, time]``, all-ones values)."""
        data = load_data_from_tar(elliptic_args.edges_file, tar_archive, type_fn=float, tensor_const=torch.LongTensor)
        time_col = 2
        # append every edge with source and target swapped
        data = torch.cat([data, data[:, [1, 0, 2]]])

        self.max_time = data[:, time_col].max()
        self.min_time = data[:, time_col].min()

        return {'idx': data, 'vals': torch.ones(data.size(0))}

In [22]:
class Reddit_Dataset():
    """Temporal graph with node features and signed, timestamped edges.

    Exposes ``edges`` (dict with ``'idx'`` rows of
    ``[source, target, time, label]`` and ``'vals'`` holding the number of
    raw edges behind each row), ``nodes_feats``, ``feats_per_node``,
    ``num_nodes``, ``num_classes``, ``min_time`` and ``max_time``.
    """
    def __init__(self,args):
        """Load nodes and edges described by ``args.reddit_args``.

        ``args.reddit_args`` is replaced in place by a ``Namespace``; its
        ``folder``, ``nodes_file``, ``title_edges_file``, ``body_edges_file``
        and ``aggr_time`` fields are read.

        Raises:
            Exception: if the nodes file lists the same id twice.
        """
        args.reddit_args = Namespace(args.reddit_args)
        folder = args.reddit_args.folder

        # load nodes: every line is "<string id>,<feat>,<feat>,..."
        nodes_path = os.path.join(folder, args.reddit_args.nodes_file)
        with open(nodes_path) as handle:
            node_lines = handle.read().splitlines()

        ids_str_to_int = {}
        feats = []

        for line in node_lines:
            fields = line.split(',')
            # node id
            nd_id = fields[0]
            if nd_id in ids_str_to_int:
                print('duplicate id', nd_id)
                raise Exception('duplicate_id')
            # integer ids are handed out in order of first appearance
            ids_str_to_int[nd_id] = len(ids_str_to_int)
            feats.append([float(r) for r in fields[1:]])

        feats = torch.tensor(feats,dtype=torch.float)
        num_nodes = feats.size(0)

        edges = []

        # load edges in title
        edges_tmp, _ = self.load_edges_from_file(args.reddit_args.title_edges_file, folder, ids_str_to_int)
        edges.extend(edges_tmp)

        # load edges in bodies
        edges_tmp, _ = self.load_edges_from_file(args.reddit_args.body_edges_file, folder, ids_str_to_int)
        edges.extend(edges_tmp)

        # min time should be 0 and time aggregation
        edges = torch.LongTensor(edges)
        edges[:,2] = aggregate_by_time(edges[:,2],args.reddit_args.aggr_time)
        max_time = edges[:,2].max()

        # separate classes
        sp_indices = edges[:,:3].t()
        sp_values = edges[:,3]

        pos_mask = sp_values == 1
        neg_mask = sp_values == -1

        # coalesce() sums duplicates: each cell holds minus the number of
        # negative edges for that (source, target, time)
        neg_sp_indices = sp_indices[:,neg_mask]
        neg_sp_values = sp_values[neg_mask]
        neg_sp_edges = torch.sparse.LongTensor(neg_sp_indices, neg_sp_values, torch.Size([num_nodes, num_nodes, max_time + 1])).coalesce()

        # and here the number of positive edges
        pos_sp_indices = sp_indices[:, pos_mask]
        pos_sp_values = sp_values[pos_mask]

        pos_sp_edges = torch.sparse.LongTensor(pos_sp_indices, pos_sp_values, torch.Size([num_nodes, num_nodes, max_time + 1])).coalesce()

        # scale positive class to separate after adding: the packed value is
        # 1000 * n_pos + n_neg (only separable while n_neg < 1000)
        pos_sp_edges *= 1000

        sp_edges = (pos_sp_edges - neg_sp_edges).coalesce()

        # separating negs and positive edges per edge/timestamp
        vals = sp_edges._values()
        neg_vals = vals%1000
        pos_vals = vals//1000
        # majority vote between positive and negative edges
        vals = pos_vals - neg_vals

        # creating labels new_vals -> the label of the edges
        # (1 when positives strictly outnumber negatives, else 0)
        new_vals = torch.zeros(vals.size(0),dtype=torch.long)
        new_vals[vals>0] = 1
        new_vals[vals<=0] = 0
        # vals is simply the number of edges between two nodes at the same
        # time_step, regardless of the edge label
        vals = pos_vals + neg_vals
        indices_labels = torch.cat([sp_edges._indices().t(),new_vals.view(-1,1)],dim=1)

        self.edges = {'idx': indices_labels, 'vals': vals}
        self.num_classes = 2
        self.feats_per_node = feats.size(1)
        self.num_nodes = num_nodes
        self.nodes_feats = feats
        self.max_time = max_time
        self.min_time = 0

    def prepare_node_feats(self,node_feats):
        """Return the first element of ``node_feats``."""
        node_feats = node_feats[0]
        return node_feats


    def load_edges_from_file(self,edges_file,folder,ids_str_to_int):
        """Parse a tab-separated edge file into ``[src, dst, day, label]`` rows.

        The first line is skipped as a header. Columns 0, 1, 3 and 4 are the
        source id, target id, timestamp (``YYYY-MM-DD ...``) and integer
        label. Lines whose source or target is missing from
        ``ids_str_to_int`` are counted and skipped. Every kept edge is
        emitted in both directions.

        Returns:
            ``(edges, not_found)``: the list of rows and the number of
            skipped lines. ``day`` is the number of days since 1980-01-01.
        """
        # Imported locally under an alias so the lookup works no matter how
        # the module-level name `datetime` is bound.
        from datetime import datetime as _datetime

        source_col, target_col, time_col, label_col = 0, 1, 3, 4

        edges = []
        not_found = 0

        path = os.path.join(folder, edges_file)
        with open(path) as handle:
            lines = handle.read().splitlines()

        base_time = _datetime.strptime("19800101", '%Y%m%d')

        for line in lines[1:]:
            fields = line.split('\t')
            sr = fields[source_col]
            tg = fields[target_col]

            if sr in ids_str_to_int and tg in ids_str_to_int:
                sr = ids_str_to_int[sr]
                tg = ids_str_to_int[tg]

                # keep only the date part of "<date> <time of day>"
                day_str = fields[time_col].split(' ')[0]
                day = (_datetime.strptime(day_str,'%Y-%m-%d') - base_time).days

                label = int(fields[label_col])
                edges.append([sr,tg,day,label])
                # add the other edge to make it undirected
                edges.append([tg,sr,day,label])
            else:
                not_found+=1

        return edges, not_found

In [23]:
class sbm_dataset():
    """Timestamped edge list with random node features, for link prediction.

    Exposes ``edges`` (dict with ``'idx'`` rows of ``[from, to, time]`` and
    ``'vals'`` holding the 0/1 weight), ``num_classes``, ``num_nodes``,
    ``feats_per_node``, ``nodes_feats``, ``num_non_existing``, ``min_time``
    and ``max_time``.
    """
    def __init__(self,args):
        """Load the edges named in ``args.sbm_args`` (replaced in place by a ``Namespace``)."""
        assert args.task in ['link_pred'], 'sbm only implements link_pred'
        self.ecols = Namespace({
            'FromNodeId': 0,
            'ToNodeId': 1,
            'Weight': 2,
            'TimeStep': 3
        })
        args.sbm_args = Namespace(args.sbm_args)
        time_col = self.ecols.TimeStep
        weight_col = self.ecols.Weight

        # build edge data structure
        edges = self.load_edges(args.sbm_args)
        timesteps = aggregate_by_time(edges[:, time_col], args.sbm_args.aggr_time)
        self.max_time = timesteps.max()
        self.min_time = timesteps.min()
        print('TIME', self.max_time, self.min_time)

        edges[:, time_col] = timesteps
        edges[:, weight_col] = self.cluster_negs_and_positives(edges[:, weight_col])

        self.num_classes = edges[:, weight_col].unique().size(0)
        self.edges = self.edges_to_sp_dict(edges)

        # random node features
        self.num_nodes = int(self.get_num_nodes(edges))
        self.feats_per_node = args.sbm_args.feats_per_node
        self.nodes_feats = torch.rand((self.num_nodes, self.feats_per_node))
        self.num_non_existing = self.num_nodes ** 2 - edges.size(0)

    def cluster_negs_and_positives(self, ratings):
        """Map ratings in place to 1 (rating >= 0) or 0 (rating < 0) and return them."""
        # both masks are taken before any value is overwritten
        is_negative = ratings < 0
        is_non_negative = ratings >= 0
        ratings[is_non_negative] = 1
        ratings[is_negative] = 0
        return ratings

    def prepare_node_feats(self,node_feats):
        """Return the first element of ``node_feats``."""
        return node_feats[0]

    def edges_to_sp_dict(self,edges):
        """Split an edge tensor into ``{'idx': [from, to, time], 'vals': weight}``."""
        index_cols = [self.ecols.FromNodeId, self.ecols.ToNodeId, self.ecols.TimeStep]
        return {'idx': edges[:, index_cols], 'vals': edges[:, self.ecols.Weight]}

    def get_num_nodes(self,edges):
        """Return the largest node id found in ``edges`` plus one."""
        node_cols = [self.ecols.FromNodeId, self.ecols.ToNodeId]
        return edges[:, node_cols].max() + 1

    def load_edges(self,sbm_args, starting_line = 1):
        """Read the comma-separated edge file, skipping ``starting_line`` lines, as a long tensor."""
        path = os.path.join(sbm_args.folder, sbm_args.edges_file)
        with open(path) as handle:
            rows = handle.read().splitlines()[starting_line:]
        parsed = [[float(cell) for cell in row.split(',')] for row in rows]
        return torch.tensor(parsed, dtype=torch.long)

    def make_contigous_node_ids(self,edges):
        """Relabel the node-id columns in place to 0..n-1 and return ``edges``."""
        node_cols = [self.ecols.FromNodeId, self.ecols.ToNodeId]
        # the inverse mapping of unique() is each id's rank among the sorted ids
        relabelled = edges[:, node_cols].unique(return_inverse=True)[1]
        edges[:, node_cols] = relabelled
        return edges

In [24]:
class Uc_Irvine_Message_Dataset():
    """Temporal edge list read from a bz2-compressed tar archive.

    Exposes ``edges`` (dict with ``'idx'`` rows of ``[source, target, time]``
    and all-ones ``'vals'``), ``num_nodes``, ``num_non_existing``,
    ``min_time`` and ``max_time``.
    """
    def __init__(self,args):
        """Load the edges named in ``args.uc_irc_args`` (replaced in place by a ``Namespace``)."""
        args.uc_irc_args = Namespace(args.uc_irc_args)

        archive_path = os.path.join(args.uc_irc_args.folder, args.uc_irc_args.tar_file)
        tar_archive = tarfile.open(archive_path, 'r:bz2')

        self.edges = self.load_edges(args,tar_archive)

    def load_edges(self,args,tar_archive):
        """Parse the space-separated edge file (first two lines skipped) into the edge dict."""
        # column positions in the raw file
        src, dst, weight, when = 0, 1, 2, 3

        data = load_data_from_tar(args.uc_irc_args.edges_file, tar_archive, starting_line=2, sep=' ')
        data = data.long()

        # taken before the shift below, so this equals the largest raw id
        self.num_nodes = int(data[:, [src, dst]].max())

        # first id should be 0 (they are already contiguous)
        data[:, [src, dst]] -= 1

        # add edges in the other direction (symmetric)
        mirrored = data[:, [dst, src, weight, when]]
        data = torch.cat([data, mirrored], dim=0)

        data[:, when] = aggregate_by_time(data[:, when], args.uc_irc_args.aggr_time)

        # one scalar id per (source, target) pair, used to count distinct pairs
        pair_ids = data[:, src] * self.num_nodes + data[:, dst]
        self.num_non_existing = float(self.num_nodes**2 - pair_ids.unique().size(0))

        idx = data[:, [src, dst, when]]

        self.max_time = data[:, when].max()
        self.min_time = data[:, when].min()

        return {'idx': idx, 'vals': torch.ones(idx.size(0))}

#### Tasker Preparation

In [25]:
# Column positions inside an edge index row: [source, target, time, label].
# The 'label' column was added for edge_cls.
ECOLS = Namespace(dict(source=0, target=1, time=2, label=3))

In [26]:
# def get_2_hot_deg_feats(adj,max_deg_out,max_deg_in,num_nodes):
#     # For now it'll just return a 2-hot vector
#     adj['vals'] = torch.ones(adj['idx'].size(0))
#     degs_out, degs_in = get_degree_vects(adj,num_nodes)
    
#     degs_out = {'idx': torch.cat([torch.arange(num_nodes).view(-1,1),
#                                   degs_out.view(-1,1)],dim=1),
#                 'vals': torch.ones(num_nodes)}
    
#     # print ('XXX degs_out',degs_out['idx'].size(),degs_out['vals'].size())
#     degs_out = make_sparse_tensor(degs_out,'long',[num_nodes,max_deg_out])

#     degs_in = {'idx': torch.cat([torch.arange(num_nodes).view(-1,1),
#                                   degs_in.view(-1,1)],dim=1),
#                 'vals': torch.ones(num_nodes)}
#     degs_in = make_sparse_tensor(degs_in,'long',[num_nodes,max_deg_in])

#     hot_2 = torch.cat([degs_out,degs_in],dim = 1)
#     hot_2 = {'idx': hot_2._indices().t(),
#              'vals': hot_2._values()}

#     return hot_2

In [27]:
def get_1_hot_deg_feats(adj,max_deg,num_nodes):
    """Encode every node's out-degree as a one-hot row of width ``max_deg``.

    Edge values of ``adj`` are ignored (each listed edge counts once). The
    result is an ``{'idx', 'vals'}`` dict describing a sparse
    ``num_nodes x max_deg`` matrix with a single entry per node, placed in
    the column equal to that node's out-degree.
    """
    unweighted = {'idx': adj['idx'], 'vals': torch.ones(adj['idx'].size(0))}
    out_degrees = get_degree_vects(unweighted, num_nodes)[0]

    node_ids = torch.arange(num_nodes).view(-1, 1)
    one_hot = {
        'idx': torch.cat([node_ids, out_degrees.view(-1, 1)], dim=1),
        'vals': torch.ones(num_nodes),
    }
    one_hot = make_sparse_tensor(one_hot, 'long', [num_nodes, max_deg])

    return {'idx': one_hot._indices().t(), 'vals': one_hot._values()}

In [28]:
def get_max_degs(args,dataset,all_window=False):
    """Return ``(max out-degree + 1, max in-degree + 1)`` over the time steps.

    The steps ``dataset.min_time`` up to, but not including,
    ``dataset.max_time`` are visited. For each step an unweighted adjacency
    is built over a window of ``t + 1`` steps when ``all_window`` is set,
    otherwise over ``args.adj_mat_time_window`` steps.
    """
    out_maxima = []
    in_maxima = []
    for t in range(dataset.min_time, dataset.max_time):
        window = t + 1 if all_window else args.adj_mat_time_window
        cur_adj = get_sp_adj(edges=dataset.edges, time=t, weighted=False, time_window=window)
        cur_out, cur_in = get_degree_vects(cur_adj, dataset.num_nodes)
        out_maxima.append(cur_out.max())
        in_maxima.append(cur_in.max())

    max_deg_out = int(torch.stack(out_maxima).max()) + 1
    max_deg_in = int(torch.stack(in_maxima).max()) + 1
    return max_deg_out, max_deg_in

In [29]:
def get_max_degs_static(num_nodes, adj_matrix):
    """Return ``(max out-degree + 1, max in-degree + 1)`` of a single adjacency dict."""
    degree_vects = get_degree_vects(adj_matrix, num_nodes)
    max_deg_out, max_deg_in = [int(degs.max().item()) + 1 for degs in degree_vects]
    return max_deg_out, max_deg_in

In [30]:
def get_degree_vects(adj,num_nodes):
    """Return ``(row sums, column sums)`` of the adjacency as ``(num_nodes, 1)`` long tensors."""
    sparse_adj = make_sparse_tensor(adj, 'long', [num_nodes])
    ones_col = torch.ones(num_nodes, 1, dtype=torch.long)
    degs_out = sparse_adj.matmul(ones_col)
    degs_in = sparse_adj.t().matmul(ones_col)
    return degs_out, degs_in

In [31]:
def get_sp_adj(edges,time,weighted,time_window):
    """Build the adjacency dict for the window ``(time - time_window, time]``.

    Edges falling in the window are merged per (source, target) pair. With
    ``weighted`` the merged (summed) values are returned, otherwise every
    distinct pair gets the value 1 (long).
    """
    times = edges['idx'][:, ECOLS.time]
    # product of the two boolean masks acts as a logical AND
    in_window = (times <= time) * (times > (time - time_window))

    pairs = edges['idx'][in_window][:, [ECOLS.source, ECOLS.target]]
    merged = torch.sparse.FloatTensor(pairs.t(), edges['vals'][in_window]).coalesce()
    idx = merged._indices().t()

    vals = merged._values() if weighted else torch.ones(idx.size(0), dtype=torch.long)
    return {'idx': idx, 'vals': vals}

In [32]:
def get_edge_labels(edges,time):
    """Return the (source, target) pairs and labels of the edges stamped exactly ``time``."""
    at_time = edges['idx'][edges['idx'][:, ECOLS.time] == time]
    return {
        'idx': at_time[:, [ECOLS.source, ECOLS.target]],
        'vals': at_time[:, ECOLS.label],
    }

In [33]:
def get_node_mask(cur_adj,num_nodes):
    """Return a length-``num_nodes`` mask: 0 for nodes listed in ``cur_adj['idx']``, -inf otherwise."""
    touched = cur_adj['idx'].unique()
    mask = torch.zeros(num_nodes)
    mask -= float("Inf")
    mask[touched] = 0
    return mask

In [34]:
def get_static_sp_adj(edges,weighted):
    """Return the adjacency dict of the whole edge set, ignoring time.

    Args:
        edges: dict with ``'idx'`` (one row per edge) and ``'vals'``.
        weighted: when true the stored values are returned as they are,
            otherwise every edge gets the value 1 (long).

    Returns:
        ``{'idx': edges['idx'], 'vals': values}``.
    """
    idx = edges['idx']
    if weighted:
        # No time subset is taken in the static case; the former
        # `edges['vals'][subset]` referenced an undefined name.
        vals = edges['vals']
    else:
        vals = torch.ones(idx.size(0),dtype = torch.long)

    return {'idx': idx, 'vals': vals}

In [35]:
def get_sp_adj_only_new(edges,time,weighted):
    """Adjacency dict restricted to the edges of time step ``time`` itself (window of 1)."""
    return get_sp_adj(edges=edges, time=time, weighted=weighted, time_window=1)

In [36]:
def normalize_adj(adj,num_nodes):
    '''
    Symmetrically normalise an adjacency given as a dict with idx and vals:
        - an identity matrix is added (self loops),
        - the degree vector is taken as the row sums of the result,
        - every stored element aij is multiplied by (di*dj)^-1/2.
    Returns a dict with the same two keys.
    '''
    indices = adj['idx'].t()
    weights = adj['vals'].type(torch.float)
    shape = torch.Size([num_nodes, num_nodes])

    sp_tensor = torch.sparse.FloatTensor(indices, weights, shape)
    sp_tensor = make_sparse_eye(num_nodes) + sp_tensor

    idx = sp_tensor._indices()
    degree = torch.sparse.sum(sp_tensor, dim=1).to_dense()

    # one scaling factor per stored entry: (d_row * d_col) ** -0.5
    scale = (degree[idx[0]] * degree[idx[1]]) ** -0.5

    return {'idx': idx.t(), 'vals': sp_tensor._values() * scale}

In [37]:
def make_sparse_eye(size):
    """Return the ``size x size`` identity matrix as a sparse float tensor."""
    diagonal = torch.arange(size)
    eye_idx = torch.stack([diagonal, diagonal])  # row and column indices coincide
    return torch.sparse.FloatTensor(eye_idx, torch.ones(size), torch.Size([size, size]))

In [38]:
def get_all_non_existing_edges(adj,tot_nodes):
    """List every ordered node pair that is absent from ``adj``.

    All ``tot_nodes * tot_nodes`` pairs are enumerated and those whose edge
    id (as computed by ``get_edges_ids``) appears in ``adj`` are dropped.
    Returns ``{'idx': pairs, 'vals': zeros (long)}``.
    """
    existing_ids = get_edges_ids(adj['idx'].t().numpy(), tot_nodes)

    node_range = np.arange(tot_nodes)
    candidates = np.array(np.meshgrid(node_range, node_range)).reshape(2, -1)
    candidate_ids = get_edges_ids(candidates, tot_nodes)

    # keep only the candidates that are not existing edges
    absent = np.isin(candidate_ids, existing_ids, invert=True)

    edges = torch.tensor(candidates[:, absent]).t()
    return {'idx': edges, 'vals': torch.zeros(edges.size(0), dtype=torch.long)}

In [39]:
def get_non_existing_edges(adj,number, tot_nodes, smart_sampling, existing_nodes=None):
    """
    Sample up to `number` distinct node pairs that are not edges of adj.

    Four times as many candidate pairs as needed are drawn at random, then
    scanned in order; a candidate is kept unless it is a self pair, a
    duplicate of an already kept candidate, or an edge of adj. Fewer pairs
    than requested may therefore be returned.

    Args:
        adj: dict whose 'idx' holds one (source, target) row per edge.
        number: requested number of negative pairs.
        tot_nodes: number of nodes; pair (u, v) has flat id u * tot_nodes + v.
        smart_sampling: if true, sources are drawn from the sources of adj and
            targets from existing_nodes; otherwise both ends are drawn
            uniformly from [0, tot_nodes).
        existing_nodes: candidate targets, only read when smart_sampling is true.

    Returns:
        dict with 'idx' (one row per sampled pair) and 'vals' (zero long
        tensor). When nothing can be sampled (the capped request is <= 0)
        an empty 0 x 2 'idx' is returned instead of raising.
    """
    idx = adj['idx'].t().numpy()
    true_ids = set(idx[0] * tot_nodes + idx[1])

    # the maximum of edges would be all edges that don't exist between nodes that have edges
    # NOTE(review): the cap is computed from the number of edges (idx.shape[1]),
    # not from the number of distinct nodes -- confirm this is intended.
    num_edges = min(number,idx.shape[1] * (idx.shape[1]-1) - len(true_ids))

    if num_edges <= 0:
        # nothing to sample: e.g. number == 0, or adj holds a single edge
        return {'idx': torch.zeros((0, 2), dtype = torch.long),
                'vals': torch.zeros(0, dtype = torch.long)}

    # oversample so that enough candidates survive the filtering below
    num_candidates = num_edges * 4
    if smart_sampling:
        from_id = np.random.choice(idx[0], size = num_candidates, replace = True)
        to_id = np.random.choice(existing_nodes, size = num_candidates, replace = True)
        edges = np.stack([from_id,to_id])
    else:
        edges = np.random.randint(0,tot_nodes,(2,num_candidates))

    edge_ids = edges[0] * tot_nodes + edges[1]

    out_ids = set()
    sampled_indices = []
    for i, eid in enumerate(edge_ids):
        # ignore duplicates, self pairs and existing edges
        if eid in out_ids or edges[0,i] == edges[1,i] or eid in true_ids:
            continue

        out_ids.add(eid)
        sampled_indices.append(i)

        # if we have sampled enough edges break
        if len(sampled_indices) >= num_edges:
            break

    edges = edges[:,sampled_indices]
    edges = torch.tensor(edges).t()
    vals = torch.zeros(edges.size(0),dtype = torch.long)
    return {'idx': edges, 'vals': vals}

In [40]:
def get_edges_ids(sp_idx, tot_nodes):
    """
    Map edges to flat ids: edge (u, v) gets id u * tot_nodes + v.

    sp_idx[0] holds the first end of every edge and sp_idx[1] the second.
    """
    first_end, second_end = sp_idx[0], sp_idx[1]
    return first_end * tot_nodes + second_end

#### Taskers

In [41]:
class Edge_Cls_Tasker():
    """
    Tasker that assembles the inputs of an edge classification task.

    Depending on args, node features are 2-hot degree features, 1-hot degree
    features, or the features stored on the dataset; get_node_feats and
    prepare_node_feats are bound accordingly at construction time.
    """
    def __init__(self, args, dataset):
        self.data = dataset
        self.max_time = dataset.max_time
        self.args = args
        self.num_classes = dataset.num_classes

        if not args.use_1_hot_node_feats:
            self.feats_per_node = dataset.feats_per_node

        # build_get_node_feats may overwrite feats_per_node, so it runs first
        self.get_node_feats = self.build_get_node_feats(args,dataset)
        self.prepare_node_feats = self.build_prepare_node_feats(args,dataset)

        self.is_static = False

    def build_prepare_node_feats(self,args,dataset):
        """Return the callable that turns stored node features into a model input."""
        if not (args.use_2_hot_node_feats or args.use_1_hot_node_feats):
            return self.data.prepare_node_feats

        def prepare_degree_feats(node_feats):
            # feats_per_node is read at call time, not captured here
            shape = [dataset.num_nodes, self.feats_per_node]
            return sparse_prepare_tensor(node_feats, torch_size = shape)

        return prepare_degree_feats

    def build_get_node_feats(self,args,dataset):
        """Return the callable mapping an adjacency dict to node features and set feats_per_node for degree features."""
        if args.use_2_hot_node_feats:
            max_deg_out, max_deg_in = get_max_degs(args,dataset)
            self.feats_per_node = max_deg_out + max_deg_in

            def two_hot_feats(adj):
                return get_2_hot_deg_feats(adj, max_deg_out, max_deg_in, dataset.num_nodes)
            return two_hot_feats

        if args.use_1_hot_node_feats:
            max_deg,_ = get_max_degs(args,dataset)
            self.feats_per_node = max_deg

            def one_hot_feats(adj):
                return get_1_hot_deg_feats(adj, max_deg, dataset.num_nodes)
            return one_hot_feats

        def stored_feats(adj):
            return dataset.nodes_feats
        return stored_feats

    def get_sample(self,idx,test):
        """
        Build the sample for time step idx.

        For every step in [idx - args.num_hist_steps, idx] the weighted
        adjacency is fetched, its node mask and node features are computed,
        and the adjacency is normalized. Labels come from get_edge_labels at
        step idx. The `test` argument is not used here.
        """
        adjs, feats, masks = [], [], []
        first_step = idx - self.args.num_hist_steps

        for step in range(first_step, idx+1):
            raw_adj = get_sp_adj(edges = self.data.edges,
                                 time = step,
                                 weighted = True,
                                 time_window = self.args.adj_mat_time_window)
            masks.append(get_node_mask(raw_adj, self.data.num_nodes))
            feats.append(self.get_node_feats(raw_adj))
            adjs.append(normalize_adj(adj = raw_adj, num_nodes = self.data.num_nodes))

        labels = get_edge_labels(edges = self.data.edges, time = idx)

        return {'idx': idx,
                'hist_adj_list': adjs,
                'hist_ndFeats_list': feats,
                'label_sp': labels,
                'node_mask_list': masks}

In [42]:
class Link_Pred_Tasker():
    '''
    Tasker that assembles the inputs needed to train on a link prediction task.

    It only relies on the attributes of the dataset object read below (edges,
    max_time, num_nodes and, depending on the feature mode, feats_per_node,
    nodes_feats and prepare_node_feats), which keeps the tasker independent of
    the concrete dataset.

    get_sample returns a dictionary with:
        - idx: the time step the sample was built for
        - hist_adj_list: one normalized adjacency (dict with idx and vals) per
                         step of the history window ending at idx
        - hist_ndFeats_list: the node features of each of those steps
        - label_sp: dict with idx (one row per node pair) and vals; the pairs
                    are the edges returned by get_sp_adj for step idx+1 with
                    weighted=False, followed by the sampled non-existing
                    edges, whose vals are 0
        - node_mask_list: the output of get_node_mask for each step

    On test (or development) the number of sampled non-existing edges is
    scaled by args.negative_mult_test instead of args.negative_mult_training.
    '''
    def __init__(self,args,dataset):
        self.data = dataset
        # labels of step t come from step t+1, so the last usable step is one before the end
        self.max_time = dataset.max_time - 1
        self.args = args
        self.num_classes = 2

        uses_degree_feats = args.use_2_hot_node_feats or args.use_1_hot_node_feats
        if not uses_degree_feats:
            self.feats_per_node = dataset.feats_per_node

        self.get_node_feats = self.build_get_node_feats(args,dataset)
        self.prepare_node_feats = self.build_prepare_node_feats(args,dataset)
        self.is_static = False

    def build_prepare_node_feats(self,args,dataset):
        '''Return the callable that turns stored node features into a model input.'''
        if not (args.use_2_hot_node_feats or args.use_1_hot_node_feats):
            return self.data.prepare_node_feats

        def prepare_degree_feats(node_feats):
            # feats_per_node is read at call time, not captured here
            shape = [dataset.num_nodes, self.feats_per_node]
            return sparse_prepare_tensor(node_feats, torch_size = shape)

        return prepare_degree_feats

    def build_get_node_feats(self,args,dataset):
        '''Return the callable mapping an adjacency dict to node features and set feats_per_node for degree features.'''
        if args.use_2_hot_node_feats:
            max_deg_out, max_deg_in = get_max_degs(args,dataset)
            self.feats_per_node = max_deg_out + max_deg_in

            def two_hot_feats(adj):
                return get_2_hot_deg_feats(adj, max_deg_out, max_deg_in, dataset.num_nodes)
            return two_hot_feats

        if args.use_1_hot_node_feats:
            max_deg,_ = get_max_degs(args,dataset)
            self.feats_per_node = max_deg

            def one_hot_feats(adj):
                return get_1_hot_deg_feats(adj, max_deg, dataset.num_nodes)
            return one_hot_feats

        def stored_feats(adj):
            return dataset.nodes_feats
        return stored_feats

    def get_sample(self,idx,test, **kwargs):
        '''
        Build the link prediction sample for time step idx.

        Pass all_edges=True to label every non-existing pair (via
        get_all_non_existing_edges) instead of a random sample of them.
        '''
        adjs, feats, masks = [], [], []
        existing_nodes = []
        window = self.args.adj_mat_time_window

        for step in range(idx - self.args.num_hist_steps, idx+1):
            raw_adj = get_sp_adj(edges = self.data.edges, time = step, weighted = True, time_window = window)

            # nodes seen in the history are the candidate targets of smart negative sampling
            if self.args.smart_neg_sampling:
                existing_nodes.append(raw_adj['idx'].unique())
            else:
                existing_nodes = None

            masks.append(get_node_mask(raw_adj, self.data.num_nodes))
            feats.append(self.get_node_feats(raw_adj))
            adjs.append(normalize_adj(adj = raw_adj, num_nodes = self.data.num_nodes))

        # positives: all the edges in the time window ending at the next step
        label_adj = get_sp_adj(edges = self.data.edges, time = idx+1, weighted = False, time_window = self.args.adj_mat_time_window)

        neg_mult = self.args.negative_mult_test if test else self.args.negative_mult_training

        if self.args.smart_neg_sampling:
            existing_nodes = torch.cat(existing_nodes)

        # explicit comparison with True is kept on purpose
        if kwargs.get('all_edges') == True:
            negatives = get_all_non_existing_edges(adj = label_adj, tot_nodes = self.data.num_nodes)
        else:
            negatives = get_non_existing_edges(adj = label_adj,
                                               number = label_adj['vals'].size(0) * neg_mult,
                                               tot_nodes = self.data.num_nodes,
                                               smart_sampling = self.args.smart_neg_sampling,
                                               existing_nodes = existing_nodes)

        # append the negatives after the positives
        label_adj['idx'] = torch.cat([label_adj['idx'], negatives['idx']])
        label_adj['vals'] = torch.cat([label_adj['vals'], negatives['vals']])

        return {'idx': idx,
                'hist_adj_list': adjs,
                'hist_ndFeats_list': feats,
                'label_sp': label_adj,
                'node_mask_list': masks}

In [43]:
class Node_Cls_Tasker():
    """
    Tasker that assembles the inputs of a (binary) node classification task.

    Node labels are read from dataset.nodes_labels_times, whose columns are
    used as (node id, label, time step) by get_node_labels.
    """
    def __init__(self,args,dataset):
        self.data = dataset
        self.max_time = dataset.max_time
        self.args = args
        self.num_classes = 2
        self.feats_per_node = dataset.feats_per_node
        self.nodes_labels_times = dataset.nodes_labels_times
        # build_get_node_feats may overwrite feats_per_node, so it runs first
        self.get_node_feats = self.build_get_node_feats(args,dataset)
        self.prepare_node_feats = self.build_prepare_node_feats(args,dataset)
        self.is_static = False

    def build_get_node_feats(self,args,dataset):
        """Return the callable (step, adj) -> node features and set feats_per_node for degree features."""
        if args.use_2_hot_node_feats:
            max_deg_out, max_deg_in = get_max_degs(args,dataset,all_window = True)
            self.feats_per_node = max_deg_out + max_deg_in

            def two_hot_feats(i,adj):
                return get_2_hot_deg_feats(adj, max_deg_out, max_deg_in, dataset.num_nodes)
            return two_hot_feats

        if args.use_1_hot_node_feats:
            max_deg,_ = get_max_degs(args,dataset)
            self.feats_per_node = max_deg

            def one_hot_feats(i,adj):
                return get_1_hot_deg_feats(adj, max_deg, dataset.num_nodes)
            return one_hot_feats

        def stored_feats(i,adj):
            # the step index is ignored since the features for Elliptic are static
            return dataset.nodes_feats
        return stored_feats

    def build_prepare_node_feats(self,args,dataset):
        """Return the callable that turns stored node features into a model input."""
        if args.use_2_hot_node_feats or args.use_1_hot_node_feats:
            def prepare_degree_feats(node_feats):
                # feats_per_node is read at call time, not captured here
                shape = [dataset.num_nodes, self.feats_per_node]
                return sparse_prepare_tensor(node_feats, torch_size = shape)
            return prepare_degree_feats

        def first_entry(node_feats):
            return node_feats[0]  # original author's note: "I'll have to check this up"
        return first_entry

    def get_sample(self,idx,test):
        """
        Build the sample for time step idx.

        For every step in [idx - args.num_hist_steps, idx] the weighted
        adjacency restricted to args.adj_mat_time_window is fetched, its node
        mask and node features are computed, and the adjacency is normalized.
        Labels are the ones of step idx. The `test` argument is not used here.
        """
        adjs, feats, masks = [], [], []

        for step in range(idx - self.args.num_hist_steps, idx+1):
            raw_adj = get_sp_adj(edges = self.data.edges,
                                 time = step,
                                 weighted = True,
                                 time_window = self.args.adj_mat_time_window)

            masks.append(get_node_mask(raw_adj, self.data.num_nodes))
            feats.append(self.get_node_feats(step,raw_adj))
            adjs.append(normalize_adj(adj = raw_adj, num_nodes = self.data.num_nodes))

        labels = self.get_node_labels(idx)

        return {'idx': idx,
                'hist_adj_list': adjs,
                'hist_ndFeats_list': feats,
                'label_sp': labels,
                'node_mask_list': masks}

    def get_node_labels(self,idx):
        """
        Return the labelled nodes of time step idx.

        Rows of nodes_labels_times whose third column equals idx are
        selected; their first column is returned as 'idx' and their second
        column as 'vals'.
        """
        labels = self.nodes_labels_times
        at_step = labels[:,2] == idx
        return {'idx': labels[at_step,0], 'vals': labels[at_step,1]}

In [44]:
class Static_Node_Cls_Tasker(Node_Cls_Tasker):
    """
    Node classification tasker over a single, static graph.

    The adjacency is built once from all edges (unweighted) and normalized;
    node features are either 2-hot degree features or the ones stored on the
    dataset. The parent constructor is not called.
    """
    def __init__(self,args,dataset):
        self.data = dataset
        self.args = args
        self.num_classes = 2

        num_nodes = self.data.num_nodes
        self.adj_matrix = get_static_sp_adj(edges = self.data.edges, weighted = False)

        if args.use_2_hot_node_feats:
            max_deg_out, max_deg_in = get_max_degs_static(num_nodes, self.adj_matrix)
            self.feats_per_node = max_deg_out + max_deg_in

            degree_feats = get_2_hot_deg_feats(self.adj_matrix, max_deg_out, max_deg_in, num_nodes)
            self.nodes_feats = sparse_prepare_tensor(degree_feats,
                                                     torch_size = [num_nodes, self.feats_per_node],
                                                     ignore_batch_dim = False)
        else:
            self.feats_per_node = dataset.feats_per_node
            # NOTE(review): reads `node_feats` while the other taskers read
            # `nodes_feats` from their dataset -- confirm the attribute name.
            self.nodes_feats = self.data.node_feats

        self.adj_matrix = normalize_adj(adj = self.adj_matrix, num_nodes = num_nodes)
        self.is_static = True

    def get_sample(self,idx,test):
        """Return the node index (as int) together with its label from dataset.nodes_labels."""
        node = int(idx)
        return {'idx': node,
                'label': self.data.nodes_labels[node]}

### 构建模型

In [45]:
class Sp_GCN(torch.nn.Module):
    """
    Plain GCN applied to the last snapshot of the sequence only.

    Layer 0 maps feats_per_node -> layer_1_feats; every further layer uses a
    layer_1_feats x layer_2_feats weight.
    """
    def __init__(self,args,activation):
        super().__init__()
        self.activation = activation
        self.num_layers = args.num_layers

        self.w_list = nn.ParameterList()
        for layer in range(self.num_layers):
            if layer == 0:
                shape = (args.feats_per_node, args.layer_1_feats)
            else:
                shape = (args.layer_1_feats, args.layer_2_feats)
            weight = Parameter(torch.Tensor(*shape))
            reset_param(weight)
            self.w_list.append(weight)

    def forward(self,A_list, Nodes_list, nodes_mask_list):
        """
        Run the GCN on the last adjacency / feature pair.

        A_list and Nodes_list hold one entry per time step; only the last one
        of each is used, and nodes_mask_list is not used. Each layer computes
        activation(Ahat @ (H @ W)); the feature-by-weight product is done
        first, as in the original note(bwheatman, tfk).
        """
        hidden = Nodes_list[-1]
        Ahat = A_list[-1]

        # the first weight is always applied, the others follow num_layers
        weights = [self.w_list[0]] + [self.w_list[i] for i in range(1, self.num_layers)]
        for weight in weights:
            hidden = self.activation(Ahat.matmul(hidden.matmul(weight)))
        return hidden

In [46]:
class Sp_Skip_GCN(Sp_GCN):
    """
    Two-layer GCN on the last snapshot with a linear skip connection.

    The raw node features are projected by W_feat and added to the
    pre-activation of the second layer. Requires args.num_layers >= 2, since
    the first two weights of the parent's w_list are used.
    """
    def __init__(self,args,activation):
        super().__init__(args,activation)
        # The skip term is added to the output of the second layer, so it has
        # to be layer_2_feats wide (same as before whenever
        # layer_1_feats == layer_2_feats, the only case where the sum is valid).
        self.W_feat = Parameter(torch.Tensor(args.feats_per_node, args.layer_2_feats))
        # torch.Tensor(...) is uninitialized memory: initialize like the other weights
        reset_param(self.W_feat)

    def forward(self,A_list, Nodes_list = None, nodes_mask_list = None):
        """
        Args:
            A_list: one adjacency per time step; only the last one is used.
            Nodes_list: one node feature matrix per time step; only the last
                one is used.
            nodes_mask_list: accepted for interface parity with
                Sp_GCN.forward; not used.

        Returns:
            The activated output of the second layer, including the skip term.
        """
        node_feats = Nodes_list[-1]
        Ahat = A_list[-1]

        # note(bwheatman, tfk): change order of matrix multiply
        l1 = self.activation(Ahat.matmul(node_feats.matmul(self.w_list[0])))
        l2 = self.activation(Ahat.matmul(l1.matmul(self.w_list[1])) + node_feats.matmul(self.W_feat))

        return l2

In [47]:
class Sp_Skip_NodeFeats_GCN(Sp_GCN):
    """
    GCN on the last snapshot whose output is concatenated with the raw input
    node features (feature skip connection).
    """
    def forward(self,A_list, Nodes_list = None, nodes_mask_list = None):
        """
        Args:
            A_list: one adjacency per time step; only the last one is used.
            Nodes_list: one node feature matrix per time step; only the last
                one is used.
            nodes_mask_list: accepted for interface parity with
                Sp_GCN.forward; not used.

        Returns:
            The GCN output with the input features appended along dim 1.
        """
        node_feats = Nodes_list[-1]
        Ahat = A_list[-1]

        last_l = self.activation(Ahat.matmul(node_feats.matmul(self.w_list[0])))
        for i in range(1, self.num_layers):
            last_l = self.activation(Ahat.matmul(last_l.matmul(self.w_list[i])))

        # sparse (e.g. 2-hot encoded) features must be densified before the cat
        skip_feats = node_feats.to_dense() if node_feats.is_sparse else node_feats
        return torch.cat((last_l,skip_feats), dim=1)

In [48]:
class Sp_GCN_LSTM_A(Sp_GCN):
    """
    GCN applied independently to every snapshot, followed by an LSTM over the
    resulting sequence of node embeddings.
    """
    def __init__(self,args,activation):
        super().__init__(args,activation)
        rnn_config = dict(input_size = args.layer_2_feats,
                          hidden_size = args.lstm_l2_feats,
                          num_layers = args.lstm_l2_layers)
        self.rnn = nn.LSTM(**rnn_config)

    def forward(self,A_list, Nodes_list = None, nodes_mask_list = None):
        """
        Embed every time step with the shared GCN weights, stack the
        embeddings along a new leading (time) dimension, run the recurrent
        layer and return its output at the last step. nodes_mask_list is not
        used.
        """
        embeddings = []
        for t, Ahat in enumerate(A_list):
            hidden = Nodes_list[t]
            # the first weight is always applied, the others follow num_layers
            weights = [self.w_list[0]] + [self.w_list[i] for i in range(1, self.num_layers)]
            for weight in weights:
                # feature-by-weight product first, then the adjacency
                hidden = self.activation(Ahat.matmul(hidden.matmul(weight)))
            embeddings.append(hidden)

        sequence = torch.stack(embeddings)

        out, _ = self.rnn(sequence, None)
        return out[-1]

In [49]:
class Sp_GCN_GRU_A(Sp_GCN_LSTM_A):
    """Same as Sp_GCN_LSTM_A, with the recurrent layer replaced by a GRU."""
    def __init__(self,args,activation):
        super().__init__(args,activation)
        # overwrite the LSTM created by the parent constructor
        rnn_config = dict(input_size = args.layer_2_feats,
                          hidden_size = args.lstm_l2_feats,
                          num_layers = args.lstm_l2_layers)
        self.rnn = nn.GRU(**rnn_config)

In [50]:
class Sp_GCN_LSTM_B(Sp_GCN):
    """
    Two GCN layers with an LSTM after each of them: the first LSTM runs over
    the sequence of first-layer embeddings, and its outputs feed the second
    GCN layer, whose sequence goes through the second LSTM.
    """
    def __init__(self,args,activation):
        super().__init__(args,activation)
        assert args.num_layers == 2, 'GCN-LSTM and GCN-GRU requires 2 conv layers.'

        l1_config = dict(input_size = args.layer_1_feats,
                         hidden_size = args.lstm_l1_feats,
                         num_layers = args.lstm_l1_layers)
        self.rnn_l1 = nn.LSTM(**l1_config)

        l2_config = dict(input_size = args.layer_2_feats,
                         hidden_size = args.lstm_l2_feats,
                         num_layers = args.lstm_l2_layers)
        self.rnn_l2 = nn.LSTM(**l2_config)

        # NOTE(review): W2 (lstm_l1_feats x layer_2_feats) is created and
        # initialized but forward() multiplies by w_list[1]
        # (layer_1_feats x layer_2_feats) instead -- confirm which is intended.
        self.W2 = Parameter(torch.Tensor(args.lstm_l1_feats, args.layer_2_feats))
        reset_param(self.W2)

    def forward(self,A_list, Nodes_list = None, nodes_mask_list = None):
        """
        Return the output of the second recurrent layer at the last time
        step. nodes_mask_list is not used.
        """
        # first GCN layer on every snapshot
        first_layer = []
        for t, Ahat in enumerate(A_list):
            projected = Nodes_list[t].matmul(self.w_list[0])
            first_layer.append(self.activation(Ahat.matmul(projected)))

        out_l1, _ = self.rnn_l1(torch.stack(first_layer), None)

        # second GCN layer on the outputs of the first recurrent layer
        second_layer = []
        for t, Ahat in enumerate(A_list):
            # here the adjacency product comes first, then the weight
            propagated = Ahat.matmul(out_l1[t])
            second_layer.append(self.activation(propagated.matmul(self.w_list[1])))

        out, _ = self.rnn_l2(torch.stack(second_layer), None)
        return out[-1]

In [51]:
class Sp_GCN_GRU_B(Sp_GCN_LSTM_B):
    """Same as Sp_GCN_LSTM_B, with both recurrent layers replaced by GRUs."""
    def __init__(self,args,activation):
        super().__init__(args,activation)
        # overwrite the two LSTMs created by the parent constructor
        l1_config = dict(input_size = args.layer_1_feats,
                         hidden_size = args.lstm_l1_feats,
                         num_layers = args.lstm_l1_layers)
        self.rnn_l1 = nn.GRU(**l1_config)

        l2_config = dict(input_size = args.layer_2_feats,
                         hidden_size = args.lstm_l2_feats,
                         num_layers = args.lstm_l2_layers)
        self.rnn_l2 = nn.GRU(**l2_config)

In [52]:
class Classifier(torch.nn.Module):
    """
    Two-layer MLP head: Linear -> ReLU -> Linear.

    The input width is in_features when given; otherwise it is twice
    lstm_l2_feats for the LSTM experiment types listed below and twice
    layer_2_feats for every other experiment type (both read from
    args.gcn_parameters). The hidden width is args.gcn_parameters['cls_feats'].
    """
    def __init__(self,args,out_features=2, in_features = None):
        super().__init__()

        lstm_experiments = ('sp_lstm_A_trainer', 'sp_lstm_B_trainer',
                            'sp_weighted_lstm_A', 'sp_weighted_lstm_B')

        if in_features is not None:
            num_feats = in_features
        elif args.experiment_type in lstm_experiments:
            num_feats = 2 * args.gcn_parameters['lstm_l2_feats']
        else:
            num_feats = 2 * args.gcn_parameters['layer_2_feats']
        print ('CLS num_feats',num_feats)

        hidden_feats = args.gcn_parameters['cls_feats']
        input_layer = torch.nn.Linear(in_features = num_feats,
                                      out_features = hidden_feats)
        output_layer = torch.nn.Linear(in_features = hidden_feats,
                                       out_features = out_features)
        self.mlp = torch.nn.Sequential(input_layer, torch.nn.ReLU(), output_layer)

    def forward(self,x):
        """Apply the MLP to x."""
        return self.mlp(x)

#### EvolveGCN-O

In [53]:
class TopK_O(torch.nn.Module):
    """
    Select the k highest scoring rows of a node embedding matrix.

    Every row is scored by its projection on the learnable `scorer` vector
    (divided by the norm of the scorer); the selected rows are scaled by the
    tanh of their score.
    """
    def __init__(self,feats,k):
        super().__init__()
        self.scorer = Parameter(torch.Tensor(feats,1))
        self.reset_param(self.scorer)
        self.k = k

    def reset_param(self,t):
        # Initialize based on the number of rows
        stdv = 1. / math.sqrt(t.size(0))
        t.data.uniform_(-stdv,stdv)

    def forward(self,node_embs,mask):
        """
        Args:
            node_embs: node embedding matrix with `feats` columns, dense or sparse.
            mask: added to the scores before the selection; entries whose
                masked score is -inf are never selected (presumably -inf
                marks nodes to exclude -- confirm against get_node_mask).

        Returns:
            The selected, scaled rows transposed, i.e. one column per
            selected row.
        """
        scores = node_embs.matmul(self.scorer) / self.scorer.norm()
        scores = scores + mask

        vals, topk_indices = scores.view(-1).topk(self.k)
        # drop the entries that were masked out
        topk_indices = topk_indices[vals > -float("Inf")]

        # fewer than k valid entries: repeat the last valid index
        if topk_indices.size(0) < self.k:
            topk_indices = pad_with_last_val(topk_indices,self.k)

        # is_sparse covers every sparse dtype/device, unlike isinstance checks
        # against the legacy torch.sparse.FloatTensor classes
        if node_embs.is_sparse:
            node_embs = node_embs.to_dense()

        out = node_embs[topk_indices] * torch.tanh(scores[topk_indices].view(-1, 1))

        # we need to transpose the output
        return out.t()

In [54]:
class mat_GRU_gate_O(torch.nn.Module):
    """
    One gate of the matrix GRU: activation(W @ x + U @ hidden + bias).

    W and U are rows x rows and the bias is rows x cols (initialized to
    zero), so x and hidden are expected to be rows x cols matrices.
    """
    def __init__(self,rows,cols,activation):
        super().__init__()
        self.activation = activation

        # both weights are square in the number of rows (the in_feats)
        square = (rows, rows)
        self.W = Parameter(torch.Tensor(*square))
        self.reset_param(self.W)

        self.U = Parameter(torch.Tensor(*square))
        self.reset_param(self.U)

        self.bias = Parameter(torch.zeros(rows,cols))

    def reset_param(self,t):
        """Fill t in place, uniformly in [-b, b] with b = 1 / sqrt(number of columns of t)."""
        bound = 1. / math.sqrt(t.size(1))
        t.data.uniform_(-bound,bound)

    def forward(self,x,hidden):
        """Return activation(W @ x + U @ hidden + bias)."""
        pre_activation = self.W.matmul(x) + self.U.matmul(hidden)
        pre_activation = pre_activation + self.bias
        return self.activation(pre_activation)

In [55]:
class mat_GRU_cell_O(torch.nn.Module):
    """
    Matrix GRU cell of the -O variant: the weight matrix evolves from its own
    previous value only (the previous matrix is used as both the input and
    the hidden state of every gate).
    """
    def __init__(self,args):
        super().__init__()
        self.args = args
        rows, cols = args.rows, args.cols
        self.update = mat_GRU_gate_O(rows, cols, torch.nn.Sigmoid())
        self.reset = mat_GRU_gate_O(rows, cols, torch.nn.Sigmoid())
        self.htilda = mat_GRU_gate_O(rows, cols, torch.nn.Tanh())
        # created here but not used by forward() in this variant
        self.choose_topk = TopK_O(feats = rows, k = cols)

    def forward(self,prev_Q): # ,prev_Z,mask):
        """Return the next weight matrix given the previous one."""
        # no node summary in this variant: the input is the previous matrix itself
        z_topk = prev_Q

        update_gate = self.update(z_topk,prev_Q)
        reset_gate = self.reset(z_topk,prev_Q)

        # candidate state computed from the reset-scaled previous matrix
        candidate = self.htilda(z_topk, reset_gate * prev_Q)

        return (1 - update_gate) * prev_Q + update_gate * candidate

In [56]:
class GRCU_O(torch.nn.Module):
    """
    One evolving graph convolution layer (-O variant): the GCN weight matrix
    is updated by a matrix GRU cell at every time step and then applied to
    that step's node embeddings.
    """
    def __init__(self,args):
        super().__init__()
        self.args = args

        cell_args = Namespace({})
        cell_args.rows = args.in_feats
        cell_args.cols = args.out_feats
        self.evolve_weights = mat_GRU_cell_O(cell_args)

        self.activation = self.args.activation

        weight_shape = (self.args.in_feats, self.args.out_feats)
        self.GCN_init_weights = Parameter(torch.Tensor(*weight_shape))
        self.reset_param(self.GCN_init_weights)

    def reset_param(self,t):
        """Fill t in place, uniformly in [-b, b] with b = 1 / sqrt(number of columns of t)."""
        bound = 1. / math.sqrt(t.size(1))
        t.data.uniform_(-bound,bound)

    def forward(self,A_list,node_embs_list):#,mask_list):
        """
        Return one embedding matrix per entry of A_list.

        At step t the weights are evolved first, starting from
        GCN_init_weights, and the new weights are then used as
        activation(Ahat @ (node_embs_list[t] @ weights)).
        """
        weights = self.GCN_init_weights
        outputs = []
        for t, Ahat in enumerate(A_list):
            weights = self.evolve_weights(weights)
            projected = node_embs_list[t].matmul(weights)
            outputs.append(self.activation(Ahat.matmul(projected)))

        return outputs

In [57]:
class EGCN_O(torch.nn.Module):
    """
    EvolveGCN-O model: a stack of two GRCU_O layers mapping
    feats_per_node -> layer_1_feats -> layer_2_feats.

    Args:
        args: object exposing feats_per_node, layer_1_feats and layer_2_feats.
        activation: activation handed to every GRCU_O layer.
        device: device every layer is moved to on construction.
        skipfeats: if true, forward() appends the input features of the last
            time step to its output.

    NOTE(review): the layers live in a plain Python list and their parameters
    are collected by hand into self._parameters, which parameters() returns.
    Presumably nn.Module therefore does not track the layers as submodules
    (state_dict(), .to(), .train()) -- confirm before refactoring.
    """
    def __init__(self, args, activation, device='cpu', skipfeats=False):
        super().__init__()
        # placeholder; rebound for every layer inside the loop below
        GRCU_args = Namespace({})

        # consecutive pairs give the (in_feats, out_feats) of each layer
        feats = [args.feats_per_node, args.layer_1_feats, args.layer_2_feats]
        self.device = device
        self.skipfeats = skipfeats
        self.GRCU_layers = []
        # replaces the attribute nn.Module normally manages itself
        self._parameters = nn.ParameterList()
        for i in range(1,len(feats)):
            GRCU_args = Namespace({
                'in_feats' : feats[i-1],
                'out_feats': feats[i],
                'activation': activation
            })

            grcu_i = GRCU_O(GRCU_args)
            # print (i,'grcu_i', grcu_i)
            self.GRCU_layers.append(grcu_i.to(self.device))
            # collect the parameters of the layer that was just added
            self._parameters.extend(list(self.GRCU_layers[-1].parameters()))

    def parameters(self):
        """Return the hand-collected ParameterList of all GRCU layers."""
        return self._parameters

    def forward(self,A_list, Nodes_list,nodes_mask_list):
        """
        Run every GRCU layer over the whole sequence and return the
        embeddings of the last time step.

        Each layer receives A_list and the list produced by the previous
        layer (Nodes_list for the first one). nodes_mask_list is not used.
        """
        # keep the raw features of the last step for the optional skip connection
        node_feats= Nodes_list[-1]

        for unit in self.GRCU_layers:
            Nodes_list = unit(A_list,Nodes_list)  # ,nodes_mask_list)

        out = Nodes_list[-1]
        if self.skipfeats:
            out = torch.cat((out,node_feats), dim=1)   # use node_feats.to_dense() if 2hot encoded input
        return out

#### EvolveGCN-H

In [58]:
class TopK_H(torch.nn.Module):
    """
    Summarize a node embedding matrix by its k highest scoring rows.

    Every row is scored by its projection on the learnable `scorer` vector
    (divided by the norm of the scorer); the selected rows are scaled by the
    tanh of their score.
    """
    def __init__(self,feats,k):
        super().__init__()
        self.scorer = Parameter(torch.Tensor(feats,1))
        self.reset_param(self.scorer)

        self.k = k

    def reset_param(self,t):
        # Initialize based on the number of rows
        stdv = 1. / math.sqrt(t.size(0))
        t.data.uniform_(-stdv,stdv)

    def forward(self,node_embs,mask):
        """
        Args:
            node_embs: node embedding matrix with `feats` columns, dense or sparse.
            mask: added to the scores before the selection; entries whose
                masked score is -inf are never selected (presumably -inf
                marks nodes to exclude -- confirm against get_node_mask).

        Returns:
            The selected, scaled rows transposed, i.e. one column per
            selected row.
        """
        scores = node_embs.matmul(self.scorer) / self.scorer.norm()
        scores = scores + mask

        vals, topk_indices = scores.view(-1).topk(self.k)
        # drop the entries that were masked out
        topk_indices = topk_indices[vals > -float("Inf")]

        # fewer than k valid entries: repeat the last valid index
        if topk_indices.size(0) < self.k:
            topk_indices = pad_with_last_val(topk_indices,self.k)

        # is_sparse covers every sparse dtype/device, unlike isinstance checks
        # against the legacy torch.sparse.FloatTensor classes
        if node_embs.is_sparse:
            node_embs = node_embs.to_dense()

        out = node_embs[topk_indices] * torch.tanh(scores[topk_indices].view(-1,1))

        # we need to transpose the output
        return out.t()

In [59]:
class mat_GRU_gate_H(torch.nn.Module):
    """
    One gate of the matrix GRU: activation(W @ x + U @ hidden + bias).

    W and U are rows x rows and the bias is rows x cols (initialized to
    zero), so x and hidden are expected to be rows x cols matrices.
    """
    def __init__(self,rows,cols,activation):
        super().__init__()
        self.activation = activation

        # both weights are square in the number of rows (the in_feats)
        square = (rows, rows)
        self.W = Parameter(torch.Tensor(*square))
        self.reset_param(self.W)

        self.U = Parameter(torch.Tensor(*square))
        self.reset_param(self.U)

        self.bias = Parameter(torch.zeros(rows,cols))

    def reset_param(self,t):
        """Fill t in place, uniformly in [-b, b] with b = 1 / sqrt(number of columns of t)."""
        bound = 1. / math.sqrt(t.size(1))
        t.data.uniform_(-bound,bound)

    def forward(self,x,hidden):
        """Return activation(W @ x + U @ hidden + bias)."""
        pre_activation = self.W.matmul(x) + self.U.matmul(hidden)
        pre_activation = pre_activation + self.bias
        return self.activation(pre_activation)

In [60]:
class mat_GRU_cell_H(torch.nn.Module):
    """
    Matrix GRU cell of the -H variant: the weight matrix is the hidden state
    and a top-k summary of the node embeddings is the input of every gate.
    """
    def __init__(self,args):
        super().__init__()
        self.args = args
        rows, cols = args.rows, args.cols
        self.update = mat_GRU_gate_H(rows, cols, torch.nn.Sigmoid())
        self.reset = mat_GRU_gate_H(rows, cols, torch.nn.Sigmoid())
        self.htilda = mat_GRU_gate_H(rows, cols, torch.nn.Tanh())
        self.choose_topk = TopK_H(feats = rows, k = cols)

    def forward(self,prev_Q,prev_Z,mask):
        """
        Return the next weight matrix.

        Args:
            prev_Q: previous weight matrix (hidden state).
            prev_Z: node embeddings summarized by choose_topk.
            mask: forwarded to choose_topk together with prev_Z.
        """
        z_topk = self.choose_topk(prev_Z,mask)

        update_gate = self.update(z_topk,prev_Q)
        reset_gate = self.reset(z_topk,prev_Q)

        # candidate state computed from the reset-scaled previous matrix
        candidate = self.htilda(z_topk, reset_gate * prev_Q)

        return (1 - update_gate) * prev_Q + update_gate * candidate

In [61]:
class GRCU_H(torch.nn.Module):
    """Graph-convolution layer whose weight matrix is evolved at every step.

    ``args`` must expose ``in_feats``, ``out_feats`` and ``activation``.
    """

    def __init__(self,args):
        super().__init__()
        self.args = args

        # The recurrent cell works on an in_feats x out_feats matrix.
        cell_args = Namespace({'rows': args.in_feats, 'cols': args.out_feats})
        self.evolve_weights = mat_GRU_cell_H(cell_args)

        self.activation = args.activation
        self.GCN_init_weights = Parameter(torch.Tensor(args.in_feats, args.out_feats))
        self.reset_param(self.GCN_init_weights)

    def reset_param(self,t):
        """Fill ``t`` in place from U(-b, b) with b = 1 / sqrt(columns of t)."""
        bound = 1. / math.sqrt(t.size(1))
        t.data.uniform_(-bound, bound)

    def forward(self,A_list,node_embs_list,mask_list):
        """Return one activated embedding per entry of ``A_list``.

        The weights start from ``GCN_init_weights`` and are evolved once per
        step, before being applied to that step's embeddings.
        """
        weights = self.GCN_init_weights
        outputs = []
        for step, adj in enumerate(A_list):
            step_embs = node_embs_list[step]
            # Evolve first, then convolve with the freshly evolved weights.
            weights = self.evolve_weights(weights, step_embs, mask_list[step])
            outputs.append(self.activation(adj.matmul(step_embs.matmul(weights))))
        return outputs

In [62]:
class EGCN_H(torch.nn.Module):
    """Two stacked ``GRCU_H`` layers applied to a history of graphs.

    ``args`` must expose ``feats_per_node``, ``layer_1_feats`` and
    ``layer_2_feats``; ``activation`` is handed to every layer. When
    ``skipfeats`` is true the last input feature matrix is concatenated to the
    output along dim 1.
    """
    def __init__(self, args, activation, device='cpu', skipfeats=False):
        super().__init__()
        # This first value is overwritten inside the loop before it is used.
        GRCU_args = Namespace({})

        # Layer i maps feats[i-1] -> feats[i] features.
        feats = [args.feats_per_node,
                 args.layer_1_feats,
                 args.layer_2_feats]
        self.device = device
        self.skipfeats = skipfeats
        # NOTE(review): the layers live in a plain Python list rather than an
        # nn.ModuleList, so they are moved to `device` by hand below and their
        # parameters are collected manually — confirm this is intended.
        self.GRCU_layers = []
        # NOTE(review): `_parameters` is also the attribute name nn.Module uses
        # for its own parameter bookkeeping; verify this reassignment behaves
        # as expected on the targeted torch version.
        self._parameters = nn.ParameterList()
        for i in range(1,len(feats)):
            GRCU_args = Namespace({
                'in_feats' : feats[i-1],
                'out_feats': feats[i],
                'activation': activation
            })

            grcu_i = GRCU_H(GRCU_args)
            #print (i,'grcu_i', grcu_i)
            self.GRCU_layers.append(grcu_i.to(self.device))
            self._parameters.extend(list(self.GRCU_layers[-1].parameters()))

    def parameters(self):
        """Return the ParameterList collected from the GRCU layers in ``__init__``."""
        return self._parameters

    def forward(self,A_list, Nodes_list,nodes_mask_list):
        """Run every layer over the whole history and return the last step's output."""
        # Kept for the optional skip connection below.
        node_feats= Nodes_list[-1]

        # Each layer consumes the full sequence produced by the previous one.
        for unit in self.GRCU_layers:
            Nodes_list = unit(A_list,Nodes_list,nodes_mask_list)

        out = Nodes_list[-1]
        if self.skipfeats:
            out = torch.cat((out,node_feats), dim=1)   # use node_feats.to_dense() if 2hot encoded input
        return out

#### Split Dataset

In [63]:
class splitter():
    '''
    Builds the train / dev / test DataLoaders for a tasker.

    Static datasets are split by (sorted) labelled-node index; datasets with
    time are split along the time axis. The loaders are exposed as
    ``self.train``, ``self.dev`` and ``self.test`` next to ``self.tasker``.
    '''
    def __init__(self,args,tasker):
        static = tasker.is_static
        assert args.train_proportion + args.dev_proportion < 1, \
            'there\'s no space for test samples'

        if static:
            train, dev, test = self._static_loaders(args, tasker)
        else:
            train, dev, test = self._temporal_loaders(args, tasker)
            print ('Dataset splits sizes:  train',len(train), 'dev',len(dev), 'test',len(test))

        self.tasker = tasker
        self.train = train
        self.dev = dev
        self.test = test

    def _static_loaders(self, args, tasker):
        '''Split the labelled nodes by position and wrap each part in a DataLoader.'''
        labelled = tasker.data.nodes_with_label
        # Random permutation is switched off here: nodes are used in sorted order.
        print ('tasker.data.nodes',labelled.size())
        ordered, _ = labelled.sort()

        total = ordered.size(0)
        train_stop = int(args.train_proportion*total)
        dev_stop = int((args.train_proportion+args.dev_proportion)*total)
        self.train_idx = ordered[:train_stop]
        self.dev_idx = ordered[train_stop:dev_stop]
        self.test_idx = ordered[dev_stop:]

        # Only the training loader shuffles.
        train = DataLoader(static_data_split(tasker, self.train_idx, test = False),
                           shuffle=True, **args.data_loading_params)
        dev = DataLoader(static_data_split(tasker, self.dev_idx, test = True),
                         shuffle=False, **args.data_loading_params)
        test = DataLoader(static_data_split(tasker, self.test_idx, test = True),
                          shuffle=False, **args.data_loading_params)
        return train, dev, test

    def _temporal_loaders(self, args, tasker):
        '''Split the time axis into consecutive train / dev / test ranges.'''
        def time_cut(fraction):
            # Time index located at `fraction` of the dataset's max_time.
            return int(np.floor(tasker.data.max_time.type(torch.float) * fraction))

        # Link prediction evaluates on all edges; other tasks pass nothing extra.
        eval_kwargs = {'all_edges': True} if args.task == 'link_pred' else {}

        # Training starts only once enough history steps are available.
        train_start = tasker.data.min_time + args.num_hist_steps
        train_end = time_cut(args.train_proportion)
        train = DataLoader(data_split(tasker, train_start, train_end, test = False),
                           **args.data_loading_params)

        dev_end = time_cut(args.dev_proportion + args.train_proportion)
        dev = DataLoader(data_split(tasker, train_end, dev_end, test = True, **eval_kwargs),
                         num_workers=args.data_loading_params['num_workers'])

        # the +1 is because max_time is assumed to exist in the dataset
        test_end = int(tasker.max_time) + 1
        test = DataLoader(data_split(tasker, dev_end, test_end, test = True, **eval_kwargs),
                          num_workers=args.data_loading_params['num_workers'])
        return train, dev, test

In [64]:
class data_split(Dataset):
    '''Dataset view over the sample indices ``start`` (inclusive) to ``end`` (exclusive).'''

    def __init__(self, tasker, start, end, test, **kwargs):
        '''Extra keyword arguments are forwarded to ``tasker.get_sample`` on every access.'''
        self.tasker, self.test = tasker, test
        self.start, self.end = start, end
        self.kwargs = kwargs

    def __len__(self):
        return self.end - self.start

    def __getitem__(self,idx):
        # `idx` is relative to the split; the tasker expects the absolute index.
        return self.tasker.get_sample(self.start + idx, test = self.test, **self.kwargs)

In [65]:
class static_data_split(Dataset):
    '''Dataset view over an explicit collection of sample indices.'''

    def __init__(self, tasker, indexes, test):
        '''
        ``indexes`` holds the ids of the samples belonging to this split;
        ``test`` is forwarded to ``tasker.get_sample``.
        '''
        self.tasker, self.test = tasker, test
        self.indexes = indexes
        self.adj_matrix = tasker.adj_matrix

    def __len__(self):
        return len(self.indexes)

    def __getitem__(self,idx):
        # Translate the position inside the split into the stored sample id.
        return self.tasker.get_sample(self.indexes[idx], test = self.test)

#### Cross_Entropy

In [66]:
class Cross_Entropy(torch.nn.Module):
    """Class-weighted cross entropy averaged over all predictions.

    ``args`` must expose ``class_weights``, ``device`` and ``task``.
    """
    def __init__(self, args, dataset):
        super().__init__()
        class_weights = torch.tensor(args.class_weights).to(args.device)
        self.weights = self.dyn_scale(args.task, dataset, class_weights)

    def dyn_scale(self,task,dataset,weights):
        """Return a callable mapping a batch of labels to the class weights.

        The weights are returned unchanged for every task. A variant that
        rescaled the class-0 weight for 'link_pred' by
        ``dataset.num_non_existing / (number of 0 labels)`` was disabled to
        keep a 1:1 ratio.
        """
        def constant_weights(labels):
            return weights
        return constant_weights

    def logsumexp(self,logits):
        """Row-wise log-sum-exp of ``logits``, returned as a column."""
        # Subtract the row maximum before exponentiating.
        row_max = logits.max(dim=1, keepdim=True)[0]
        shifted_sum = torch.exp(logits - row_max).sum(dim=1, keepdim=True)
        return row_max + torch.log(shifted_sum)

    def forward(self,logits,labels):
        '''
        logits is an M by C matrix (M predictions, C classes); labels is an
        integer tensor with M entries giving the class each prediction should
        match. Returns the mean of the weighted per-prediction losses.
        '''
        targets = labels.view(-1,1)
        sample_weight = self.weights(targets)[targets].view(-1,1)
        neg_log_likelihood = self.logsumexp(logits) - logits.gather(-1, targets)
        return (sample_weight * neg_log_likelihood).mean()

#### Logger

In [67]:
class Logger():
    def __init__(self, args, num_classes, minibatch_log_interval=10):

        if args is not None:
            currdate=str(datetime.datetime.today().strftime('%Y%m%d%H%M%S'))
            self.log_name= 'log/log_'+args.data+'_'+args.task+'_'+args.model+'_'+currdate+'_r'+str(args.rank)+'.log'

            if args.use_logfile:
                print ("Log file:", self.log_name)
                logging.basicConfig(filename=self.log_name, level=logging.INFO)
            else:
                print ("Log: STDOUT")
                logging.basicConfig(stream=sys.stdout, level=logging.INFO)

            logging.info ('*** PARAMETERS ***')
            logging.info (pprint.pformat(args.__dict__)) # displays the string
            logging.info ('')
        else:
            print ("Log: STDOUT")
            logging.basicConfig(stream=sys.stdout, level=logging.INFO)

        self.num_classes = num_classes
        self.minibatch_log_interval = minibatch_log_interval
        self.eval_k_list = [10, 100, 1000]
        self.args = args


    def get_log_file_name(self):
        """Return the log file path stored in ``self.log_name`` by ``__init__``.

        The path is built from the run arguments, so callers should not rely on
        it when the logger was created with ``args=None``.
        """
        return self.log_name

    def log_epoch_start(self, epoch, num_minibatches, set, minibatch_log_interval=None):
        #ALDO
        self.epoch = epoch
        ######
        self.set = set
        self.losses = []
        self.errors = []
        self.MRRs = []
        self.MAPs = []
        #self.time_step_sizes = []
        self.conf_mat_tp = {}
        self.conf_mat_fn = {}
        self.conf_mat_fp = {}
        self.conf_mat_tp_at_k = {}
        self.conf_mat_fn_at_k = {}
        self.conf_mat_fp_at_k = {}
        for k in self.eval_k_list:
            self.conf_mat_tp_at_k[k] = {}
            self.conf_mat_fn_at_k[k] = {}
            self.conf_mat_fp_at_k[k] = {}

        for cl in range(self.num_classes):
            self.conf_mat_tp[cl]=0
            self.conf_mat_fn[cl]=0
            self.conf_mat_fp[cl]=0
            for k in self.eval_k_list:
                self.conf_mat_tp_at_k[k][cl]=0
                self.conf_mat_fn_at_k[k][cl]=0
                self.conf_mat_fp_at_k[k][cl]=0

        if self.set == "TEST":
            self.conf_mat_tp_list = {}
            self.conf_mat_fn_list = {}
            self.conf_mat_fp_list = {}
            for cl in range(self.num_classes):
                self.conf_mat_tp_list[cl]=[]
                self.conf_mat_fn_list[cl]=[]
                self.conf_mat_fp_list[cl]=[]

        self.batch_sizes=[]
        self.minibatch_done = 0
        self.num_minibatches = num_minibatches
        if minibatch_log_interval is not None:
            self.minibatch_log_interval = minibatch_log_interval
        logging.info('################ '+set+' epoch '+str(epoch)+' ###################')
        self.lasttime = time.monotonic()
        self.ep_time = self.lasttime

    def log_minibatch(self, predictions, true_classes, loss, **kwargs):
        """Accumulate the metrics of one minibatch and periodically log partial results.

        Args:
            predictions: per-class scores; softmax is taken over dim 1 and
                column 1 is used as the score for MRR/MAP.
            true_classes: ground-truth class of every prediction.
            loss: loss of the batch; appended as-is to ``self.losses``.
            **kwargs: must contain ``adj`` when the current set is TEST/VALID
                and the task is 'link_pred' (it is handed to ``get_MRR``).
        """

        probs = torch.softmax(predictions,dim=1)[:,1]
        # MRR is only computed when evaluating link prediction; otherwise a
        # zero placeholder of shape (1,) is stored.
        if self.set in ['TEST', 'VALID'] and self.args.task == 'link_pred':
            MRR = self.get_MRR(probs,true_classes, kwargs['adj'],do_softmax=False)
        else:
            MRR = torch.tensor([0.0])

        MAP = torch.tensor(self.get_MAP(probs,true_classes, do_softmax=False))

        error, conf_mat_per_class = self.eval_predicitions(predictions, true_classes, self.num_classes)
        # Same confusion counts restricted to the top-k scored predictions.
        conf_mat_per_class_at_k={}
        for k in self.eval_k_list:
            conf_mat_per_class_at_k[k] = self.eval_predicitions_at_k(predictions, true_classes, self.num_classes, k)

        # Batch sizes are the weights used later by calc_epoch_metric.
        batch_size = predictions.size(0)
        self.batch_sizes.append(batch_size)

        self.losses.append(loss) #loss.detach()
        self.errors.append(error)
        self.MRRs.append(MRR)
        self.MAPs.append(MAP)
        # Add this batch's counts to the running per-class totals.
        for cl in range(self.num_classes):
            self.conf_mat_tp[cl]+=conf_mat_per_class.true_positives[cl]
            self.conf_mat_fn[cl]+=conf_mat_per_class.false_negatives[cl]
            self.conf_mat_fp[cl]+=conf_mat_per_class.false_positives[cl]
            for k in self.eval_k_list:
                self.conf_mat_tp_at_k[k][cl]+=conf_mat_per_class_at_k[k].true_positives[cl]
                self.conf_mat_fn_at_k[k][cl]+=conf_mat_per_class_at_k[k].false_negatives[cl]
                self.conf_mat_fp_at_k[k][cl]+=conf_mat_per_class_at_k[k].false_positives[cl]
            if self.set == "TEST":
                self.conf_mat_tp_list[cl].append(conf_mat_per_class.true_positives[cl])
                self.conf_mat_fn_list[cl].append(conf_mat_per_class.false_negatives[cl])
                self.conf_mat_fp_list[cl].append(conf_mat_per_class.false_positives[cl])

        self.minibatch_done+=1
        if self.minibatch_done%self.minibatch_log_interval==0:
            # error / MRR / MAP / loss below are averaged over all batches seen
            # so far in this epoch.
            mb_error = self.calc_epoch_metric(self.batch_sizes, self.errors)
            mb_MRR = self.calc_epoch_metric(self.batch_sizes, self.MRRs)
            mb_MAP = self.calc_epoch_metric(self.batch_sizes, self.MAPs)
            partial_losses = torch.stack(self.losses)
            logging.info(self.set+ ' batch %d / %d - partial error %0.4f - partial loss %0.4f - partial MRR  %0.4f - partial MAP %0.4f' % (self.minibatch_done, self.num_minibatches, mb_error, partial_losses.mean(), mb_MRR, mb_MAP))

            # NOTE(review): the tp/fn/fp and precision/recall/f1 lines below use
            # the counts of the current batch only, not the running totals.
            tp=conf_mat_per_class.true_positives
            fn=conf_mat_per_class.false_negatives
            fp=conf_mat_per_class.false_positives
            logging.info(self.set+' batch %d / %d -  partial tp %s,fn %s,fp %s' % (self.minibatch_done, self.num_minibatches, tp, fn, fp))
            precision, recall, f1 = self.calc_microavg_eval_measures(tp, fn, fp)
            logging.info (self.set+' batch %d / %d - measures partial microavg - precision %0.4f - recall %0.4f - f1 %0.4f ' % (self.minibatch_done, self.num_minibatches, precision,recall,f1))
            for cl in range(self.num_classes):
                cl_precision, cl_recall, cl_f1 = self.calc_eval_measures_per_class(tp, fn, fp, cl)
                logging.info (self.set+' batch %d / %d - measures partial for class %d - precision %0.4f - recall %0.4f - f1 %0.4f ' % (self.minibatch_done, self.num_minibatches, cl,cl_precision,cl_recall,cl_f1))

            # %d truncates the elapsed time to whole seconds.
            logging.info (self.set+' batch %d / %d - Batch time %d ' % (self.minibatch_done, self.num_minibatches, (time.monotonic()-self.lasttime) ))

        # Reference point for the next "Batch time" measurement.
        self.lasttime=time.monotonic()

    def log_epoch_done(self):
        """Log the epoch summary and return the measure used for model selection.

        The returned value is chosen from ``args.target_measure`` and
        ``args.target_class``. The assignments below run in order, so a later
        matching branch overwrites an earlier choice.
        """
        eval_measure = 0

        # From here on self.losses is a stacked tensor, no longer a list.
        self.losses = torch.stack(self.losses)
        logging.info(self.set+' mean losses '+ str(self.losses.mean()))
        if self.args.target_measure=='loss' or self.args.target_measure=='Loss':
            eval_measure = self.losses.mean()

        epoch_error = self.calc_epoch_metric(self.batch_sizes, self.errors)
        logging.info(self.set+' mean errors '+ str(epoch_error))

        epoch_MRR = self.calc_epoch_metric(self.batch_sizes, self.MRRs)
        epoch_MAP = self.calc_epoch_metric(self.batch_sizes, self.MAPs)
        logging.info(self.set+' mean MRR '+ str(epoch_MRR)+' - mean MAP '+ str(epoch_MAP))
        if self.args.target_measure=='MRR' or self.args.target_measure=='mrr':
            eval_measure = epoch_MRR
        if self.args.target_measure=='MAP' or self.args.target_measure=='map':
            eval_measure = epoch_MAP

        logging.info(self.set+' tp %s,fn %s,fp %s' % (self.conf_mat_tp, self.conf_mat_fn, self.conf_mat_fp))
        precision, recall, f1 = self.calc_microavg_eval_measures(self.conf_mat_tp, self.conf_mat_fn, self.conf_mat_fp)
        logging.info (self.set+' measures microavg - precision %0.4f - recall %0.4f - f1 %0.4f ' % (precision,recall,f1))
        # NOTE(review): when target_class is 'AVG' the final `else` assigns f1
        # for every target_measure other than Precision/Recall, which also
        # replaces a loss/MRR/MAP value selected above — confirm this is intended.
        if str(self.args.target_class) == 'AVG':
            if self.args.target_measure=='Precision' or self.args.target_measure=='prec':
                eval_measure = precision
            elif self.args.target_measure=='Recall' or self.args.target_measure=='rec':
                eval_measure = recall
            else:
                eval_measure = f1


        for cl in range(self.num_classes):
            cl_precision, cl_recall, cl_f1 = self.calc_eval_measures_per_class(self.conf_mat_tp, self.conf_mat_fn, self.conf_mat_fp, cl)
            logging.info (self.set+' measures for class %d - precision %0.4f - recall %0.4f - f1 %0.4f ' % (cl,cl_precision,cl_recall,cl_f1))
            # NOTE(review): same pattern as above — when target_class names this
            # class, the `else` falls back to the class F1 for any other measure.
            if str(cl) == str(self.args.target_class):
                if self.args.target_measure=='Precision' or self.args.target_measure=='prec':
                    eval_measure = cl_precision
                elif self.args.target_measure=='Recall' or self.args.target_measure=='rec':
                    eval_measure = cl_recall
                else:
                    eval_measure = cl_f1

        # The "@k" measures are only logged; they never influence eval_measure.
        for k in self.eval_k_list:
            precision, recall, f1 = self.calc_microavg_eval_measures(self.conf_mat_tp_at_k[k], self.conf_mat_fn_at_k[k], self.conf_mat_fp_at_k[k])
            logging.info (self.set+' measures@%d microavg - precision %0.4f - recall %0.4f - f1 %0.4f ' % (k,precision,recall,f1))

            for cl in range(self.num_classes):
                cl_precision, cl_recall, cl_f1 = self.calc_eval_measures_per_class(self.conf_mat_tp_at_k[k], self.conf_mat_fn_at_k[k], self.conf_mat_fp_at_k[k], cl)
                logging.info (self.set+' measures@%d for class %d - precision %0.4f - recall %0.4f - f1 %0.4f ' % (k, cl,cl_precision,cl_recall,cl_f1))


        logging.info (self.set+' Total epoch time: '+ str(((time.monotonic()-self.ep_time))))

        return eval_measure

    def get_MRR(self,predictions,true_classes, adj ,do_softmax=False):
        """Return the mean of the per-row MRRs as a 0-d tensor.

        Args:
            predictions: one score per candidate edge, or per-class scores
                when ``do_softmax`` is true (column 1 of the softmax is used).
            true_classes: label of every candidate edge; 1 marks an existing edge.
            adj: indices of the candidate edges; ``adj[0]`` are used as row
                indices and ``adj[1]`` as column indices.
            do_softmax: apply softmax over dim 1 before scoring.

        Rows without any existing edge are skipped. If every row is skipped
        the mean of an empty tensor (NaN) is returned.
        """
        if do_softmax:
            probs = torch.softmax(predictions,dim=1)[:,1]
        else:
            probs = predictions

        # detach() mirrors get_MAP, so tensors that still require grad convert cleanly.
        probs = probs.detach().cpu().numpy()
        true_classes = true_classes.detach().cpu().numpy()
        adj = adj.detach().cpu().numpy()
        rows, cols = adj[0], adj[1]

        # Scatter scores and labels into dense (row, col) matrices.
        pred_matrix = coo_matrix((probs,(rows,cols))).toarray()
        true_matrix = coo_matrix((true_classes,(rows,cols))).toarray()

        row_MRRs = [
            self.get_row_MRR(pred_row, true_row)
            for pred_row, true_row in zip(pred_matrix, true_matrix)
            if np.isin(1, true_row)  # only rows that contain an existing edge
        ]

        return torch.tensor(row_MRRs).mean()

    def get_row_MRR(self,probs,true_classes):
        """Return the mean reciprocal rank of the existing edges in one row.

        ``probs`` and ``true_classes`` are 1-D numpy arrays of equal length;
        entries of ``true_classes`` equal to 1 mark existing edges. Ranks start
        at 1 for the highest score. The caller is expected to pass only rows
        with at least one existing edge (otherwise the result is NaN).
        """
        existing_mask = true_classes == 1
        # descending in probability
        ordered_indices = np.flip(probs.argsort())

        ordered_existing_mask = existing_mask[ordered_indices]

        # The builtin `float` replaces the `np.float` alias removed in NumPy 1.24.
        existing_ranks = np.arange(1,
                                   true_classes.shape[0]+1,
                                   dtype=float)[ordered_existing_mask]

        MRR = (1/existing_ranks).sum()/existing_ranks.shape[0]
        return MRR


    def get_MAP(self,predictions,true_classes, do_softmax=False):
        """Return ``average_precision_score`` of the scores against ``true_classes``.

        With ``do_softmax`` the scores are column 1 of the softmax over dim 1
        of ``predictions``; otherwise ``predictions`` is used directly.
        """
        scores = torch.softmax(predictions,dim=1)[:,1] if do_softmax else predictions

        y_score = scores.detach().cpu().numpy()
        y_true = true_classes.detach().cpu().numpy()
        return average_precision_score(y_true, y_score)

    def eval_predicitions(self, predictions, true_classes, num_classes):
        """Return the error rate and per-class tp / fn / fp counts of a batch.

        The predicted class is the argmax over dim 1 of ``predictions``. The
        counts come back as a Namespace with ``true_positives``,
        ``false_negatives`` and ``false_positives`` dicts keyed by class id.
        """
        predicted = predictions.argmax(dim=1)
        num_wrong = (predicted != true_classes).sum(dtype=torch.float)
        error = num_wrong / predictions.size(0)

        conf = Namespace({})
        conf.true_positives = {}
        conf.false_negatives = {}
        conf.false_positives = {}

        for cl in range(num_classes):
            # Samples whose true class is `cl`, and whether each was predicted as `cl`.
            is_cl = true_classes == cl
            hits = predicted[is_cl] == cl

            tp = hits.sum()
            conf.true_positives[cl] = tp
            conf.false_negatives[cl] = hits.size(0) - tp
            conf.false_positives[cl] = (predicted == cl).sum() - tp
        return error, conf


    def eval_predicitions_at_k(self, predictions, true_classes, num_classes, k):
        """Return per-class tp / fn / fp counts restricted to the top-k predictions.

        For every class the ``k`` rows with the highest score for that class
        are selected (``k`` is capped at the number of predictions). False
        negatives are counted against all samples of the class, not only
        those inside the top k.
        """
        conf = Namespace({})
        conf.true_positives = {}
        conf.false_negatives = {}
        conf.false_positives = {}

        k = min(k, predictions.size(0))

        for cl in range(num_classes):
            # Rows ranked highest for the target class.
            top_idx = torch.topk(predictions[:,cl], k, dim=0, largest=True, sorted=True)[1]
            predicted_at_k = predictions[top_idx].argmax(dim=1)
            true_at_k = true_classes[top_idx]

            hits = predicted_at_k[true_at_k == cl] == cl
            tp = hits.sum()

            conf.true_positives[cl] = tp
            # Use hits.size(0) - tp here to count only within the top k.
            conf.false_negatives[cl] = true_classes[true_classes == cl].size(0) - tp
            conf.false_positives[cl] = (predicted_at_k == cl).sum() - tp
        return conf


    def calc_microavg_eval_measures(self, tp, fn, fp):
        """Return micro-averaged ``(precision, recall, f1)``.

        ``tp``, ``fn`` and ``fp`` are dicts mapping class id to a count; the
        counts may be 0-d tensors or plain numbers. A measure whose
        denominator is zero is reported as 0.0, not raised as
        ZeroDivisionError.
        """
        def total(counts):
            # sum() yields a tensor when the values are tensors, a plain int otherwise.
            summed = sum(counts.values())
            return summed.item() if hasattr(summed, 'item') else summed

        tp_sum = total(tp)
        fn_sum = total(fn)
        fp_sum = total(fp)

        p = tp_sum*1.0 / (tp_sum+fp_sum) if (tp_sum+fp_sum) > 0 else 0.0
        r = tp_sum*1.0 / (tp_sum+fn_sum) if (tp_sum+fn_sum) > 0 else 0.0
        f1 = 2.0 * (p*r) / (p+r) if (p+r) > 0 else 0.0
        return p, r, f1

    def calc_eval_measures_per_class(self, tp, fn, fp, class_id):
        """Return ``(precision, recall, f1)`` for one class.

        ``tp``, ``fn`` and ``fp`` are either mappings from class id to a count
        tensor (``class_id`` selects the entry) or the count tensors
        themselves. ``(0, 0, 0)`` is returned when there are no true positives.
        """
        # isinstance also accepts dict subclasses (OrderedDict, defaultdict, ...).
        if isinstance(tp, dict):
            tp_sum = tp[class_id].item()
            fn_sum = fn[class_id].item()
            fp_sum = fp[class_id].item()
        else:
            tp_sum = tp.item()
            fn_sum = fn.item()
            fp_sum = fp.item()

        if tp_sum==0:
            return 0,0,0

        # tp_sum > 0 from here on, so both denominators and p + r are positive.
        p = tp_sum*1.0 / (tp_sum+fp_sum)
        r = tp_sum*1.0 / (tp_sum+fn_sum)
        f1 = 2.0 * (p*r) / (p+r)
        return p, r, f1

    def calc_epoch_metric(self,batch_sizes, metric_val):
        """Return the batch-size-weighted mean of per-batch metric values.

        Args:
            batch_sizes: list with the size of every batch.
            metric_val: list with one single-element tensor per batch
                (0-d or shape ``(1,)``).

        Returns:
            The weighted mean as a Python float.
        """
        weights = torch.tensor(batch_sizes, dtype = torch.float)
        # Flatten so shape-(1,) entries line up with the weights and do not
        # broadcast (N, 1) against (N,) into an N x N matrix.
        values = torch.stack(metric_val).cpu().reshape(-1)
        weighted_mean = (values * weights).sum()/weights.sum()

        return weighted_mean.detach().item()

#### Trainer

In [68]:
class Trainer():
    def __init__(self,args, splitter, gcn, classifier, comp_loss, dataset, num_classes):
        """Store the training components, create the logger and the optimizers.

        For static taskers the single adjacency matrix and node-feature
        matrix are prepared once here and reused for every sample.
        """
        self.args = args
        self.splitter = splitter
        self.tasker = splitter.tasker
        self.gcn, self.classifier = gcn, classifier
        self.comp_loss = comp_loss

        self.num_nodes = dataset.num_nodes
        self.data = dataset
        self.num_classes = num_classes

        self.logger = Logger(args, self.num_classes)
        self.init_optimizers(args)

        if not self.tasker.is_static:
            return

        # Static graphs: a one-element "history" shared by all samples.
        static_adj = sparse_prepare_tensor(self.tasker.adj_matrix,
                                           torch_size = [self.num_nodes],
                                           ignore_batch_dim = False)
        self.hist_adj_list = [static_adj]
        self.hist_ndFeats_list = [self.tasker.nodes_feats.float()]

    def init_optimizers(self,args):
        params = self.gcn.parameters()
        self.gcn_opt = torch.optim.Adam(params, lr = args.learning_rate)
        params = self.classifier.parameters()
        self.classifier_opt = torch.optim.Adam(params, lr = args.learning_rate)
        self.gcn_opt.zero_grad()
        self.classifier_opt.zero_grad()

    def save_checkpoint(self, state, filename='checkpoint.pth.tar'):
        """Serialize ``state`` to ``filename`` with ``torch.save``.

        ``load_checkpoint`` reads the keys 'epoch', 'gcn_dict',
        'classifier_dict', 'gcn_optimizer' and 'classifier_optimizer' from the
        saved object.
        """
        torch.save(state, filename)

    def load_checkpoint(self, filename, model):
        if os.path.isfile(filename):
            print("=> loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            epoch = checkpoint['epoch']
            self.gcn.load_state_dict(checkpoint['gcn_dict'])
            self.classifier.load_state_dict(checkpoint['classifier_dict'])
            self.gcn_opt.load_state_dict(checkpoint['gcn_optimizer'])
            self.classifier_opt.load_state_dict(checkpoint['classifier_optimizer'])
            self.logger.log_str("=> loaded checkpoint '{}' (epoch {})".format(filename, checkpoint['epoch']))
            return epoch
        
        else:
            self.logger.log_str("=> no checkpoint found at '{}'".format(filename))
            return 0

    def train(self):
        self.tr_step = 0
        best_eval_valid = 0
        eval_valid = 0
        epochs_without_impr = 0

        for e in range(self.args.num_epochs):
            eval_train, nodes_embs = self.run_epoch(self.splitter.train, e, 'TRAIN', grad = True)
            if len(self.splitter.dev) > 0 and e > self.args.eval_after_epochs:
                eval_valid, _ = self.run_epoch(self.splitter.dev, e, 'VALID', grad = False)
                if eval_valid > best_eval_valid:
                    best_eval_valid = eval_valid
                    epochs_without_impr = 0
                    print ('### w' + str(self.args.rank) + ') ep ' + str(e) + ' - Best valid measure:' + str(eval_valid))
                else:
                    epochs_without_impr+=1
                    if epochs_without_impr>self.args.early_stop_patience:
                        print ('### w'+str(self.args.rank)+') ep '+str(e)+' - Early stop.')
                        break

            if len(self.splitter.test)>0 and eval_valid==best_eval_valid and e>self.args.eval_after_epochs:
                eval_test, _ = self.run_epoch(self.splitter.test, e, 'TEST', grad = False)

                if self.args.save_node_embeddings:
                    self.save_node_embs_csv(nodes_embs, self.splitter.train_idx, log_file+'_train_nodeembs.csv.gz')
                    self.save_node_embs_csv(nodes_embs, self.splitter.dev_idx, log_file+'_valid_nodeembs.csv.gz')
                    self.save_node_embs_csv(nodes_embs, self.splitter.test_idx, log_file+'_test_nodeembs.csv.gz')


    def run_epoch(self, split, epoch, set_name, grad):
        """Run one pass over ``split`` and return ``(eval_measure, nodes_embs)``.

        Args:
            split: iterable of samples (a DataLoader built by the splitter).
            epoch: epoch number, used for logging only.
            set_name: 'TRAIN', 'VALID' or 'TEST'; 'TEST' logs every minibatch.
            grad: whether gradients are computed and optimizer steps taken.

        Returns:
            The measure returned by ``Logger.log_epoch_done`` and the node
            embeddings of the last processed sample.
        """
        log_interval = 1 if set_name=='TEST' else 999
        self.logger.log_epoch_start(epoch, len(split), set_name, minibatch_log_interval=log_interval)

        torch.set_grad_enabled(grad)
        try:
            for s in split:
                if self.tasker.is_static:
                    s = self.prepare_static_sample(s)
                else:
                    s = self.prepare_sample(s)

                predictions, nodes_embs = self.predict(s.hist_adj_list, s.hist_ndFeats_list, s.label_sp['idx'], s.node_mask_list)

                loss = self.comp_loss(predictions,s.label_sp['vals'])

                # MRR needs the edge indices, so they are passed when evaluating link prediction.
                if set_name in ['TEST', 'VALID'] and self.args.task == 'link_pred':
                    self.logger.log_minibatch(predictions, s.label_sp['vals'], loss.detach(), adj = s.label_sp['idx'])
                else:
                    self.logger.log_minibatch(predictions, s.label_sp['vals'], loss.detach())

                if grad:
                    self.optim_step(loss)
        finally:
            # Re-enable autograd even when a batch raises, so a failed
            # evaluation pass cannot leave gradients globally disabled.
            torch.set_grad_enabled(True)

        eval_measure = self.logger.log_epoch_done()

        return eval_measure, nodes_embs

    def predict(self,hist_adj_list,hist_ndFeats_list,node_indices,mask_list):
        nodes_embs = self.gcn(hist_adj_list, hist_ndFeats_list, mask_list)

        predict_batch_size = 100000
        gather_predictions=[]
        for i in range(1 +(node_indices.size(1)//predict_batch_size)):
            cls_input = self.gather_node_embs(nodes_embs, node_indices[:, i*predict_batch_size:(i+1)*predict_batch_size])
            predictions = self.classifier(cls_input)
            gather_predictions.append(predictions)
        gather_predictions=torch.cat(gather_predictions, dim=0)
        return gather_predictions, nodes_embs

    def gather_node_embs(self,nodes_embs,node_indices):
        """Look up the embeddings of every index set and concatenate them along dim 1."""
        selected = [nodes_embs[node_set] for node_set in node_indices]
        return torch.cat(selected, dim = 1)

    def optim_step(self,loss):
        self.tr_step += 1
        loss.backward()

        if self.tr_step % self.args.steps_accum_gradients == 0:
            self.gcn_opt.step()
            self.classifier_opt.step()

            self.gcn_opt.zero_grad()
            self.classifier_opt.zero_grad()


    def prepare_sample(self,sample):
        """Turn a raw temporal sample into device-resident model inputs.

        Every history step gets its adjacency converted with
        ``sparse_prepare_tensor``, its node features prepared by the tasker
        and its node mask transposed. The labels lose their batch dimension
        and are moved to ``args.device``.
        """
        sample = Namespace(sample)
        device = self.args.device

        for i, adj in enumerate(sample.hist_adj_list):
            sparse_adj = sparse_prepare_tensor(adj, torch_size = [self.num_nodes])
            sample.hist_adj_list[i] = sparse_adj.to(device)

            node_feats = self.tasker.prepare_node_feats(sample.hist_ndFeats_list[i])
            sample.hist_ndFeats_list[i] = node_feats.to(device)

            # transposed to have same dimensions as scorer
            sample.node_mask_list[i] = sample.node_mask_list[i].to(device).t()

        label_sp = self.ignore_batch_dim(sample.label_sp)

        label_idx = label_sp['idx'].to(device)
        if self.args.task in ["link_pred", "edge_cls"]:
            # Edge tasks index pairs of nodes: after the transpose each row
            # holds one side of the pairs, so their embeddings can be
            # concatenated in gather_node_embs.
            label_idx = label_idx.t()
        label_sp['idx'] = label_idx

        label_sp['vals'] = label_sp['vals'].type(torch.long).to(device)
        sample.label_sp = label_sp

        return sample

    def prepare_static_sample(self,sample):
        """Attach the shared static history and build ``label_sp`` for a sample.

        ``label_sp['idx']`` is a one-element list holding ``sample.idx`` and
        ``label_sp['vals']`` is ``sample.label``.
        """
        sample = Namespace(sample)

        # The graph never changes, so the tensors prepared in __init__ are reused.
        sample.hist_adj_list = self.hist_adj_list
        sample.hist_ndFeats_list = self.hist_ndFeats_list

        sample.label_sp = {'idx': [sample.idx], 'vals': sample.label}

        return sample

    def ignore_batch_dim(self,adj):
        if self.args.task in ["link_pred", "edge_cls"]:
            adj['idx'] = adj['idx'][0]
        adj['vals'] = adj['vals'][0]
        return adj

    def save_node_embs_csv(self, nodes_embs, indexes, file_name):
        csv_node_embs = []
        for node_id in indexes:
            orig_ID = torch.DoubleTensor([self.tasker.data.contID_to_origID[node_id]])

            csv_node_embs.append(torch.cat((orig_ID,nodes_embs[node_id].double())).detach().numpy())

        pd.DataFrame(np.array(csv_node_embs)).to_csv(file_name, header=None, index=None, compression='gzip')
        # print ('Node embs saved in',file_name)

#### 参数设置

In [69]:
def random_param_value(param, param_min, param_max, type='int'):
    """Return ``param`` unchanged, or draw a random value when it is unset.

    A parameter counts as unset when it is ``None`` or when its string form
    is ``'none'`` in any letter case.

    Args:
        param: the configured value, or ``None`` / ``'None'`` to request a
            random draw.
        param_min: lower bound of the draw (inclusive).
        param_max: upper bound of the draw (inclusive for ``'int'``).
        type: sampling mode. ``'int'`` draws an integer with
            ``random.randrange``; ``'logscale'`` picks one of 100
            log-spaced points between the bounds; any other value draws a
            float with ``random.uniform``. (The name shadows the builtin but
            is kept because callers pass it by keyword.)

    Returns:
        ``param`` itself when it is set, otherwise the sampled value.
    """
    # The original test `str(param) is None` could never be true; check the
    # value itself and keep the case-insensitive 'none' string check.
    if param is not None and str(param).lower() != 'none':
        return param

    if type == 'int':
        return random.randrange(param_min, param_max + 1)
    if type == 'logscale':
        interval = np.logspace(np.log10(param_min), np.log10(param_max), num=100)
        return np.random.choice(interval, 1)[0]
    return random.uniform(param_min, param_max)

In [70]:
def _flag_is_true(flag):
    """Interpret a config flag given either as a bool or as a 'true'/'false' string."""
    if isinstance(flag, str):
        return flag.lower() == 'true'
    return bool(flag)


def build_random_hyper_params(args):
    """Resolve the model choice and fill in every unset hyper-parameter.

    When ``args.model`` names a model group, the concrete model is picked
    from that group by ``args.rank``. Afterwards each hyper-parameter that
    is unset (see ``random_param_value``) is replaced by a random draw from
    its configured ``*_min`` / ``*_max`` range. ``args`` is modified in place
    and also returned.

    Notes:
        - The plain ``gcn`` model always gets ``num_hist_steps = 0``.
        - ``layer_2_feats`` / ``lstm_l2_feats`` either copy the layer-1 value
          (when the matching ``*_same_as_l1`` flag is true) or are drawn from
          the layer-1 range.
        - The ``*_same_as_l1`` flags may be booleans or ``'true'``/``'false'``
          strings. Previously the string ``'false'`` was treated as true and
          a boolean ``False`` raised ``AttributeError``.
    """
    model_groups = {
        'all': ['gcn', 'egcn_o', 'egcn_h', 'gruA', 'gruB', 'egcn', 'lstmA', 'lstmB'],
        'all_nogcn': ['egcn_o', 'egcn_h', 'gruA', 'gruB', 'egcn', 'lstmA', 'lstmB'],
        'all_noegcn3': ['gcn', 'egcn_h', 'gruA', 'gruB', 'egcn', 'lstmA', 'lstmB'],
        'all_nogruA': ['gcn', 'egcn_o', 'egcn_h', 'gruB', 'egcn', 'lstmA', 'lstmB'],
        'saveembs': ['gcn', 'gcn', 'skipgcn', 'skipgcn'],
    }
    if args.model in model_groups:
        # Each process (rank) takes a different model of the group.
        args.model = model_groups[args.model][args.rank]

    args.learning_rate = random_param_value(args.learning_rate, args.learning_rate_min, args.learning_rate_max, type='logscale')
    # args.adj_mat_time_window = random_param_value(args.adj_mat_time_window, args.adj_mat_time_window_min, args.adj_mat_time_window_max, type='int')

    if args.model == 'gcn':
        args.num_hist_steps = 0
    else:
        args.num_hist_steps = random_param_value(args.num_hist_steps, args.num_hist_steps_min, args.num_hist_steps_max, type='int')

    gcn_params = args.gcn_parameters

    def draw_int(name, range_name=None):
        # Draw `name` from the integer range of `range_name` (its own range by default).
        range_name = name if range_name is None else range_name
        return random_param_value(gcn_params[name],
                                  gcn_params[range_name + '_min'],
                                  gcn_params[range_name + '_max'],
                                  type='int')

    gcn_params['feats_per_node'] = draw_int('feats_per_node')
    gcn_params['layer_1_feats'] = draw_int('layer_1_feats')
    if _flag_is_true(gcn_params['layer_2_feats_same_as_l1']):
        gcn_params['layer_2_feats'] = gcn_params['layer_1_feats']
    else:
        # Layer 2 is sampled from the layer-1 range, as in the original code.
        gcn_params['layer_2_feats'] = draw_int('layer_2_feats', 'layer_1_feats')

    gcn_params['lstm_l1_feats'] = draw_int('lstm_l1_feats')
    if _flag_is_true(gcn_params['lstm_l2_feats_same_as_l1']):
        gcn_params['lstm_l2_feats'] = gcn_params['lstm_l1_feats']
    else:
        # LSTM layer 2 is sampled from the LSTM layer-1 range, as in the original code.
        gcn_params['lstm_l2_feats'] = draw_int('lstm_l2_feats', 'lstm_l1_feats')

    gcn_params['cls_feats'] = draw_int('cls_feats')
    return args

In [71]:
def build_dataset(args):
    """Instantiate the dataset selected by ``args.data``.

    For the bitcoin and SBM families the matching per-dataset argument block
    is first copied onto ``args`` (``args.bitcoin_args`` / ``args.sbm_args``)
    before the dataset is built.

    Args:
        args: run configuration; ``args.data`` holds the dataset name.

    Returns:
        The constructed dataset object.

    Raises:
        NotImplementedError: if ``args.data`` names no known dataset. The
            message now names the rejected dataset instead of the previous
            'only arxiv has been implemented' text.
    """
    name = args.data

    if name in ('bitcoinotc', 'bitcoinalpha'):
        if name == 'bitcoinotc':
            args.bitcoin_args = args.bitcoinotc_args
        else:
            args.bitcoin_args = args.bitcoinalpha_args
        return bitcoin_dataset(args)

    # NOTE(review): the `aml`, `ell`, `dbg` and `cg` modules used below are not
    # defined in the visible part of this file; confirm they are imported.
    if name == 'aml_sim':
        return aml.Aml_Dataset(args)

    if name == 'elliptic':
        return ell.Elliptic_Dataset(args)

    if name == 'elliptic_temporal':
        return Elliptic_Temporal_Dataset(args)

    if name == 'uc_irv_mess':
        return Uc_Irvine_Message_Dataset(args)

    if name == 'dbg':
        return dbg.dbg_dataset(args)

    if name == 'colored_graph':
        return cg.Colored_Graph(args)

    if name == 'autonomous_syst':
        return Autonomous_Systems_Dataset(args)

    if name == 'reddit':
        return Reddit_Dataset(args)

    if name.startswith('sbm'):
        # Other 'sbm*' names fall through and use whatever args.sbm_args already holds.
        if name == 'sbm20':
            args.sbm_args = args.sbm20_args
        elif name == 'sbm50':
            args.sbm_args = args.sbm50_args
        return sbm_dataset(args)

    raise NotImplementedError('dataset {!r} has not been implemented'.format(name))

In [72]:
def build_tasker(args,dataset):
    """Create the tasker that matches ``args.task``.

    Supported tasks: ``link_pred``, ``edge_cls``, ``node_cls`` and
    ``static_node_cls``.

    Raises:
        NotImplementedError: for any other task name.
    """
    # Factories are wrapped in lambdas so that only the selected tasker class
    # is looked up and instantiated.
    factories = {
        'link_pred': lambda: Link_Pred_Tasker(args,dataset),
        'edge_cls': lambda: Edge_Cls_Tasker(args,dataset),
        'node_cls': lambda: Node_Cls_Tasker(args,dataset),
        'static_node_cls': lambda: Static_Node_Cls_Tasker(args,dataset),
    }
    make_tasker = factories.get(args.task)
    if make_tasker is None:
        raise NotImplementedError('still need to implement the other tasks')
    return make_tasker()

In [73]:
def build_gcn(args,tasker):
    """Instantiate the graph model selected by ``args.model``.

    The model arguments come from ``args.gcn_parameters``, with
    ``feats_per_node`` overridden by the tasker's value. Every model uses an
    ``RReLU`` activation. The static models (``gcn``, ``skipgcn``,
    ``skipfeatsgcn``) are checked first; every other model requires
    ``args.num_hist_steps > 0``.

    Raises:
        AssertionError: if a non-static model is requested with
            ``args.num_hist_steps <= 0``.
        NotImplementedError: if ``args.model`` is not recognised.
    """
    gcn_args = Namespace(args.gcn_parameters)
    gcn_args.feats_per_node = tasker.feats_per_node

    model_name = args.model

    # --- models that do not need a history of snapshots ---
    if model_name == 'gcn':
        return Sp_GCN(gcn_args, activation=torch.nn.RReLU()).to(args.device)
    if model_name == 'skipgcn':
        return Sp_Skip_GCN(gcn_args, activation=torch.nn.RReLU()).to(args.device)
    if model_name == 'skipfeatsgcn':
        return Sp_Skip_NodeFeats_GCN(gcn_args, activation=torch.nn.RReLU()).to(args.device)

    # --- every remaining model needs at least one history step ---
    assert args.num_hist_steps > 0, 'more than one step is necessary to train LSTM'

    if model_name == 'lstmA':
        return Sp_GCN_LSTM_A(gcn_args, activation=torch.nn.RReLU()).to(args.device)
    if model_name == 'gruA':
        return Sp_GCN_GRU_A(gcn_args, activation=torch.nn.RReLU()).to(args.device)
    if model_name == 'lstmB':
        return Sp_GCN_LSTM_B(gcn_args, activation=torch.nn.RReLU()).to(args.device)
    if model_name == 'gruB':
        return Sp_GCN_GRU_B(gcn_args, activation=torch.nn.RReLU()).to(args.device)
    if model_name == 'egcn':
        return egcn.EGCN(gcn_args, activation=torch.nn.RReLU()).to(args.device)

    # The EGCN_H / EGCN_O constructors receive the device as an argument
    # instead of being moved with .to().
    if model_name == 'egcn_h':
        return EGCN_H(gcn_args, activation=torch.nn.RReLU(), device=args.device)
    if model_name == 'skipfeatsegcn_h':
        return EGCN_H(gcn_args, activation=torch.nn.RReLU(), device=args.device, skipfeats=True)
    if model_name == 'egcn_o':
        return EGCN_O(gcn_args, activation=torch.nn.RReLU(), device=args.device)

    raise NotImplementedError('need to finish modifying the models')

In [74]:
def build_classifier(args,tasker):
    """Build the classifier head, sized to the output of the chosen model.

    The input width is the model's output feature count times a multiplier:
    1 for ``node_cls`` / ``static_node_cls`` and 2 for every other task
    (presumably because those tasks score a pair of node embeddings; confirm
    against the classifier's callers).

    Returns:
        A ``Classifier`` with ``tasker.num_classes`` outputs, moved to
        ``args.device``.
    """
    mult = 1 if args.task in ('node_cls', 'static_node_cls') else 2

    model_name = args.model
    gcn_params = args.gcn_parameters

    if 'gru' in model_name or 'lstm' in model_name:
        # Recurrent variants expose the width of their second LSTM layer.
        width = gcn_params['lstm_l2_feats']
    elif model_name in ('skipfeatsgcn', 'skipfeatsegcn_h'):
        # Skip-feature variants add the raw node-feature width to the layer-2 width.
        width = gcn_params['layer_2_feats'] + gcn_params['feats_per_node']
    else:
        width = gcn_params['layer_2_feats']

    head = Classifier(args, in_features=width * mult, out_features=tasker.num_classes)
    return head.to(args.device)

### Main

In [75]:
# Create the argument parser and parse it into the `args` object that the
# cells below read and extend.
# NOTE(review): create_parser / parse_args are defined outside this section.
parser = create_parser()
args = parse_args(parser)

In [76]:
# NOTE: a `global` statement at module (cell) level has no effect.
global rank, wsize, use_cuda
# CUDA is used only when it is both available and requested in the configuration.
args.use_cuda = (torch.cuda.is_available() and args.use_cuda)

In [77]:
# Select the torch device string from the CUDA flag and report the choice.
args.device = "cuda" if args.use_cuda else "cpu"
print ("use CUDA:", args.use_cuda, "- device:", args.device)

use CUDA: False - device: cpu


In [78]:
# Try to join an MPI process group; if that fails for any reason, fall back
# to a single-process run (rank 0 of 1).
try:
    dist.init_process_group(backend='mpi') #, world_size=4
    rank = dist.get_rank()
    wsize = dist.get_world_size()
    print('Hello from process {} (out of {})'.format(rank, wsize))

    if args.use_cuda:
        torch.cuda.set_device(rank)  # are we sure of the rank+1????
        print('using the device {}'.format(torch.cuda.current_device()))
except Exception:
    # `except Exception` rather than a bare `except:` so that
    # KeyboardInterrupt and SystemExit still propagate.
    rank = 0
    wsize = 1
    print(('MPI backend not preset. Set process rank to {} (out of {})'.format(rank, wsize)))

MPI backend not preset. Set process rank to 0 (out of 1)


In [79]:
# Use a rank-dependent default seed when no seed is configured. An unset seed
# may be the real None or the string "None"; the original condition
# (`is None and != "None"`) never matched the string form.
if args.seed is None or args.seed == "None":
    seed = 123 + rank # int(time.time())+rank
else:
    seed = args.seed # +rank

In [80]:
# Seed every random number generator used in this file: NumPy, the stdlib
# `random` module, and torch (CPU plus the current and all CUDA devices).
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Record the effective seed and the process layout on the configuration.
args.seed, args.rank, args.wsize = seed, rank, wsize

In [81]:
# Resolve the model choice and replace every unset hyper-parameter with a
# random draw from its configured range (args is updated in place).
args = build_random_hyper_params(args)

In [82]:
# Build the dataset selected by args.data.
dataset = build_dataset(args)

TIME tensor(49) tensor(0)


In [83]:
# Build the tasker selected by args.task on top of the dataset.
tasker = build_tasker(args, dataset)

In [84]:
# Build the splitter.
# NOTE(review): this rebinds the name `splitter` to the returned object, so
# the callable previously bound to `splitter` is no longer reachable under
# that name and re-running this cell would call the instance instead —
# confirm that this is intended.
splitter = splitter(args, tasker)

Dataset splits sizes:  train 29 dev 5 test 10


In [85]:
# Build the models: the graph network selected by args.model and the
# classifier head sized to that network's output.
gcn = build_gcn(args, tasker)
classifier = build_classifier(args, tasker)

CLS num_feats 200


In [86]:
# Build the loss and move it to the selected device.
# NOTE(review): Cross_Entropy is defined outside this section.
cross_entropy = Cross_Entropy(args, dataset).to(args.device)

In [87]:
# Build the trainer from the data splits, both models, the loss and the dataset.
trainer = Trainer(args, splitter=splitter, gcn=gcn, classifier=classifier, comp_loss=cross_entropy, dataset=dataset, num_classes=tasker.num_classes)

Log: STDOUT
INFO:root:*** PARAMETERS ***
INFO:root:{'adj_mat_time_window': 1,
 'class_weights': [0.1, 0.9],
 'comments': ['comments'],
 'data': 'sbm50',
 'data_loading_params': {'batch_size': 1, 'num_workers': 0},
 'dev_proportion': 0.1,
 'device': 'cpu',
 'early_stop_patience': 50,
 'eval_after_epochs': 5,
 'gcn_parameters': {'cls_feats': 100,
                    'cls_feats_max': 800,
                    'cls_feats_min': 100,
                    'feats_per_node': 100,
                    'feats_per_node_max': 256,
                    'feats_per_node_min': 50,
                    'k_top_grcu': 200,
                    'layer_1_feats': 100,
                    'layer_1_feats_max': 200,
                    'layer_1_feats_min': 10,
                    'layer_2_feats': 100,
                    'layer_2_feats_same_as_l1': True,
                    'lstm_l1_feats': 100,
                    'lstm_l1_feats_max': 200,
                    'lstm_l1_feats_min': 10,
                    'lstm_l1_lay

In [88]:
# Run the training loop.
trainer.train()

INFO:root:################ TRAIN epoch 0 ###################
INFO:root:TRAIN mean losses tensor(0.1159)
INFO:root:TRAIN mean errors 0.4268079996109009
INFO:root:TRAIN mean MRR 0.0 - mean MAP 0.1651397294252547
INFO:root:TRAIN tp {0: tensor(14709637), 1: tensor(1896303)},fn {0: tensor(11426164), 1: tensor(938887)},fp {0: tensor(938887), 1: tensor(11426164)}
INFO:root:TRAIN measures microavg - precision 0.5732 - recall 0.5732 - f1 0.5732 
INFO:root:TRAIN measures for class 0 - precision 0.9400 - recall 0.5628 - f1 0.7041 
INFO:root:TRAIN measures for class 1 - precision 0.1423 - recall 0.6688 - f1 0.2347 
INFO:root:TRAIN measures@10 microavg - precision 0.5596 - recall 0.0000 - f1 0.0000 
INFO:root:TRAIN measures@10 for class 0 - precision 0.9000 - recall 0.0000 - f1 0.0000 
INFO:root:TRAIN measures@10 for class 1 - precision 0.2043 - recall 0.0000 - f1 0.0000 
INFO:root:TRAIN measures@100 microavg - precision 0.5740 - recall 0.0001 - f1 0.0002 
INFO:root:TRAIN measures@100 for class 0 -

KeyboardInterrupt: 