In [None]:
# The authors thank Koya Sato (https://www.researchgate.net/profile/Koya_Sato) for providing the code used to compute the supra-adjacency matrices.

In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import codecs
import pickle
import os
import math
import itertools as it

In [2]:
# Temporal aggregation interval: width of one time slice, expressed in the units
# of the timestamp column `t` (presumably seconds, i.e. 10 minutes -- TODO confirm).
AGGR_TIME = 10*60

# Temporal coupling weight(s): weight put on the edge that links a node at one
# active time to the same node at its next active time (see make_dyn_supra).
# The commented-out remainder of the list holds further values: 2, 4, 8, 16.
temp_edge_weight_list = [1]#,2,4,8,16]

# Temporal coupling representation(s), interpreted by make_dyn_supra:
#   'Non'     -> weights are used unchanged
#   'Inverse' -> weights are divided by the time gap between the two slices
rep_time_list = ['Non']
#rep_time_list = ['Inverse']

# Datasets to process; each name is substituted into the input file path.
# dataset_list = ['LyonSchool', 'InVS15', 'SFHH', 'LH10', 'Thiers13']
dataset_list = ['LyonSchool']
# dataset_list = ['InVS15']
#dataset_list = ['SFHH']
# dataset_list = ['LH10']
# dataset_list = ['Thiers13']

# Supra-adjacency representations to build: 'Stat' selects make_stat_supra,
# any other value (here 'Dyn') selects make_dyn_supra in the driver loops.
supra_rep_list = ['Dyn', 'Stat']
# supra_rep_list = ['Stat']

In [3]:
# Insert a weighted edge between two "temporal" nodes, each labelled "node-time".
def add_temporal_edge(g, n1, t1, n2, t2, weight):
    """Add the edge (n1 at t1) -> (n2 at t2) to ``g``.

    Both endpoints are encoded as strings of the form "<node>-<time>", each
    part rendered with ``%d`` formatting. The edge is added through
    ``g.add_edge`` with ``weight`` stored as its ``weight`` attribute.
    """
    source, target = ['%d-%d' % pair for pair in ((n1, t1), (n2, t2))]
    g.add_edge(source, target, weight=weight)

# get first event for node n strictly after time t0
def get_next_event(df_tnet, n, t0):
    """Return the earliest time slice after ``t0`` in which node ``n`` appears.

    Parameters
    ----------
    df_tnet : pandas.DataFrame
        Contact table with at least the columns ``tslice``, ``i`` and ``j``.
    n : node identifier, compared against columns ``i`` and ``j``.
    t0 : reference time slice; only rows with ``tslice > t0`` are considered.

    Returns
    -------
    The smallest matching ``tslice`` value, or ``None`` when node ``n`` has no
    contact after ``t0``.
    """
    involves_n = (df_tnet.i == n) | (df_tnet.j == n)
    later = df_tnet.loc[(df_tnet.tslice > t0) & involves_n, 'tslice']
    if later.empty:
        return None
    # Take the minimum instead of the first row so the answer does not depend
    # on df_tnet being sorted by tslice.
    return later.min()
def get_previous_event(df_tnet, n, t0):
    """Return the latest time slice before ``t0`` in which node ``n`` appears.

    Parameters
    ----------
    df_tnet : pandas.DataFrame
        Contact table with at least the columns ``tslice``, ``i`` and ``j``.
    n : node identifier, compared against columns ``i`` and ``j``.
    t0 : reference time slice; only rows with ``tslice < t0`` are considered.

    Returns
    -------
    The largest matching ``tslice`` value, or ``None`` when node ``n`` has no
    contact before ``t0``.
    """
    involves_n = (df_tnet.i == n) | (df_tnet.j == n)
    earlier = df_tnet.loc[(df_tnet.tslice < t0) & involves_n, 'tslice']
    if earlier.empty:
        return None
    # Take the maximum instead of the last row so the answer does not depend
    # on df_tnet being sorted by tslice.
    return earlier.max()

In [4]:
# return time_aggregated_temporal_net(s_temp_net),
# also, save the list of ID and active time, which is a index of embedding step.
def load_temp_data(dataset):
    """Load a contact list and aggregate it into time slices of AGGR_TIME.

    The input file is tab-separated without header and is read as the columns
    ``t`` (timestamp), ``i`` and ``j`` (the two nodes in contact).

    Side effect: creates ``../preprocessed/SupraAdjacencyMatrix/<dataset>`` and,
    if it does not exist yet, writes ``PatActiveTimeAggtime<AGGR_TIME>.pkl``
    there: the pickled list of all "node-tslice" labels that are active.

    Returns
    -------
    partial_times : list
        Sorted time-slice values that contain at least one contact.
    s_temp_net : pandas.Series
        Number of contacts per (tslice, i, j), named ``weight``.
    df_tnet : pandas.DataFrame
        ``s_temp_net`` with its index turned into columns.
    """
    df_temp_net = pd.read_csv(('../data/Data_SocioPatterns_20s_nonights/tij_%s.dat_nonights.dat' % dataset),
                        sep = '\t', header = None,
                        names = ['t', 'i', 'j'])
    # compute slice each contact event belongs to (relative to the first row's timestamp)
    df_temp_net.loc[:,'tslice'] = np.floor((df_temp_net.t - df_temp_net.t.iloc[0]) / AGGR_TIME)

    # Drop self-loops. The explicit copy makes the frame independent of the
    # unfiltered one, so the in-place edits below are well defined.
    df_temp_net = df_temp_net[df_temp_net.i != df_temp_net.j].copy()
    # store every contact with i < j so that (i, j) and (j, i) are the same pair
    s = df_temp_net['i'] > df_temp_net['j']
    df_temp_net.loc[s, ['i','j']] = df_temp_net.loc[s, ['j','i']].values
    df_temp_net = df_temp_net.drop_duplicates(['t','i','j'])

    # group over (slice, i, j), and compute number of contacts within time slice,
    # regarded as "weight" for contacts in each time slice
    s_temp_net = df_temp_net.groupby(['tslice','i','j']).size().rename('weight')

    # times for all temporal slices, note that it may have a big gap (return to home)
    partial_times = sorted(list(s_temp_net.index.levels[0]))

    # convenience: dataframe version of the series above
    df_tnet = s_temp_net.reset_index()

    # make list of ID and active time
    # sort the embedding result by using this list.
    active_labels = set()
    for tslice, i, j in s_temp_net.index:
        active_labels.add('%d-%d' % (i, tslice))
        active_labels.add('%d-%d' % (j, tslice))
    # sorted() makes the stored order reproducible; plain set iteration order
    # for strings changes from one interpreter run to the next.
    pat_active_time = sorted(active_labels)

    target_dir = '../preprocessed/SupraAdjacencyMatrix/%s' %dataset
    os.makedirs(target_dir, exist_ok=True)

    pat_active_time_file_name = "../preprocessed/SupraAdjacencyMatrix/%s/PatActiveTimeAggtime%d.pkl" % (dataset, AGGR_TIME)
    # an existing file is never overwritten
    if not os.path.isfile(pat_active_time_file_name):
        with open(pat_active_time_file_name, "wb") as fh:
            pickle.dump(pat_active_time, fh)

    return partial_times, s_temp_net, df_tnet

In [5]:
def make_dyn_supra(partial_times,s_temp_net, df_tnet, temp_edge_weight, rep_time):
    """Build the dynamic supra-adjacency graph as a directed graph.

    For every contact (n1, n2) in slice t0, and for each endpoint that has a
    later contact (found with get_next_event), two edges are added:

    * a "cross" edge from the other endpoint at t0 to this endpoint at its
      next active time, weighted by the contact weight ``w``;
    * a "temporal" edge from this endpoint at t0 to itself at its next active
      time, weighted by ``temp_edge_weight``.

    Parameters
    ----------
    partial_times : iterable of time-slice values to loop over.
    s_temp_net : pandas.Series of contact weights indexed by (tslice, i, j).
    df_tnet : pandas.DataFrame passed to get_next_event.
    temp_edge_weight : weight of the temporal (self-coupling) edges.
    rep_time : 'Inverse' divides both weights by the time gap to the next
        active slice; any other value (e.g. 'Non') leaves them unchanged.

    Returns
    -------
    networkx.DiGraph whose nodes are "node-time" strings.
    """
    supra_G = nx.DiGraph() # directed
    # loop over time slices
    for t0 in partial_times:
        # loop over all edges at time t0
        for (n1, n2), w in s_temp_net[t0].items():
            # future event times for nodes n1 and n2
            t1 = get_next_event(df_tnet, n1, t0)
            t2 = get_next_event(df_tnet, n2, t0)

            if t1 is not None:
                cop_edge_weight_1 = temp_edge_weight
                # Scale a per-branch copy of w: rescaling w itself would leak
                # the 1/(t1-t0) factor into the n2 branch below.
                cross_weight_1 = w
                if rep_time == 'Inverse':
                    cop_edge_weight_1 = cop_edge_weight_1 * 1./float(t1-t0)
                    cross_weight_1 = cross_weight_1 * 1./float(t1-t0)
                add_temporal_edge(supra_G, n2, t0, n1, t1, weight=cross_weight_1) # add "cross" edge
                add_temporal_edge(supra_G, n1, t0, n1, t1, weight=cop_edge_weight_1) # add "temporal" edge

            if t2 is not None:
                cop_edge_weight_2 = temp_edge_weight
                cross_weight_2 = w
                if rep_time == 'Inverse':
                    cop_edge_weight_2 = cop_edge_weight_2 * 1./float(t2-t0)
                    cross_weight_2 = cross_weight_2 * 1./float(t2-t0)
                add_temporal_edge(supra_G, n2, t0, n2, t2, weight=cop_edge_weight_2) # add "temporal" edge
                add_temporal_edge(supra_G, n1, t0, n2, t2, weight=cross_weight_2) # add "cross" edge
    return supra_G

In [6]:
def make_stat_supra(partial_times, s_temp_net, df_tnet):
    """Build the static supra-adjacency graph as a directed graph.

    Every contact (n1, n2) in slice t0 becomes a pair of opposite edges between
    the nodes "n1-t0" and "n2-t0", both weighted by the contact weight. No
    edges are created between different time slices.

    Parameters
    ----------
    partial_times : iterable of time-slice values to loop over.
    s_temp_net : pandas.Series of contact weights indexed by (tslice, i, j).
    df_tnet : unused; kept so the signature parallels make_dyn_supra.

    Returns
    -------
    networkx.DiGraph whose nodes are "node-time" strings.
    """
    supra_G = nx.DiGraph() # directed
    # loop over time slices
    for t0 in partial_times:
        # loop over all edges at time t0
        for (n1, n2), w in s_temp_net[t0].items():
            # add both directions of the contact inside the same time slice
            add_temporal_edge(supra_G, n2, t0, n1, t0, weight=w)
            add_temporal_edge(supra_G, n1, t0, n2, t0, weight=w)
    return supra_G

In [7]:
def save_supra(dataset, supra_G, supra_rep, temp_edge_weight=None, rep_time=None):
    """Pickle ``supra_G`` under ../preprocessed/SupraAdjacencyMatrix/<dataset>/<supra_rep>.

    The target directory is created if needed. The file name is
    ``Aggtime<AGGR_TIME>.gpickle`` when ``supra_rep == 'Stat'``, otherwise
    ``Aggtime<AGGR_TIME>Weight<temp_edge_weight>Reptime<rep_time>.gpickle``
    (``temp_edge_weight`` is formatted with ``%d``, so it must be a number in
    that case).
    """
    target_dir = '../preprocessed/SupraAdjacencyMatrix/%s/%s' %(dataset, supra_rep)
    os.makedirs(target_dir, exist_ok = True)
    if supra_rep == 'Stat':
        file_name = '/Aggtime%d.gpickle' % (AGGR_TIME)
    else:
        file_name = '/Aggtime%dWeight%dReptime%s.gpickle' % (AGGR_TIME, temp_edge_weight, rep_time)
    # nx.write_gpickle was removed in NetworkX 3.0. It was a wrapper around
    # pickle.dump with the highest protocol, so the file stays a plain pickle.
    with open(target_dir + file_name, 'wb') as fh:
        pickle.dump(supra_G, fh, pickle.HIGHEST_PROTOCOL)

In [8]:
def save_supra_allnodes(dataset, supra_G, supra_rep, temp_edge_weight=None, rep_time=None):
    """Pickle ``supra_G`` under ../preprocessed/SupraAdjacencyMatrixAllNodes/<dataset>/<supra_rep>.

    The target directory is created if needed. The file name is
    ``Aggtime<AGGR_TIME>.gpickle`` when ``supra_rep == 'Stat'``, otherwise
    ``Aggtime<AGGR_TIME>Weight<temp_edge_weight>Reptime<rep_time>.gpickle``
    (``temp_edge_weight`` is formatted with ``%d``, so it must be a number in
    that case).
    """
    target_dir = '../preprocessed/SupraAdjacencyMatrixAllNodes/%s/%s' %(dataset, supra_rep)
    os.makedirs(target_dir, exist_ok = True)
    if supra_rep == 'Stat':
        file_name = '/Aggtime%d.gpickle' % (AGGR_TIME)
    else:
        file_name = '/Aggtime%dWeight%dReptime%s.gpickle' % (AGGR_TIME, temp_edge_weight, rep_time)
    # nx.write_gpickle was removed in NetworkX 3.0. It was a wrapper around
    # pickle.dump with the highest protocol, so the file stays a plain pickle.
    with open(target_dir + file_name, 'wb') as fh:
        pickle.dump(supra_G, fh, pickle.HIGHEST_PROTOCOL)

In [9]:
def add_inactive_nodes_to_supra(supra_G):
    """Relabel nodes by rank and add every missing (node, time) combination.

    Each node label "n-t" is parsed into integers. Node ids and times are then
    replaced by their positions among the sorted distinct values, giving labels
    "<node index>-<time index>". Finally, every index combination that is not
    present yet is added as a node without edges.

    Returns the graph produced by ``nx.relabel_nodes`` with those nodes added.
    """
    # parse every "node-time" label back into an (int node, int time) pair
    node_name = []
    for label in supra_G.nodes():
        pieces = label.split('-')
        node_name.append((int(pieces[0]), int(pieces[1])))

    # sorted distinct values; the position in each array becomes the new index
    unique_node_name = np.unique([pair[0] for pair in node_name])
    unique_time_name = np.unique([pair[1] for pair in node_name])
    map_node_index = dict(zip(unique_node_name, range(len(unique_node_name))))
    map_time_index = dict(zip(unique_time_name, range(len(unique_time_name))))

    # old label -> rank-based label
    renamed_nodes = {}
    for n, t in node_name:
        renamed_nodes['%s-%s' % (n, t)] = '%s-%s' % (map_node_index[n], map_time_index[t])
    supra_H = nx.relabel_nodes(supra_G, renamed_nodes)

    NR_NODES = len(unique_node_name)
    NR_TIMES = len(unique_time_name)
    # full grid of "node index-time index" labels
    allnodes_alltimes = {
        '%s-%s' % (node_idx, time_idx)
        for node_idx in range(NR_NODES)
        for time_idx in range(NR_TIMES)
    }
    to_add = allnodes_alltimes.difference(renamed_nodes.values())
    supra_H.add_nodes_from(to_add)
    return supra_H

In [11]:
# Build and save the supra-adjacency graphs for every configured dataset and
# representation, once with active nodes only and once with all nodes.
for dataset in dataset_list:
    partial_times, s_temp_net, df_tnet = load_temp_data(dataset)
    for supra_rep in supra_rep_list:
        if supra_rep != 'Stat':
            # dynamic representation: one graph per (coupling weight, time repr) pair
            for temp_edge_weight in temp_edge_weight_list:
                for rep_time in rep_time_list:
                    supra_G = make_dyn_supra(partial_times, s_temp_net, df_tnet, temp_edge_weight, rep_time)
                    save_supra(dataset, supra_G, supra_rep, temp_edge_weight, rep_time)
                    supra_G = add_inactive_nodes_to_supra(supra_G)
                    save_supra_allnodes(dataset, supra_G, supra_rep, temp_edge_weight, rep_time)
            continue

        # static representation: a single graph per dataset
        supra_G = make_stat_supra(partial_times, s_temp_net, df_tnet)
        save_supra(dataset, supra_G, supra_rep)
        supra_G = add_inactive_nodes_to_supra(supra_G)
        save_supra_allnodes(dataset, supra_G, supra_rep)

# For Link Prediction:

In [10]:
# return time_aggregated_temporal_net(s_temp_net),
# also, save the list of ID and active time, which is a index of embedding step.
def load_modified_temp_data(dataset):
    """Load the link-removed contact list and aggregate it into time slices of AGGR_TIME.

    The input file ``tij_<dataset>_7030_0.csv.gz`` is comma-separated without
    header and is read as the columns ``t`` (timestamp), ``i`` and ``j`` (the
    two nodes in contact).

    Side effect: creates ``../preprocessed/SupraAdjacencyMatrix/<dataset>`` and,
    if it does not exist yet, writes ``PatActiveTimeAggtime<AGGR_TIME>.pkl``
    there: the pickled list of all "node-tslice" labels that are active.

    Returns
    -------
    partial_times : list
        Sorted time-slice values that contain at least one contact.
    s_temp_net : pandas.Series
        Number of contacts per (tslice, i, j), named ``weight``.
    df_tnet : pandas.DataFrame
        ``s_temp_net`` with its index turned into columns.
    """
    df_temp_net = pd.read_csv(('../preprocessed/RemovedLinksTempNet/%s/tij_%s_7030_0.csv.gz' % (dataset,  dataset)),
                        sep = ',', header = None,
                        names = ['t', 'i', 'j'])
    # compute slice each contact event belongs to (relative to the first row's timestamp)
    df_temp_net.loc[:,'tslice'] = np.floor((df_temp_net.t - df_temp_net.t.iloc[0]) / AGGR_TIME)

    # Drop self-loops. The explicit copy makes the frame independent of the
    # unfiltered one, so the in-place edits below are well defined.
    df_temp_net = df_temp_net[df_temp_net.i != df_temp_net.j].copy()
    # store every contact with i < j so that (i, j) and (j, i) are the same pair
    s = df_temp_net['i'] > df_temp_net['j']
    df_temp_net.loc[s, ['i','j']] = df_temp_net.loc[s, ['j','i']].values
    df_temp_net = df_temp_net.drop_duplicates(['t','i','j'])

    # group over (slice, i, j), and compute number of contacts within time slice,
    # regarded as "weight" for contacts in each time slice
    s_temp_net = df_temp_net.groupby(['tslice','i','j']).size().rename('weight')

    # times for all temporal slices, note that it may have a big gap (return to home)
    partial_times = sorted(list(s_temp_net.index.levels[0]))

    # convenience: dataframe version of the series above
    df_tnet = s_temp_net.reset_index()

    # make list of ID and active time
    # sort the embedding result by using this list.
    active_labels = set()
    for tslice, i, j in s_temp_net.index:
        active_labels.add('%d-%d' % (i, tslice))
        active_labels.add('%d-%d' % (j, tslice))
    # sorted() makes the stored order reproducible; plain set iteration order
    # for strings changes from one interpreter run to the next.
    pat_active_time = sorted(active_labels)

    target_dir = '../preprocessed/SupraAdjacencyMatrix/%s' %dataset
    os.makedirs(target_dir, exist_ok=True)

    # NOTE(review): this is the same path the unmodified dataset uses, while the
    # graphs built from this data are saved under "<dataset>7030_0". If the file
    # already exists, the active-time list of the modified network is never
    # written -- confirm whether the path should carry the "7030_0" suffix.
    pat_active_time_file_name = "../preprocessed/SupraAdjacencyMatrix/%s/PatActiveTimeAggtime%d.pkl" % (dataset, AGGR_TIME)
    # an existing file is never overwritten
    if not os.path.isfile(pat_active_time_file_name):
        with open(pat_active_time_file_name, "wb") as fh:
            pickle.dump(pat_active_time, fh)

    return partial_times, s_temp_net, df_tnet

In [11]:
# Same pipeline as for the original data, applied to the link-removed networks;
# results are stored under "<dataset>7030_0".
for dataset in dataset_list:
    partial_times, s_temp_net, df_tnet = load_modified_temp_data(dataset)
    for supra_rep in supra_rep_list:
        if supra_rep != 'Stat':
            # dynamic representation: one graph per (coupling weight, time repr) pair
            for temp_edge_weight in temp_edge_weight_list:
                for rep_time in rep_time_list:
                    supra_G = make_dyn_supra(partial_times, s_temp_net, df_tnet, temp_edge_weight, rep_time)
                    save_supra(dataset+'7030_0', supra_G, supra_rep, temp_edge_weight, rep_time)
                    supra_G = add_inactive_nodes_to_supra(supra_G)
                    save_supra_allnodes(dataset+'7030_0', supra_G, supra_rep, temp_edge_weight, rep_time)
            continue

        # static representation: a single graph per dataset
        supra_G = make_stat_supra(partial_times, s_temp_net, df_tnet)
        save_supra(dataset+'7030_0', supra_G, supra_rep)
        supra_G = add_inactive_nodes_to_supra(supra_G)
        save_supra_allnodes(dataset+'7030_0', supra_G, supra_rep)