In [1]:
import numpy as np
import networkx as nx
import json

def extract_networks(data_dir, dataset, n_minutes=5, original_nets=True):
    """Read a temporal edge list and aggregate it into fixed-width time windows.

    The input file is ``data_dir + '/tij_' + dataset + '.dat'``. Every non-blank
    line must hold exactly three whitespace-separated integers ``t i j``: a
    timestamp and the two endpoints of an edge.

    Parameters
    ----------
    data_dir : str
        Directory containing the ``tij_<dataset>.dat`` file.
    dataset : str
        Dataset name used to build the file name. It also selects the time
        scaling: timestamps are multiplied by 20 for every dataset except
        'LyonSchool' and 'LH10'.
    n_minutes : int
        Width of the aggregation window; the window length in (scaled) time
        units is ``60 * n_minutes``.
    original_nets : bool
        If true, the non-aggregated per-timestamp graphs are returned as well.

    Returns
    -------
    dict or (dict, dict)
        ``aggnetworks`` maps the start time of each window to an ``nx.Graph``
        holding all edges seen in that window. When ``original_nets`` is true
        the result is ``(originalnetworks, aggnetworks)``, where
        ``originalnetworks`` maps each scaled timestamp to its own graph.
        An empty (or all-blank) file yields empty dictionaries.

    Raises
    ------
    ValueError
        If a non-blank line does not consist of exactly three integers.
    """
    # Timestamps are rescaled for all datasets except these two.
    # NOTE(review): presumably these two files are already expressed in seconds
    # while the others count 20-second steps -- confirm against the data source.
    time_factor = 1 if dataset in ['LyonSchool', 'LH10'] else 20

    # Window length: 20 * 3 = 60 scaled time units per minute.
    delta_t = 20 * 3 * n_minutes

    originalnetworks = {}
    aggnetworks = {}
    t0 = None  # scaled timestamp of the first record; origin of the windows

    # A single pass under a context manager: the handle is closed even if a
    # malformed line raises, and the file is not opened twice just to get t0.
    with open(data_dir + '/tij_' + dataset + '.dat') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            (t, i, j) = map(int, fields)
            t = t * time_factor
            if t0 is None:
                t0 = t

            if original_nets:
                if t not in originalnetworks:
                    originalnetworks[t] = nx.Graph()
                originalnetworks[t].add_edge(i, j)

            # Floor division snaps t to the start of the window it falls into.
            aggtime = t0 + ((t - t0) // delta_t) * delta_t
            if aggtime not in aggnetworks:
                aggnetworks[aggtime] = nx.Graph()
            aggnetworks[aggtime].add_edge(i, j)

    if original_nets:
        return originalnetworks, aggnetworks
    return aggnetworks

def extract_cliques(gs):
    """Enumerate the cliques of every snapshot graph.

    Parameters
    ----------
    gs : dict
        Maps a sortable key (the snapshot time) to an ``nx.Graph``.

    Returns
    -------
    dict
        Same keys as ``gs``, inserted in ascending key order; each value is the
        list of cliques yielded by ``nx.find_cliques`` for that graph.
    """
    # Visit the snapshots in temporal order so the result is ordered by time.
    return {snapshot: list(nx.find_cliques(gs[snapshot])) for snapshot in sorted(gs)}

def clique_weights(cliques):
    """Count how often each clique appears across all snapshots.

    Parameters
    ----------
    cliques : dict
        Maps a key (the snapshot time) to an iterable of cliques, each clique
        being an iterable of hashable node ids.

    Returns
    -------
    collections.Counter
        Keys are the cliques as ``frozenset`` objects (so node order within a
        clique is irrelevant); values are the number of occurrences summed over
        every key of ``cliques``.
    """
    from collections import Counter

    counts = Counter()
    for snapshot in cliques:
        # frozenset makes the clique hashable and order-independent.
        counts.update(frozenset(members) for members in cliques[snapshot])
    return counts

def clean_non_maximal(ws):
    """Keep only the entries whose key is not strictly contained in another key.

    Parameters
    ----------
    ws : dict
        Maps a collection of node ids (in this notebook, a ``frozenset``
        produced by ``clique_weights``) to its weight.

    Returns
    -------
    dict
        The subset of ``ws`` whose keys are maximal, i.e. for which no other,
        longer key is a proper superset. Weights are copied unchanged and the
        iteration order of ``ws`` is preserved.
    """
    # Convert every key to a set once, instead of on every pairwise comparison.
    candidates = [(key, len(key), frozenset(key)) for key in ws]

    maximal_simplices = {}
    for key, size, members in candidates:
        # The length check is a cheap pre-filter before the subset test.
        contained = any(
            size < other_size and members < other_members
            for _, other_size, other_members in candidates
        )
        if not contained:
            maximal_simplices[key] = ws[key]
    return maximal_simplices


def save_cliques(ws, data_dir, dataset, n_minutes, thr=None):
    """Write the cliques in ``ws`` to a JSON file as a list of lists.

    Parameters
    ----------
    ws : dict
        Maps each clique (an iterable of JSON-serializable node ids) to its
        weight.
    data_dir : str
        Prefix of the output path. It is concatenated verbatim with the file
        name, so it must be empty or end with a path separator.
    dataset : str
        Dataset name, used in the file name.
    n_minutes : int
        Aggregation window, used in the file name.
    thr : number or None
        If given, only cliques with weight ``>= thr`` are written; if ``None``
        every clique is written.

    The output file is
    ``<data_dir>aggr_<n_minutes>min_cliques_thr<thr>_<dataset>.json``.
    """
    if thr is None:
        # Build a real list: a lazy map object is not JSON serializable.
        ls = [list(x) for x in ws]
    else:
        ls = [list(x) for x in ws if ws[x] >= thr]

    out_path = data_dir + 'aggr_' + str(n_minutes) + 'min_cliques_thr' + str(thr) + '_' + dataset + '.json'
    # The context manager closes the file even if serialization fails.
    with open(out_path, 'w') as jd:
        json.dump(ls, jd)

In [2]:
# Input directory holding the tij_<dataset>.dat edge lists.
dataset_dir = 'Originaldata_20s/'
# save_cliques concatenates this prefix with the file name, so an empty string
# writes the JSON files into the current working directory.
out_dir = ''

datasets = ['InVS15','LH10','LyonSchool','SFHH','Thiers13']

# Aggregation window (minutes) and the minimum clique weights to export.
n_minutes = 15
thrs = [1]

for dataset in datasets:
    # Nothing up to the maximal-clique filter depends on the threshold, so it
    # is computed once per dataset rather than once per (dataset, thr) pair.
    aggs = extract_networks(dataset_dir, dataset, n_minutes, original_nets=False)
    cliques = extract_cliques(aggs)

    ws = clique_weights(cliques)
    maximal_cliques = clean_non_maximal(ws)

    for thr in thrs:
        save_cliques(maximal_cliques, out_dir, dataset, n_minutes, thr=thr)