# Cluster Network Creation

Perform computationally expensive network analysis on a computing cluster. Results are pushed to a data repository on GitHub.

In [1]:
# to allow relative imports
import os
import sys

module = os.path.abspath(os.path.join('..'))
if module not in sys.path:
    sys.path.append(module)

# experiment
from epyc import Lab, JSONLabNotebook, RepeatedExperiment
from data_processing.metrics import MetricExperiment

# generators
from epydemic import PLCNetwork
from model.network.mobility_network import MNGeneratorFromNetworkData as MNG
from model.network.distanced_network import DNGenerator as DNG

# distributions
from model.distributions import discrete_trunc_normal, discrete_trunc_exponential, num_contact_dist

# file utils
from data_processing.data_repo_api import DataRepoAPI
from data_processing.utils import load_network_data_from_files
from configuration import OUT

# other utils
from functools import partial
import numpy as np
from mpmath import polylog

In [2]:
# Some global parameters

N = 1000      # network size passed to the mobility network generators
CUTOFF = 40   # cutoff shared by the mobility and PLC generators
n_exp = 10    # default number of repetitions per experiment

# Experiment result output dir (relative to the current working directory).
# makedirs(exist_ok=True) is idempotent, avoids the check-then-create race of
# "look in os.listdir(), then mkdir", and fails loudly if a regular file with
# the same name is in the way.
output_dir = 'experiment_results'
os.makedirs(output_dir, exist_ok=True)

# Helper functions
def run_network_lab_experiment(generator, params, file_name, description, n=n_exp,
                               output_dir=output_dir, push_to_repo=True):
    """
    Run a MetricExperiment repeatedly using a network generator.

    The experiment is recorded in a JSON lab notebook located at
    ``output_dir/file_name``. Optionally the notebook file is handed to
    ``DataRepoAPI.update_or_create`` afterwards.

    :param generator: network generator passed to ``MetricExperiment``
    :param params: mapping of parameter name to value; every entry is set
        on the lab before the experiment runs
    :param file_name: file name of the JSON notebook
    :param description: description stored with the notebook
    :param n: number of repetitions of the experiment
    :param output_dir: directory the notebook file is placed in
    :param push_to_repo: whether to call ``DataRepoAPI.update_or_create``
        for the notebook file after the run
    :return: the value of ``lab.results()``
    """
    # Only the module-level default directory is created up front; make sure
    # a caller-supplied directory exists too. An empty string means the
    # current working directory, which needs no creation.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    notebook_path = os.path.join(output_dir, file_name)

    notebook = JSONLabNotebook(notebook_path, create=True, description=description)
    lab = Lab(notebook=notebook)

    # pass params to the lab
    for name, value in params.items():
        lab[name] = value

    experiment = MetricExperiment(generator)
    lab.runExperiment(RepeatedExperiment(experiment, n))

    if push_to_repo:
        DataRepoAPI.update_or_create(file_name=file_name, file_path=output_dir)

    return lab.results()

def plc_mean(exponent, cutoff):
    """
    Theoretical mean of a power law with cutoff distribution.

    Computed as Li_{exponent-1}(z) / Li_{exponent}(z) with
    z = exp(-1 / cutoff), where Li is the polylogarithm.
    """
    # Both polylogarithms are evaluated at the same argument.
    z = np.exp(-1 / cutoff)
    return polylog(exponent - 1, z) / polylog(exponent, z)

def estimate_exponent_for_mean(mean, cutoff, tolerance):
    """
    Estimate the exponent of a power law with cutoff distribution that
    yields the given mean for the given cutoff.

    Candidate exponents are scanned in ascending order over [1, 5) in steps
    of 0.01. The first candidate whose theoretical mean is within
    ``tolerance`` of ``mean`` is returned; if none is, the candidate with
    the smallest absolute deviation is returned.
    """
    candidates = np.arange(1, 5, 0.01)
    errors = []
    for candidate in candidates:
        error = abs(mean - plc_mean(candidate, cutoff))
        if error < tolerance:
            # good enough - stop at the first hit
            return candidate
        errors.append(error)

    # No candidate met the tolerance: fall back to the closest one
    # (the first one in case of ties).
    best_idx = min(range(len(errors)), key=errors.__getitem__)
    return candidates[best_idx]

### Mobility network

In [3]:
# Load the network data from the pickled files found under OUT

file_names = {
    'demographics': 'demographics.pkl',
    'comb_pre': 'comb_counts_pre.pkl',
    'comb_post': 'comb_counts_post.pkl',
    'trip_pre': 'trip_counts_pre.pkl',
    'trip_post': 'trip_counts_post.pkl',
}

network_data = load_network_data_from_files(file_names, OUT)

# Split into the 'pre' and 'post' data sets
network_data_pre, network_data_post = network_data['pre'], network_data['post']

In [4]:
# Define the generator parameters; pre and post differ only in MULTIPLIER
params_pre = {
    MNG.N: N,
    MNG.EXPONENT: 2,
    MNG.CUTOFF: CUTOFF,
    MNG.MULTIPLIER: False,
}

params_post = {
    MNG.N: N,
    MNG.EXPONENT: 2,
    MNG.CUTOFF: CUTOFF,
    MNG.MULTIPLIER: True,
}

In [5]:
# Run the metric experiment on the mobility generator built from the 'pre' data
mng_pre = MNG(network_data=network_data_pre)
mng_pre_results = run_network_lab_experiment(
    generator=mng_pre,
    params=params_pre,
    file_name='mobility_pre.json',
    description='Network metrics of Mobility pre',
)

In [6]:
# Run the metric experiment on the mobility generator built from the 'post' data
mng_post = MNG(network_data=network_data_post)
mng_post_results = run_network_lab_experiment(
    generator=mng_post,
    params=params_post,
    file_name='mobility_post.json',
    description='Network metrics of Mobility post',
)

In [7]:
def extract_mean_deg(results):
    """
    Return the mean over all degree values found in a list of experiment
    results, pooling the ``['results']['degrees']`` entries of every run.
    """
    pooled_degrees = []
    for run in results:
        pooled_degrees.extend(run['results']['degrees'])
    return np.mean(pooled_degrees)

# Mean degree of the mobility networks, pre and post
mob_deg_pre, mob_deg_post = map(extract_mean_deg, (mng_pre_results, mng_post_results))

### PLC network

In [8]:
# Estimate the PLC exponents whose theoretical mean degree matches the
#  mean degree measured on the mobility networks (tolerance 0.01)

plc_expo_pre = estimate_exponent_for_mean(mean=mob_deg_pre, cutoff=CUTOFF, tolerance=0.01)
plc_expo_post = estimate_exponent_for_mean(mean=mob_deg_post, cutoff=CUTOFF, tolerance=0.01)

In [9]:
# Define the generator parameters
# NOTE(review): the network size is hard-coded to 10000 here instead of using
# the global N (1000) used for the mobility networks - confirm this is intended.
params_plc_pre = {
    PLCNetwork.N: 10000,
    PLCNetwork.EXPONENT: plc_expo_pre,
    PLCNetwork.CUTOFF: CUTOFF,
}

params_plc_post = {
    PLCNetwork.N: 10000,
    PLCNetwork.EXPONENT: plc_expo_post,
    PLCNetwork.CUTOFF: CUTOFF,
}

In [10]:
# Run the metric experiment on PLC networks using the 'pre' exponent
plc_pre = PLCNetwork()
_ = run_network_lab_experiment(
    generator=plc_pre,
    params=params_plc_pre,
    file_name='plc_pre.json',
    description='Network metrics of PLC pre',
)

In [11]:
# Run the metric experiment on PLC networks using the 'post' exponent
plc_post = PLCNetwork()
_ = run_network_lab_experiment(
    generator=plc_post,
    params=params_plc_post,
    file_name='plc_post.json',
    description='Network metrics of PLC post',
)

### Distanced network

In [12]:
# Household size distribution: discrete_trunc_normal with mu and std pre-bound
household_size_dist = partial(discrete_trunc_normal, mu=4.5, std=2)

# Generator parameters (identical for pre and post)
params_distanced_pre = {DNG.N: 10000}
params_distanced_post = {DNG.N: 10000}

In [13]:
# Distanced network whose outside-edge distribution is parameterised by the
# mean degree of the 'pre' mobility networks.
# NOTE(review): the mean degree is passed as `exponent` - confirm that
# discrete_trunc_exponential interprets it as intended.
distanced_pre = DNG(
    household_size_dist=household_size_dist,
    num_contact_dist=num_contact_dist,
    num_outside_edge_dist=partial(discrete_trunc_exponential, exponent=mob_deg_pre)
)
# The description previously read 'PLC pre' (copy-paste slip); this notebook
# holds the metrics of the distanced network.
_ = run_network_lab_experiment(distanced_pre, params_distanced_pre, 'distanced_pre.json', 
                               'Network metrics of Distanced pre')

In [14]:
# Distanced network whose outside-edge distribution is parameterised by the
# mean degree of the 'post' mobility networks.
# NOTE(review): the mean degree is passed as `exponent` - confirm that
# discrete_trunc_exponential interprets it as intended.
distanced_post = DNG(
    household_size_dist=household_size_dist,
    num_contact_dist=num_contact_dist,
    num_outside_edge_dist=partial(discrete_trunc_exponential, exponent=mob_deg_post)
)
# The description previously read 'PLC post' (copy-paste slip); this notebook
# holds the metrics of the distanced network.
_ = run_network_lab_experiment(distanced_post, params_distanced_post, 'distanced_post.json', 
                               'Network metrics of Distanced post')