Imports ...

In [1]:
import numpy as np
import networkx as nx
import pickle
from typing import Dict, List, Tuple

Read in data ...

In [2]:
demographics = pickle.load(open('data/demographics.pkl', 'rb'))

comb_counts_feb = pickle.load(open('data/comb_counts_feb.pkl', 'rb'))
comb_counts_apr = pickle.load(open('data/comb_counts_apr.pkl', 'rb'))

trip_counts_feb = pickle.load(open('data/trip_counts_feb.pkl', 'rb'))
trip_counts_apr = pickle.load(open('data/trip_counts_apr.pkl', 'rb'))

In [3]:
ordered_cbgs = sorted(demographics.keys())

In [4]:
def create_adjacency_list(comb_counts, trip_counts):
    adjacency_list: Dict[str, List[float]] = {}
        
    for i in ordered_cbgs:
        adjacency_list[i] = []
        for j in ordered_cbgs:

            # count of trips between 
            comb = (i, j)
            trips_between = 0 if not comb in comb_counts else comb_counts[comb]

            # ratio of all trips from i
            p = 0 if not i in trip_counts else trips_between / trip_counts[i]
            adjacency_list[i].append(p)
                
    return adjacency_list

def cum_prob_from_adj_list(adjacency_list):
    for key in adjacency_list:
        adjacency_list[key] = np.array(adjacency_list[key]).cumsum()

    return adjacency_list

In [5]:
%%time

adj_list_feb = create_adjacency_list(comb_counts_feb, trip_counts_feb)
adj_list_apr = create_adjacency_list(comb_counts_apr, trip_counts_apr)

CPU times: user 616 ms, sys: 27.1 ms, total: 643 ms
Wall time: 643 ms


In [6]:
# sanity check
print(f"Should be ~{1}, is {sum(adj_list_feb[ordered_cbgs[0]])}")
print(f"Should be ~{1}, is {sum(adj_list_apr[ordered_cbgs[0]])}")

Should be ~1, is 0.9999999999999989
Should be ~1, is 0.9999999999999992


In [7]:
%%time

cum_prob_feb = cum_prob_from_adj_list(adj_list_feb)
cum_prob_apr = cum_prob_from_adj_list(adj_list_apr)

CPU times: user 156 ms, sys: 24.3 ms, total: 181 ms
Wall time: 184 ms


In [8]:
# sanity check
print(f"Should be ~{1}, is {cum_prob_feb[ordered_cbgs[0]][-1]}")
print(f"Should be ~{1}, is {cum_prob_feb[ordered_cbgs[0]][-1]}")

Should be ~1, is 0.9999999999999989
Should be ~1, is 0.9999999999999989


Define the distribution generators...

In [9]:
def household_size_distribution(cbg: str):
    """
    Household size distribution is drawn from normal distribution with mean according to mean household size of CBG.
    """
    rng = np.random.default_rng()
    mu = demographics[cbg]['household_size']
    sd = mu / 2
    return max(int(rng.normal(mu, sd)), 1)

def contact_distribution(size: int):
    """
    Number of noded in a household that are connected to other households
    """
    rng = np.random.default_rng()
    # return max(int(rng.normal(min(size / 2, 2), 2)), 1)
    # come up with something but I think this probably makes most sense for an undistanced network
    return size


def household_contact_distribution(cbg1):
    """
    Number of connections from a node to another node outside the household.
    """
    # think this should be exponential (with cutoff ? -> todo)
    rng = np.random.default_rng()
    return max(int(rng.exponential(10.0), 1))

def draw_rewire_distribution(cbg, cum_prob):
    rng = np.random.default_rng()
    r = rng.random()
    
    # first instance where p >= r
    idx = next(i for i, p in enumerate(cum_prob[cbg]) if p >= r)
    return ordered_cbgs[idx]

In [10]:
print(draw_rewire_distribution(ordered_cbgs[0], cum_prob_feb))
print(draw_rewire_distribution(ordered_cbgs[0], cum_prob_apr))

090091253002
090091201001
