Imports ...

In [1]:
import numpy as np
import networkx as nx
from matplotlib import pyplot as plt
import pickle
from typing import Dict, List, Tuple, Callable

Read in data ...

In [2]:
demographics = pickle.load(open('data/demographics.pkl', 'rb'))

comb_counts_feb = pickle.load(open('data/comb_counts_feb.pkl', 'rb'))
comb_counts_apr = pickle.load(open('data/comb_counts_apr.pkl', 'rb'))

trip_counts_feb = pickle.load(open('data/trip_counts_feb.pkl', 'rb'))
trip_counts_apr = pickle.load(open('data/trip_counts_apr.pkl', 'rb'))

In [3]:
# store all CBGs in order

ordered_cbgs = sorted(demographics.keys())

Create adjacency list etc. between CBGs...

In [4]:
def create_adjacency_list(comb_counts: Dict[Tuple[str, str], int], 
                          trip_counts: Dict[str, int]) -> Dict[str, List[float]]:
    """
    Create an adjacency list in the form of a dictionary. 
    The keys are the CGBs and the values are ordered lists of transition probabilities to other CBGs.
    The probabilities are in the same order as `ordered_cbgs`.
    The transition probability from CBG i to CBG j is
    P(i, j) = count(trips between i and j) / count(all trips from i to a CBG).
    :param comb_counts: Total count of all trips between two CGBs.
    :param trip_counts: Total count of all trips from each CGB.
    :returns: Adjacency list with probabilites.
    """
    adjacency_list: Dict[str, List[float]] = {}
        
    for i in ordered_cbgs:
        adjacency_list[i] = []
        for j in ordered_cbgs:

            # count of trips between 
            comb = (i, j)
            trips_between = 0 if not comb in comb_counts else comb_counts[comb]

            # ratio of all trips from i
            p = 0 if not i in trip_counts else trips_between / trip_counts[i]
            adjacency_list[i].append(p)
                
    return adjacency_list

def cum_prob_from_adj_list(adjacency_list: Dict[str, List[float]]) -> Dict[str, np.array]:
    """
    From an adjacency list with transition probabilities, calculate the cumulative probabilities.
    :param adjacency_list: Adjacency list with probabilities.
    :returns: Cummulative probabilities for each item in the adjacency list.
    """
    for key in adjacency_list:
        adjacency_list[key] = np.array(adjacency_list[key]).cumsum()

    return adjacency_list

In [5]:
%%time

adj_list_feb = create_adjacency_list(comb_counts_feb, trip_counts_feb)
adj_list_apr = create_adjacency_list(comb_counts_apr, trip_counts_apr)

CPU times: user 576 ms, sys: 17.6 ms, total: 593 ms
Wall time: 593 ms


In [6]:
# sanity check
print(f"Should be ~{1}, is {sum(adj_list_feb[ordered_cbgs[0]])}")
print(f"Should be ~{1}, is {sum(adj_list_apr[ordered_cbgs[0]])}")

Should be ~1, is 0.9999999999999989
Should be ~1, is 0.9999999999999992


In [7]:
%%time

cum_prob_feb = cum_prob_from_adj_list(adj_list_feb)
cum_prob_apr = cum_prob_from_adj_list(adj_list_apr)

CPU times: user 138 ms, sys: 13.1 ms, total: 151 ms
Wall time: 149 ms


In [8]:
# sanity check
print(f"Should be ~{1}, is {cum_prob_feb[ordered_cbgs[0]][-1]}")
print(f"Should be ~{1}, is {cum_prob_feb[ordered_cbgs[0]][-1]}")

Should be ~1, is 0.9999999999999989
Should be ~1, is 0.9999999999999989


Define the distribution generators...

In [17]:
def household_size_distribution(cbg: str) -> int:
    """
    Household size distribution is drawn from normal distribution with mean 
    according to mean household size of CBG.
    :param cbg: CBG of the household.
    :returns: Household size.
    """
    rng = np.random.default_rng()
    mu = demographics[cbg]['household_size']
    sd = mu / 2
    return max(int(rng.normal(mu, sd)), 1)

def contact_distribution(size: int) -> int:
    """
    Number of nodes in a household that are connected to other households
    :param size: Size of the household.
    :returns: Number of connected nodes.
    """
    rng = np.random.default_rng()
    # return max(int(rng.normal(min(size / 2, 2), 2)), 1)
    # come up with something but I think this probably makes most sense for an undistanced network
    return size


def household_contact_distribution() -> int:
    """
    Number of connections from a node to another node outside the household.
    :returns: Number of connections to outside the household.
    """
    # todo
    # think this should be exponential (with cutoff ?)
    rng = np.random.default_rng()
    return max(int(rng.exponential(10.0)), 1)

def draw_rewire_distribution(cbg: str, cum_prob: Dict[str, np.array]) -> str:
    """
    For a given CBG draw from the corresponding rewire distribution and return a random CBG to rewire to.
    :param cbg: CBG to be rewired.
    :param cum_prob: The cummulative probability distribution to draw from.
    """
    rng = np.random.default_rng()
    r = rng.random()
    
    # first instance where p >= r
    idx = next(i for i, p in enumerate(cum_prob[cbg]) if p >= r)
    return ordered_cbgs[idx]

In [18]:
# test whether the draw_rewire works as suspected
print(draw_rewire_distribution(ordered_cbgs[0], cum_prob_feb))
print(draw_rewire_distribution(ordered_cbgs[0], cum_prob_apr))

090091201001
090091509002


Create the network...

(this is based on Dobson 2020, Chapter Physical Distancing)

In [19]:
def create_network() -> nx.Graph:
    """
    Create the network based on some specified distributions.
    :returns: Network.
    """
    
    # initialise variables
    g = nx.Graph()
    rng = np.random.default_rng()
    household_id = 1
    households = []
    
    done_cbgs = set()

    # add the nodes and create the household connections
    for cbg, demographic in demographics.items():
        
        done_cbgs.add(cbg)
        
        N = demographic['population']
        n = 0
        
        if len(done_cbgs) == 50:
            break
            ...
            
        print(f"CBG {len(done_cbgs)} of {len(demographics)}\r", end="")
        
        while n < N:
            
            # create household network
            size = household_size_distribution(cbg)
            house_net = nx.complete_graph(size)
            
            # add unique labels and the current cbg
            nx.relabel_nodes(house_net, lambda l: str(l) + cbg + str(n))
            nx.set_node_attributes(house_net, cbg, 'cbg')
            
            # add nodes to the main network
            g.add_nodes_from(house_net.nodes, cluster=household_id, cluster_size=size)
            g.add_edges_from(house_net.edges, cluster=household_id, cluster_size=size)
            households.append(house_net)

            # update iteration values
            n += size
            household_id += 1
            
    # number of individuals connected to the outside
    # todo this is probably obsolete in my version since all nodes are connected to the outside
    contacts = []
    for house in households:
        size = house.order()
        num_contacts = contact_distribution(size)
        contacts.append(num_contacts)
        
    # create stubs for connections to outside of household
    stubs = []
    for i in range(len(households)):
        house = households[i]
        
        # again.. some of this might be obsolete by now (see above). Might just take all ...
        nodes = list(house.nodes())[:contacts[i]]
        
        for node in nodes:
            # draw random degree
            degree = household_contact_distribution()
            
            # append `degree` number of copies of the current node
            stubs.extend([node] * degree)
        
    # add one more if number of stubs is odd
    if len(stubs) % 2:
        unique_stubs = list(set(stubs))
        j = rng.integers(len(unique_stubs))
        stubs.append(unique_stubs[j])
        
    ...
        
    
    return (g, [h.order() for h in households])

In [20]:
%%time
N = sum(cbg['population'] for cbg in demographics.values())
(g, clusters) = create_network()

CPU times: user 12.7 s, sys: 450 ms, total: 13.1 s
Wall time: 13.1 s


In [21]:
print('Mean household size of {s:.2f} (range {minf}-{maxf})'.format(s=np.mean(clusters), minf=min(clusters), maxf=max(clusters)))
print('Most connected individual has {k} contacts'.format(k=max(dict(g.degree()).values())))

Mean household size of 2.09 (range 1-9)
Most connected individual has 8 contacts
