# Generates Mobility file for inference



In [1]:
# Enable IPython's autoreload extension (mode 2) so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
if '..' not in sys.path:
    sys.path.append('..')
    
from matplotlib import pyplot as plt
%matplotlib inline

import pandas as pd
import numpy as np
import networkx as nx
import copy
import scipy as sp
import math
import seaborn
import pickle
import warnings
import os

from lib.mobilitysim import MobilitySimulator
from lib.town_data import generate_population, generate_sites, compute_distances
from lib.town_maps import MapIllustrator

### Settings for synthetic mobility data generation

In [3]:
downsample_population = 200 # Downsample population numbers by a factor of 200
downsample_sites = 100 # Downsample sites by a factor of 100

# Set the population generation mode.
# 3 options available: custom | random | heuristic
#   custom    - distribute people across tiles based on the density input files
#   random    - distribute people across tiles uniformly at random
#   heuristic - distribute people proportionally to the buildings per tile
population_by = 'custom'

### Town details

In [4]:
# Input locations and the geographic window of the modelled town.
population_path = 'lib/data/population_sf/'  # Directory containing FB population density files
sites_path = 'lib/data/queries_sf/'  # Directory containing OSM site files
# Coordinate bounding box
# NOTE(review): presumably (lat_min, lat_max, lon_min, lon_max) - confirm against generate_sites
bbox = (37.7115, 37.8127, -122.5232, -122.3539)

# Census-style head counts per age group of the reference region.
# NOTE(review): an earlier comment attributed these numbers to Landkreis Tübingen,
# but the paths and bounding box above carry an `_sf` suffix - confirm the data source.
population_per_age_group = np.array([
    38715,   # 0-4
    59181,   # 5-14
    30824,   # 15-19
    52567,   # 20-24
    329257,  # 25-44
    167051,  # 45-59
    136499,  # 60-79
    36188,   # 80+
])
town_population = 850282
# The counts listed above add up to exactly town_population.
region_population = population_per_age_group.sum()

# Downsample population: rescale the region counts to the town size and shrink
# them by `downsample_population`, then round to whole people.
downsample_scale = town_population / (downsample_population * region_population)
population_per_age_group = (
    (population_per_age_group * downsample_scale).round().astype('int').tolist()
)

print(f'Population per age group: {population_per_age_group}')

Population per age group: [194, 296, 154, 263, 1646, 835, 682, 181]


In [47]:
# Share of the whole population that counts as essential workers.
prop_essential_total = 0.5

# Age distribution of the essential workers, i.e. the fraction of all
# essential workers that falls into each age group.
# NOTE(review): these fractions add up to 0.99 rather than 1.0 - confirm intended.
prop_essential_per_age_group = np.array([
    0,    # 0-4
    0,    # 5-14
    .04,  # 15-19
    .06,  # 20-24
    .45,  # 25-44
    .24,  # 45-59
    .20,  # 60-79
    0,    # 80+
])

# Fraction of the (downsampled) population that belongs to each age group.
age_group_share = np.array(population_per_age_group) / sum(population_per_age_group)

# Fraction of each age group that is essential:
#   P(essential | age) = P(age | essential) * P(essential) / P(age)
essential_prop_per_age_group = (
    prop_essential_per_age_group * prop_essential_total
) / age_group_share
print(f'Proportion of age groups that are essential workers: {essential_prop_per_age_group}')

Proportion of age groups that are essential workers: [0.         0.         0.55207792 0.48490494 0.58109052 0.61092216
 0.62331378 0.        ]


### Extracted site data

* site_loc: list of site coordinates
* site_type: list of site category
* site_dict: helper dictionary with real name (string) of each site category (int)
* density_site_loc: list of site coordinates of specific type to be based on to generate population density

To generate sites of arbitrary types for a given city, the following function sends queries to OpenStreetMap. In order to use it for additional types of sites, you need to specify queries in the Overpass API format. For more information, check the existing queries in **/lib/data/queries/**, https://wiki.openstreetmap.org/wiki/Overpass_API and http://overpass-turbo.eu/.

We separately use a query returning all buildings in a town to heuristically generate the population density in the next steps if no real population density data is provided. An extra query is required for this purpose, and it should be given as the **site_based_density_file** argument.

In [6]:
# This block sends queries to OpenStreetMap
# Make sure you have a working internet connection
# If an error occurs during execution, try executing again 
# If the call times out or doesn't finish, try restarting your internet connection by e.g. restarting your computer

# Collect every *.txt query file below `sites_path`, except the buildings query,
# which is passed separately as the density source.
# - Paths are joined with the directory os.walk actually found the file in, so
#   files in sub-directories resolve correctly.
# - The list is sorted because directory listing order is not guaranteed.
#   NOTE(review): the integer site types in `site_dict` presumably follow the
#   order of `query_files`; the per-type settings further down are written for a
#   fixed order, so verify the printed `site_dict` against them after running.
site_files = sorted(
    os.path.join(root, f)
    for root, _dirs, files in os.walk(sites_path)
    for f in files
    if f.endswith(".txt") and f != 'buildings.txt'
)

site_loc, site_type, site_dict, density_site_loc = generate_sites(
    bbox=bbox, query_files=site_files,
    site_based_density_file=sites_path+'buildings.txt')

Query 1 OK.
Query 2 OK.
Query 3 OK.
Query 4 OK.
Query 5 OK.


### Site visualization

In [7]:
# Render the extracted sites on a map; the last expression displays the result.
ill = MapIllustrator()
sitemap = ill.sites_map(
    bbox=bbox,
    site_loc=site_loc,
    site_type=site_type,
    site_dict=site_dict,
    map_name='site_distribution',
)
sitemap

### Generate home location based on various options

* home_loc: list of home coordinates
* people_age: list of age category 
* home_tile: list of map tile to which each home belongs
* tile_loc: list of tile center coordinates

The following three options generate a population distribution across a geographical area consisting of tiles (square boxes) of specific resolution. More information about tile sizes can be found in https://wiki.openstreetmap.org/wiki/Zoom_levels. 

In [8]:
# Collect all *.csv density files below `population_path`. Paths are joined with
# the directory os.walk found them in, and sorted so the input order (and with it
# the seeded result) does not depend on the filesystem's listing order.
density_files = sorted(
    os.path.join(root, f)
    for root, _dirs, files in os.walk(population_path)
    for f in files
    if f.endswith(".csv")
)

# Arguments shared by every population generation mode.
population_kwargs = dict(
    bbox=bbox,
    population_per_age_group=population_per_age_group,
    tile_level=16,
    seed=42,
    essential_prop_per_age_group=essential_prop_per_age_group,
)

if population_by == 'custom':
    # generate population across tiles based on density input
    population_kwargs['density_files'] = density_files
elif population_by == 'random':
    # generate population across tiles uniformly at random (no extra input)
    pass
elif population_by == 'heuristic':
    # generate population across tiles proportional to buildings per tile
    population_kwargs['density_site_loc'] = density_site_loc
else:
    # Fail here instead of with a NameError in a later cell.
    raise ValueError(
        f"Unknown population_by={population_by!r}; "
        "expected 'custom', 'random' or 'heuristic'")

home_loc, people_age, home_tile, tile_loc, essential_workers = generate_population(
    **population_kwargs)

### Home visualization

In [9]:
# Render the generated home locations on a map; the last expression displays it.
homemap = ill.population_map(
    bbox=bbox,
    home_loc=home_loc,
    map_name='population_distribution',
)
homemap

Downsample sites as given by settings

In [10]:
# Keep a uniformly drawn 1/downsample_sites fraction of the sites (without
# replacement), mirroring the population downsampling.
# NOTE: this overwrites `site_loc` / `site_type` in place, so re-running the cell
# in the same kernel session downsamples the already reduced lists again.
if downsample_sites > 1:
    np.random.seed(42)
    n_sites = len(site_loc)
    # `p` is the uniform distribution; it is kept explicit on purpose, since
    # dropping it may change which indices are drawn for the same seed.
    idx = np.random.choice(n_sites, size=int(n_sites / downsample_sites),
                           replace=False, p=np.ones(n_sites) / n_sites)

    site_loc, site_type = np.array(site_loc)[idx].tolist(), np.array(site_type)[idx].tolist()

In [11]:
# Report how many sites remain and which integer maps to which site category.
print('Number of sites: ', len(site_loc))
print('Site types:      ', site_dict)

Number of sites:  68
Site types:       {0: 'education', 1: 'social', 2: 'office', 3: 'supermarket'}


In [12]:
# Number of remaining sites whose integer type is 3
# (look the id up in `site_dict` to see which category that is).
n_type_3_sites = (np.array(site_type) == 3).sum()
print(n_type_3_sites)

# Render the downsampled sites on a map under a separate map name.
ill = MapIllustrator()
sitemap = ill.sites_map(
    bbox=bbox,
    site_loc=site_loc,
    site_type=site_type,
    site_dict=site_dict,
    map_name='site_distribution_SF_downsampled',
)
sitemap

5


Compute pairwise distances between all tile centers and all sites

In [13]:
# Distances between the (downsampled) sites and the tile centers; passed on to
# the mobility simulator further down.
# NOTE(review): shape/orientation and units of the result are defined by
# lib.town_data.compute_distances - confirm there.
tile_site_dist = compute_distances(site_loc, tile_loc)

### Specify synthetic mobility patterns

Here we specify the patterns of mobility used for generating the synthetic traces based on the above home and site locations. Note that this is a general framework and can be arbitrarily extended to any desired site numbers or types. See below for an example used in the first version of our paper.

Specify the mean duration of visit per type, or in reality, time spent in crowded places per type.

In [14]:
# Mean visit duration per site type, one entry per integer site type.
# Order assumed by the values below:
#  {0: 'education', 1: 'office', 2: 'social', 3: 'supermarket'}
# NOTE(review): confirm this order agrees with the `site_dict` printed earlier -
# a saved run of this notebook printed 'social' as 1 and 'office' as 2.
# NOTE(review): presumably in hours (the time horizon is given as days * 24) - confirm.
dur_mean_per_type = [6.0, 5.0, 0.64, 0.4]

Determine the number of discrete sites a person visits per site type.

In [15]:
# Number of distinct sites a person visits per site type, one entry per integer
# site type. Order assumed by the values below:
#  {0: 'education', 1: 'office', 2: 'social', 3: 'supermarket'}
# NOTE(review): confirm this order agrees with the `site_dict` printed earlier -
# a saved run of this notebook printed 'social' as 1 and 'office' as 2.
variety_per_type = [1, 1, 10, 2]

Set the number of visits per week that each group makes per type of site

In [16]:
# Visits per week, one row per age group and one column per integer site type.
# Column order assumed by the values: education, office, social, supermarket
# NOTE(review): verify that this column order agrees with `site_dict`.
# e.g. row 0 (age 0-4): education only, no office, no social, no supermarket.
#
# Dividing by the 24 * 7 hours of a week converts the weekly counts into average
# visits per hour, which is the unit the simulator works with.
mob_rate_per_age_per_type = np.array([
    [5,    0,    0,   0],     # 0-4
    [5,    0,    0,   0],     # 5-14
    [5,    0,    3.6, 0.22],  # 15-19
    [1.48, 3.52, 3.6, 0.21],  # 20-24
    [0,    5,    3.6, 0.27],  # 25-44
    [0,    5,    3.6, 0.36],  # 45-59
    [0,    0,    3.6, 0.35],  # 60-79
    [0,    0,    3.6, 0.35],  # 80+
]) / (24.0 * 7)

Set time horizon and delta. Due to the data horizon considered for inference, we use 17 days. The setting for delta is explained in the paper.

In [17]:
# time horizon
max_time = 17 * 24.0 # data availability
delta  = 4.6438 # as set by distributions

In [18]:
# Summarize the downsampled setup: head count per age group, number of sites per
# integer site type, and the two totals.
site_type_arr = np.array(site_type)
sites_per_type = [(site_type_arr == i).sum() for i in range(len(site_dict))]

print('Population (by Age): ', population_per_age_group)
print('Sites (by type):     ',  sites_per_type)
print('Total:', sum(population_per_age_group), len(site_type))

Population (by Age):  [194, 296, 154, 263, 1646, 835, 682, 181]
Sites (by type):      [2, 34, 27, 5]
Total: 4251 68


Save arguments for the class object instantiation to be able to initiate `MobilitySimulator` on the fly during inference. That is more efficient than pickling in some cases.

In [23]:
# Bundle everything MobilitySimulator needs so it can be re-instantiated later
# from the saved settings instead of from a pickled simulator object.
kwargs = dict(home_loc=home_loc, people_age=people_age, site_loc=site_loc,
    site_type=site_type, mob_rate_per_age_per_type=mob_rate_per_age_per_type,
    dur_mean_per_type=dur_mean_per_type, variety_per_type=variety_per_type, delta=delta,
    home_tile=home_tile, tile_site_dist=tile_site_dist, essential_workers=essential_workers)

# The file name encodes both downsampling factors.
settings_path = f'lib/mobility/SF_settings_{downsample_population}_{downsample_sites}.pk'
# Create the output directory if needed, so the dump cannot fail on a missing
# directory after all the expensive steps above have already run.
os.makedirs(os.path.dirname(settings_path), exist_ok=True)
with open(settings_path, 'wb') as fp:
    pickle.dump(kwargs, fp)

Create mobility traces as above, or comment in the last section below to specify fully artificial traces.

In [24]:
# Build the simulator from the same keyword arguments that were saved to disk.
mob = MobilitySimulator(**kwargs)
# Have the simulator report progress (see the "Simulate mobility ..." output).
mob.verbose = True

In [25]:
# Simulate visits up to `max_time` with a fixed seed; %time reports the runtime.
%time mob.simulate(max_time=max_time, seed=12345)
# Optional (disabled): persist the simulated traces to a pickle file.
# NOTE(review): the `tu_` file prefix looks like a leftover from another town - confirm before enabling.
# %time mob.to_pickle(f'tu_mobility_{downsample_population}_{downsample_sites}.pk')

Simulate mobility for 408.00 time units... Simulated 55564 visits.
CPU times: user 11.5 s, sys: 157 ms, total: 11.6 s
Wall time: 11.6 s
