# Generates Mobility file for inference

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys
if '..' not in sys.path:
    sys.path.append('..')
    
from matplotlib import pyplot as plt
%matplotlib inline

import pandas as pd
import numpy as np
import networkx as nx
import copy
import scipy as sp
import math
import seaborn
import pickle
import warnings
import os

from lib.mobilitysim import MobilitySimulator
from lib.town_data import generate_population, generate_sites, compute_distances
from lib.town_maps import MapIllustrator

### Settings for synthetic mobility data generation

Import __one__ `town_settings` file. The following variables will be imported by the `import *` command
* `town_name`
* `population_path`
* `sites_path`
* `bbox`
* `population_per_age_group`
* `region_population`
* `town_population`
* `daily_tests_unscaled`
* `household_info`

In [None]:
# from lib.settings.town_settings_kaiserslautern import *
# from lib.settings.town_settings_ruedesheim import *
# from lib.settings.town_settings_tirschenreuth import *
# from lib.settings.town_settings_tubingen import *
from lib.settings.town_settings_sanfrancisco import *

# from lib.settings.town_settings_lausanne import *
# from lib.settings.town_settings_locarno import *
# from lib.settings.town_settings_lucerne import *
# from lib.settings.town_settings_jura import *

In [None]:
# Downsampling factor of population and sites: the population per age group
# and the number of sites are both divided by this value further below
downsample = 100

# Country code selecting the age groups and the mobility tables used below;
# supported values are 'GER', 'CH' and 'US' (anything else raises a ValueError)
country = 'US' # 'GER', 'CH'

# Set the population generation mode.
# 3 options available: custom | random | heuristic
#   custom    - population across tiles based on the density file (`population_path`)
#   random    - population across tiles uniformly at random
#   heuristic - population across tiles proportional to buildings per tile
population_by = 'custom'

### Nothing should be changed below

---

#### Town details

In [None]:
# Rescale the per-age-group counts of the region to the town and apply the
# downsampling factor; the result is stored as a plain list of ints.
_age_group_scale = town_population / (downsample * region_population)
_scaled_age_groups = np.round(population_per_age_group * _age_group_scale)
population_per_age_group = _scaled_age_groups.astype('int').tolist()

print(f'Population per age group: {population_per_age_group}')

In [None]:
# Toggle modelling of essential workers. When disabled, `None` is passed on
# to the population generation instead of per-age-group proportions.
model_essential = True
if model_essential:
    # proportion of total population that are essential workers
    prop_essential_total = 0.5

    # proportion of all essential workers within each age group
    prop_essential_per_age_group = np.array([
        0,    # 0-4
        0,    # 5-14
        .04,  # 15-19
        .06,  # 20-24
        .45,  # 25-44
        .24,  # 45-59
        .20,  # 60-79
        0])   # 80+

    # share of each age group in the total (downsampled) population
    age_group_share = np.array(population_per_age_group) / sum(population_per_age_group)

    # proportion of each age group that are essential workers:
    # (essential workers in the group / total population) / (group size / total population)
    # An empty age group has share 0; it gets proportion 0 instead of the
    # nan/inf (and RuntimeWarning) a plain division would produce.
    essential_share_of_total = prop_essential_per_age_group * prop_essential_total
    essential_prop_per_age_group = np.divide(
        essential_share_of_total,
        age_group_share,
        out=np.zeros_like(essential_share_of_total, dtype=float),
        where=age_group_share > 0)
    print(f'Proportion of age groups that are essential workers: {essential_prop_per_age_group}')
else:
    essential_prop_per_age_group = None

#### Extracted site data

* `site_loc`: list of site coordinates
* `site_type`: list of site category
* `site_dict`: helper dictionary with real name (string) of each site category (int)
* `density_site_loc`: list of site coordinates of specific type to be based on to generate population density

To generate sites of arbitrary types for a given city, the following function sends queries to OpenStreetMap. In order to use it for additional types of sites, you need to specify queries in the Overpass API format. For more information, check the existing queries in **/lib/data/queries/**, https://wiki.openstreetmap.org/wiki/Overpass_API and http://overpass-turbo.eu/.

We separately use a query returning all buildings in a town to heuristically generate the population density in the next steps if no real population density data is provided. An extra query is required for this purpose, and it should be given as the **site_based_density_file** argument.

In [None]:
# This block sends queries to OpenStreetMap
# Make sure you have a working internet connection
# If an error occurs during execution, try executing again 
# If the call times out or doesn't finish, try restarting your internet connection by e.g. restarting your computer

# Collect every query file (*.txt) below `sites_path`, except the buildings
# query, which is passed separately as the density file.
# Paths are built from the directory `os.walk` is currently visiting, so files
# in sub-directories resolve correctly, and the list is sorted because
# `os.walk` yields file names in arbitrary order.
# NOTE(review): site type indices presumably follow the order of
# `query_files` - confirm against `generate_sites`.
site_files = sorted(
    os.path.join(root, f)
    for root, _dirs, files in os.walk(sites_path)
    for f in files
    if f.endswith('.txt') and f != 'buildings.txt'
)

site_loc, site_type, site_dict, density_site_loc = generate_sites(
    bbox=bbox,
    query_files=site_files,
    site_based_density_file=os.path.join(sites_path, 'buildings.txt'))

#### Site visualization

In [None]:
# Draw all extracted sites on a map named after the town; the trailing
# expression makes the notebook display the map.
ill = MapIllustrator()
sitemap = ill.sites_map(
    bbox=bbox,
    site_loc=site_loc,
    site_type=site_type,
    site_dict=site_dict,
    map_name=f'{town_name}_site_distribution',
)
sitemap

#### Generate home location based on various options

* `home_loc`: list of home coordinates
* `people_age`: list of age category 
* `home_tile`: list of map tile to which each home belongs
* `tile_loc`: list of tile center coordinates

The following three options generate a population distribution across a geographical area consisting of tiles (square boxes) of specific resolution. More information about tile sizes can be found in https://wiki.openstreetmap.org/wiki/Zoom_levels. 

In [None]:
# Tile resolution used by the 'custom' mode: level 15 when the town covers
# the whole region, level 16 otherwise.
if region_population == town_population:
    tile_level = 15
else:
    tile_level = 16

if population_by == 'custom':
    # generate population across tiles based on density input
    print('Tile level: ', tile_level)
    home_loc, people_age, home_tile, tile_loc, people_household, essential_workers = generate_population(
        density_file=population_path, bbox=bbox,
        population_per_age_group=population_per_age_group, 
        household_info=household_info, tile_level=tile_level, seed=42,
        essential_prop_per_age_group=essential_prop_per_age_group)
    
elif population_by == 'random':
    # generate population across tiles uniformly at random
    # NOTE(review): this mode uses a fixed tile_level=16 and passes no
    # household_info - confirm this is intended.
    home_loc, people_age, home_tile, tile_loc, people_household, essential_workers = generate_population(
        bbox=bbox, population_per_age_group=population_per_age_group,
        tile_level=16, seed=42,
        essential_prop_per_age_group=essential_prop_per_age_group)

elif population_by == 'heuristic':
    # generate population across tiles proportional to buildings per tile
    # NOTE(review): this mode uses a fixed tile_level=16 and passes no
    # household_info - confirm this is intended.
    home_loc, people_age, home_tile, tile_loc, people_household, essential_workers = generate_population(
        bbox=bbox, density_site_loc=density_site_loc,
        population_per_age_group=population_per_age_group, tile_level=16, seed=42,
        essential_prop_per_age_group=essential_prop_per_age_group)

else:
    # Fail here instead of with a NameError in a later cell when the mode is
    # misspelled and no population has been generated.
    raise ValueError(
        f"Invalid population_by mode '{population_by}'; "
        "expected 'custom', 'random' or 'heuristic'.")

In [None]:
# Count how many people share each household id, then plot the distribution
# of household sizes (bins for sizes 1 to 8).
unique_household, counts_household = np.unique(
    people_household,
    return_counts=True,
)
plt.hist(
    counts_household,
    bins=range(1, 9),
    align='left',
    rwidth=0.5,
)
plt.xlabel('Household Size')
plt.ylabel('Number of Households')

#### Home visualization

In [None]:
# Draw the generated home locations on a map named after the town.
homemap = ill.population_map(
    bbox=bbox,
    home_loc=home_loc,
    map_name=f'{town_name}_population_distribution',
)
homemap # zoom in to see details

Downsample sites as given by settings

In [None]:
if downsample > 1:
    # Fixed seed so the same subset of sites is drawn on every run
    np.random.seed(42)

    # downsample sites like population: keep len(site_loc) / downsample sites,
    # drawn without replacement with equal probability for every site
    num_sites = len(site_loc)
    num_kept = int(num_sites / downsample)
    uniform_prob = np.ones(num_sites) / num_sites
    idx = np.random.choice(num_sites, size=num_kept, replace=False, p=uniform_prob)

    # apply the same selection to locations and types so they stay aligned
    site_loc = np.array(site_loc)[idx].tolist()
    site_type = np.array(site_type)[idx].tolist()

In [None]:
# Report the number of (downsampled) sites and the site type mapping.
# Plain string literals: the labels contain no placeholders, so no f-string is needed.
print('Number of sites: ', len(site_loc))
print('Site types:      ', site_dict)

Compute pairwise distances between all tile centers and all sites

In [None]:
# Distances between the (downsampled) sites and the tile centers; the result
# is later handed to the simulator as `tile_site_dist`.
# NOTE(review): layout of the returned structure is not visible here - see `compute_distances`.
tile_site_dist = compute_distances(site_loc, tile_loc)

#### Specify synthetic mobility patterns

Here we specify the patterns of mobility used for generating the synthetic traces based on the above home and site locations. Note that this is a general framework and can be arbitrarily extended to any desired number or types of sites. See below for an example used in the first version of our paper.

Specify the mean duration of visit per type, or in reality, time spent in crowded places per type.

In [None]:
# # 2h at education, 1.5h at social places, 0.2h public transport, 2h office, 0.5h supermarket
# # (see site_dict for ordering)
# dur_mean_per_type = [2, 1.5, 0.2, 2, 0.5]

Determine the number of discrete sites a person visits per site type.

In [None]:
# # 1 education, 10 social places, 5 public transport, 1 office, 2 supermarket
# # (see site_dict for ordering)
# variety_per_type = [1, 10, 5, 1, 2]

Set the number of visits per week that each group makes per type of site

In [None]:
# Per-country mobility settings:
#   mob_rate_per_age_per_type - visits per week, one row per age group and
#                               one column per site type
#   dur_mean_per_type         - mean visit duration per site type (hours)
#   variety_per_type          - number of discrete sites a person visits per site type
# Columns follow the site type ordering (see site_dict for ordering).
#
# e.g. line 0 corresponds to age 0-4 in Germany
# a lot of education (kindergarten), some social, no public transport, no office, no supermarket
# the age groups are chosen to match the age groups used in case data by national authorities
# GERMANY
if country == 'GER':
    mob_rate_per_age_per_type = [
        [5, 1, 0, 0, 0], # 0-4
        [5, 2, 3, 0, 0], # 5-14
        [2, 2, 3, 3, 1], # 15-34
        [0, 2, 1, 5, 1], # 35-59
        [0, 3, 2, 0, 1], # 60-79
        [0, 2, 1, 0, 1]]  # 80+
    # 2h at education, 1.5h at social places, 0.2h public transport, 2h office, 0.5h supermarket
    dur_mean_per_type = [2, 1.5, 0.2, 2, 0.5]
    # 1 education, 10 social places, 5 public transport, 1 office, 2 supermarket
    variety_per_type = [1, 10, 5, 1, 2]

# SWITZERLAND
elif country == 'CH':
    mob_rate_per_age_per_type = [
       [5, 1, 0, 0, 0], # 0-9
       [5, 2, 3, 0, 0], # 10-19
       [2, 2, 3, 3, 1], # 20-29
       [2, 2, 3, 3, 1], # 30-39
       [0, 2, 1, 5, 1], # 40-49
       [0, 2, 1, 5, 1], # 50-59
       [0, 3, 2, 0, 1], # 60-69
       [0, 3, 2, 0, 1], # 70-79
       [0, 2, 1, 0, 1]] # 80+
    # same durations and varieties as for Germany
    dur_mean_per_type = [2, 1.5, 0.2, 2, 0.5]
    variety_per_type = [1, 10, 5, 1, 2]
    
# UNITED STATES (four site types only)
elif country == 'US':
    mob_rate_per_age_per_type = [
       [5, 0, 0, 0], # 0-4
       [5, 0, 0, 0], # 5-14
       [5, 0, 3.6, 0.22], # 15-19
       [1.48, 3.52, 3.6, 0.21], # 20-24
       [0, 5, 3.6, 0.27], # 25-44
       [0, 5, 3.6, 0.36], # 45-59
       [0, 0, 3.6, 0.35], # 60-79
       [0, 0, 3.6, 0.35] # 80+
    ]
    #  {0: 'education', 1: 'office', 2: 'social', 3: 'supermarket'}
    dur_mean_per_type = [6.0, 5.0, 0.64, 0.4]
    #  {0: 'education', 1: 'office', 2: 'social', 3: 'supermarket'}
    variety_per_type = [1, 1, 10, 2]
else:
    raise ValueError('Invalid country code.')
    
# divide the weekly visit counts by the number of hours per week (24 * 7),
# i.e. convert to average visits per hour, to be compatible with simulator
mob_rate_per_age_per_type = np.divide(np.array(mob_rate_per_age_per_type), (24.0 * 7))

Set `delta`; the setting for delta is explained in the paper.

In [None]:
# time horizon `delta` passed to the simulator (the choice is explained in the paper)
# NOTE(review): presumably in hours, like the other simulator times - confirm
delta  = 4.6438 # as set by distributions

In [None]:
# Summary of the generated town: people per age group and sites per type.
# The number of site types is the number of columns of the mobility table.
num_site_types = len(mob_rate_per_age_per_type[0])
site_type_arr = np.array(site_type)
sites_per_type = [(site_type_arr == i).sum() for i in range(num_site_types)]

print('Population (by Age): ', population_per_age_group)
print('Sites (by type):     ',  sites_per_type)

print('Total:', sum(population_per_age_group), len(site_type))

Save arguments for the class object instantiation to be able to initiate `MobilitySimulator` on the fly during inference. That is more efficient than pickling in some cases.

In [None]:
# Keyword arguments for `MobilitySimulator`; they are pickled so that the
# simulator can be instantiated on the fly during inference.
kwargs = dict(
    home_loc=home_loc, 
    people_age=people_age, 
    site_loc=site_loc, 
    num_people_unscaled=town_population,
    region_population=region_population,
    site_type=site_type, 
    site_dict=site_dict, 
    downsample=downsample,
    mob_rate_per_age_per_type=mob_rate_per_age_per_type,
    daily_tests_unscaled=daily_tests_unscaled, 
    dur_mean_per_type=dur_mean_per_type, 
    variety_per_type=variety_per_type, 
    delta=delta,
    home_tile=home_tile, 
    tile_site_dist=tile_site_dist, 
    people_household=people_household,
    essential_workers=essential_workers)

settings_path = f'lib/mobility/{town_name}_settings_{downsample}.pk'

# Create the target directory first so that a fresh checkout without
# `lib/mobility` does not fail with FileNotFoundError after all the work above.
os.makedirs(os.path.dirname(settings_path), exist_ok=True)

with open(settings_path, 'wb') as fp:
    pickle.dump(kwargs, fp)

Create mobility traces as above, or uncomment the last section below to specify fully artificial traces.

In [None]:
# Instantiate the simulator from exactly the arguments that were pickled above,
# then turn on its `verbose` flag
mob = MobilitySimulator(**kwargs)
mob.verbose = True

In [None]:
# Simulation horizon in hours (days * 24)
max_time = 17 * 24.0 # e.g. 17 days
# Generate the mobility traces with a fixed seed; `%time` is an IPython magic
# that reports how long the call takes
%time mob.simulate(max_time=max_time, seed=12345)
# NOTE(review): the commented-out line below references `downsample_population`
# and `downsample_sites`, which are not defined in this notebook
# %time mob.to_pickle(f'tu_mobility_{downsample_population}_{downsample_sites}.pk')