In [1]:
import sys # for automation and parallelisation
# Interactive (ipykernel) runs use the 'base' scenario; script runs read it from argv[1]
if 'ipykernel' in sys.argv[0]:
    manual, scenario = True, 'base'
else:
    manual, scenario = False, sys.argv[1]

In [2]:
import geopandas as gpd
import numpy as np
import pandas as pd
from quetzal.engine.engine import od_volume_from_zones
from quetzal.io import excel
from quetzal.model import stepmodel

In [3]:
# Directory layout relative to the notebook; model files live in a per-scenario folder
input_path = '../input/'
input_static_path = '../input_static/'
output_path = '../output/'
model_path = f'../model/{scenario}/'

In [4]:
# Read the parameter workbook for the selected scenario
params = excel.read_var(
    file='../input/parameters.xls',
    scenario=scenario,
)

# OD matrix generation

## Needs zone attributes and generation / destination choice / distribution model results

## Saves volumes table

In [5]:
# Transport demand segments
# The parameter holds all segment names in one ';'-separated string
raw_segments = params['general']['demand_segments']
segments = [name.strip() for name in raw_segments.split(';')]
purposes_compulsory = ['commuting', 'business', 'education']
purposes_optional = ['buy/execute', 'leisure', 'accompany']
# A segment's purpose is the part of its name before the first underscore
segments_compulsory = [seg for seg in segments
                       if seg.partition('_')[0] in purposes_compulsory]
segments_optional = [seg for seg in segments
                     if seg.partition('_')[0] in purposes_optional]

In [6]:
# Load zones
# Read the step model stored under `<model_path>de_zones`
sm = stepmodel.read_json(model_path + 'de_zones')
# Sort the zones table by its index (in place)
sm.zones.sort_index(inplace=True)

In [7]:
# Fill POI values
pois = pd.read_csv(input_path + 'spatial_num_pois_raw.csv', index_col='index')
cats = pd.read_excel(input_path + 'spatial_OSM_POI_list.xlsx', sheet_name='categories')
# Label = "<key> <value>", or just "<key>" when the value is missing;
# these labels are used below to select columns of `pois`
cats['label'] = (cats['key'] + ' ' + cats['value'].fillna('')).str.strip()
# One list of POI labels per category (rows without a category are ignored)
labels_by_category = (
    cats.loc[cats['category'].notna()]
    .groupby('category')['label']
    .agg(list)
)
for category, columns in labels_by_category.items():
    # Total POI count over all labels of this category, looked up by zone FID
    poi_total = pois[columns].sum(axis=1)
    # Scenario-specific change factor per urbanisation class
    change_factor = sm.zones['urbanisation'].map(params['poi_change']).astype(float)
    sm.zones[category] = sm.zones['FID'].map(poi_total).fillna(0) * change_factor

In [8]:
# Load inter-zonal composite cost from mode choice step
cc_file = '/mode_choice_od_composite_cost.csv'
try:
    cc = pd.read_csv(output_path + scenario + cc_file)
except FileNotFoundError:
    # No mode choice results for this scenario: fall back to the base scenario
    cc = pd.read_csv(output_path + 'base' + cc_file)
# Index by OD pair, sort, and apply a lower bound of 0.01 to all values
cc = cc.set_index(['origin', 'destination']).sort_index().clip(lower=0.01)

In [9]:
# Load centroid distances, indexed by OD pair
dist = pd.read_csv(
    output_path + 'distances_centroids.csv',
    index_col=['origin', 'destination'],
)

In [12]:
# Define mathematical formulations for the cost term
def power_function(CC, a, b):
    """Power deterrence term: ``a * CC ** (-b)``.

    CC is the cost (scalar, array or DataFrame); a scales the result and
    b is the decay exponent. Evaluation is element-wise via numpy.
    """
    decay = np.power(CC, -b)
    return a * decay
def exponential_function(CC, a, b):
    """Exponential deterrence term: ``a * exp(-b * CC)``.

    CC is the cost (scalar, array or DataFrame); a scales the result and
    b is the decay rate. Evaluation is element-wise via numpy.
    """
    exponent = -b * CC
    return a * np.exp(exponent)
def lognormal_function(CC, a, b):
    """Log-normal deterrence term: ``a * exp(-b * ln(CC + 1) ** 2)``.

    CC is the cost (scalar, array or DataFrame); a scales the result and
    b is the decay parameter. Evaluation is element-wise via numpy.
    """
    log_cost = np.log(CC + 1)
    squared = np.power(log_cost, 2)
    return a * np.exp(-b * squared)

In [13]:
# Load estimated function parameters
# The first two columns form the index; it is looked up as (segment, function name)
ab = pd.read_csv(
    input_path + 'estimation_results_distribution.csv',
    index_col=[0, 1],
)

In [14]:
# Compute volumes from emission and attraction for a given segment
def compute_volumes(segment, emission, attraction, f=lognormal_function):
    """Distribute the trip ends of one demand segment over OD pairs.

    Parameters
    ----------
    segment : str
        Name of the segment; selects the cost column of `cc` and, together
        with ``f.__name__``, the row of `ab` holding parameters 'a' and 'b'.
    emission, attraction
        Values written to ``sm.zones['emission']`` / ``sm.zones['attraction']``
        before the distribution is run.
    f : callable
        Deterrence function ``f(CC, a, b)`` applied to the cost matrix.

    Returns
    -------
    pandas.DataFrame
        Result of `od_volume_from_zones`, NaN replaced by 0 and indexed by
        ('origin', 'destination').

    Notes
    -----
    Overwrites the 'emission' and 'attraction' columns of the global
    ``sm.zones`` and reads the globals `cc`, `dist` and `ab`.
    """
    sm.zones['emission'] = emission
    sm.zones['attraction'] = attraction
    # Keep only zones that are in the zones table and occur on both levels of cc
    zone_ids = set(sm.zones['FID'])
    zone_ids &= set(cc.index.get_level_values(0))
    zone_ids &= set(cc.index.get_level_values(1))
    zone_set = sorted(zone_ids)
    # Create deterrence matrix from CC: composite cost plus a tenth of the 'length'
    deterr = cc[[segment]].merge(dist, left_index=True, right_index=True, how='left')
    deterr['cc_dist'] = deterr[segment] + deterr['length'] / 10
    deterr = deterr['cc_dist'].unstack('destination').loc[zone_set, zone_set]
    # OD pairs without a cost get a large value of 1e3
    deterr = deterr.fillna(1e3)
    # Add inner-zonal resistance: half of the row minimum.
    # Writing a diagonal cell only affects that cell's own row, so all row
    # minima can be computed once up front instead of once per zone.
    inner_cost = deterr.min(axis=1).to_numpy() / 2
    cost = deterr.to_numpy(copy=True)
    np.fill_diagonal(cost, inner_cost)
    deterr = pd.DataFrame(cost, index=deterr.index, columns=deterr.columns)
    # convert CC to deterrence
    a = ab.loc[(segment, f.__name__), 'a']
    b = ab.loc[(segment, f.__name__), 'b']
    deterr = f(deterr, a, b)
    # Run doubly constrained distribution
    vol = od_volume_from_zones(sm.zones.loc[sm.zones['FID'].isin(zone_set)], deterr)
    return vol.fillna(0).set_index(['origin', 'destination'])

In [15]:
# Empty table that collects the OD volumes, one column per demand segment (filled below)
volumes = pd.DataFrame()

## Compulsory trips

The doubly constrained gravity model is used as the distribution method here because logit models do not perform well for the purposes commuting, business and education.

In [16]:
# Volumes for commuting
# Trip ends = persons/jobs * car availability share * days at work * 2
car_avail = sm.zones['car_avail_hh']
days = params['generation']['days_at_work_with_car']
volumes['commuting_car'] = compute_volumes(
    segment='commuting_car',
    emission=sm.zones['employed'] * car_avail * days * 2,
    attraction=sm.zones['employment'] * car_avail * days * 2,
    f=exponential_function)
# Same computation for the complementary share without car availability
no_car_avail = 1 - car_avail
days = params['generation']['days_at_work_without_car']
volumes['commuting_no_car'] = compute_volumes(
    segment='commuting_no_car',
    emission=sm.zones['employed'] * no_car_avail * days * 2,
    attraction=sm.zones['employment'] * no_car_avail * days * 2,
    f=exponential_function)

In [17]:
# Volumes for business
# Average business trips per day from employed persons
# (distributed with the default deterrence function of compute_volumes)
car_avail = sm.zones['car_avail_hh']
days = params['generation']['days_at_work_with_car']
factor = params['generation']['business_trip_factor_with_car']
volumes['business_car'] = compute_volumes(
    segment='business_car',
    emission=sm.zones['employed'] * car_avail * days * 2 * factor,
    attraction=sm.zones['employment'] * car_avail * days * 2 * factor)
# NOTE(review): `days` is still 'days_at_work_with_car' here, whereas the
# commuting cell switches to 'days_at_work_without_car' for its no-car
# segment - confirm this is intended.
no_car_avail = 1 - car_avail
factor = params['generation']['business_trip_factor_without_car']
volumes['business_no_car'] = compute_volumes(
    segment='business_no_car',
    emission=sm.zones['employed'] * no_car_avail * days * 2 * factor,
    attraction=sm.zones['employment'] * no_car_avail * days * 2 * factor)

In [15]:
# Load data for education
# `skipfooter` is not supported by the C parser; request the python engine
# explicitly instead of relying on the fallback (which emits a ParserWarning).
age_groups = pd.read_csv(input_static_path + 'spatial_Zensus_ages_2017_GENESIS.csv',
                         encoding='latin-1', sep=';', skiprows=5, skipfooter=4,
                         na_values='-', engine='python').dropna()
age_groups.rename(columns={'Unnamed: 1': 'lau_id'}, inplace=True)
age_groups.loc[age_groups['lau_id']==16056, 'lau_id'] = 16063 # Update Eisenach
# Aggregate to one row per LAU id
age_groups = age_groups.groupby('lau_id').sum()
if 'lau_id' not in sm.zones.columns:
    # Derive the LAU id by dropping the last four characters of the FID
    # NOTE(review): this yields strings, while the age table is filtered with
    # integer ids above - confirm both sides share a dtype before mapping.
    sm.zones['lau_id'] = sm.zones['FID'].astype(str).str[:-4]
# Urban zones: urbanisation class 1 with more than 100,000 inhabitants
sm.zones['is_urban'] = (sm.zones['urbanisation']==1) & (sm.zones['population']>100000)

  age_groups = pd.read_csv(input_static_path + 'spatial_Zensus_ages_2017_GENESIS.csv',


In [16]:
# Volumes for education
# Emissions are people in education (school, apprenticeship, higher education)
# Attractions are corresponding institutions
# Each emission is: age-group population of the zone's LAU * zone population share
# * participation rate (separate rates for urban and non-urban zones)
emission = \
    {'higher_education':
        sm.zones['lau_id'].map(
            age_groups['18 bis unter 20 Jahre']
            + age_groups['20 bis unter 25 Jahre']
        ) * sm.zones['pop_share']
        * sm.zones['is_urban'].map(lambda urban: params['generation']['students_per_18-25yo_urban']
                                   if urban else params['generation']['students_per_18-25yo_non-urban']),
    'school':
        sm.zones['lau_id'].map(
            age_groups['6 bis unter 10 Jahre']
            + age_groups['10 bis unter 15 Jahre']
            + age_groups['15 bis unter 18 Jahre']
        ) * sm.zones['pop_share']
        * sm.zones['is_urban'].map(lambda urban: params['generation']['pupils_per_6-18yo_urban']
                                   if urban else params['generation']['pupils_per_6-18yo_non-urban']),
    'employment':
        sm.zones['lau_id'].map(
            age_groups['18 bis unter 20 Jahre']
        ) * sm.zones['pop_share']
        * sm.zones['is_urban'].map(lambda urban: params['generation']['apprentices_per_18-20yo_urban']
                                   if urban else params['generation']['apprentices_per_18-20yo_non-urban'])
   }

vol_df = pd.DataFrame(index=volumes.index)
institutions = ['higher_education', 'school', 'employment']
for institution in institutions:
    days = params['generation']['edu_days_in_'+institution]
    # With car available
    vol_df[institution+'_car'] = compute_volumes(
        'education_car',
        emission[institution] * sm.zones['car_avail_hh'] * days * 2,
        sm.zones[institution] * sm.zones['car_avail_hh'] * days * 2)
    # Without car available: use the complementary share (1 - car availability),
    # as the commuting and business cells do. Previously this call passed the
    # same car-available trip ends as the call above.
    # NOTE(review): params['generation']['edu_share_without_car'] was read here
    # but never used; confirm whether that share should replace
    # (1 - car_avail_hh) for education.
    vol_df[institution+'_no_car'] = compute_volumes(
        'education_no_car',
        emission[institution] * (1 - sm.zones['car_avail_hh']) * days * 2,
        sm.zones[institution] * (1 - sm.zones['car_avail_hh']) * days * 2)

# Sum up all these education groups
volumes['education_car'] = vol_df[[i+'_car' for i in institutions]].sum(axis=1)
volumes['education_no_car'] = vol_df[[i+'_no_car' for i in institutions]].sum(axis=1)

  F = F / worse
  F = F / worse
  F = F / worse
  F = F / worse
  F = F / worse
  F = F / worse


## Volumes for non-compulsory trips

In [17]:
# Load inter-zonal trip generation
generation_file = 'generation_volumes_inter.csv'
try:
    generation = pd.read_csv(model_path + generation_file, index_col=0)
except FileNotFoundError:
    # No generation results for this scenario: read them from the base scenario folder
    base_model_path = model_path.replace(scenario, 'base')
    generation = pd.read_csv(base_model_path + generation_file, index_col=0)

In [18]:
# Calculate number of trips
# Emission: generated trips looked up by zone FID; attraction: zone population
for seg in segments_optional:
    volumes[seg] = compute_volumes(
        segment=seg,
        emission=sm.zones['FID'].map(generation[seg]),
        attraction=sm.zones['population'])

In [18]:
# Number of rows (OD pairs) in the volumes table
len(volumes)

21160000

In [19]:
# Show one randomly drawn row as a quick visual check
volumes.sample()

Unnamed: 0_level_0,Unnamed: 1_level_0,commuting_car,commuting_no_car,business_car,business_no_car
origin,destination,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
145245104,146280410,3256.447217,2.268295,767.904852,3.36948


In [20]:
# Keep only OD pairs that have a value in every segment column
volumes = volumes.dropna(axis=0, how='any')
len(volumes)

21160000

In [None]:
# check the sum (million trips per day)
total_trips = volumes[segments].sum().sum()
print(f'Million trips per day: {total_trips / 1e6 / 365}')

## Policies

In [23]:
# Multi-purpose densified quarters in agglomeration areas around large cities
# Requires geopandas to be imported as `gpd`.
max_dist = params['trip_reduction_suburban_quarters']['max_dist_to_city']
if max_dist > 0:
    # Find quarters
    # NOTE(review): verify that EPSG:5234 is the intended metric CRS for the
    # study area (possibly EPSG:5243 was meant).
    zones = gpd.GeoDataFrame(sm.zones, crs=sm.epsg).to_crs('EPSG:5234') # distance to meter
    # Large cities: urbanisation class 1 with more than 200,000 inhabitants
    cities = zones.loc[(zones['urbanisation']==1) & (zones['population']>200000)]
    # Candidate quarters: centroids of zones with urbanisation class 2
    is_suburban = zones['urbanisation']==2
    centroids = gpd.GeoDataFrame(index=zones.loc[is_suburban].index,
                                 geometry=zones.loc[is_suburban, 'geometry'].centroid)
    # Keep only centroids that have a large city within max_dist
    quarters = gpd.sjoin_nearest(centroids, cities[['geometry']], max_distance=max_dist)
    # Reduce trips
    # 'origin' is an index level of `volumes` (set in compute_volumes), not a
    # column; fall back to a column in case the index has been reset.
    # Assumes the zones index holds the ids used as 'origin' - TODO confirm.
    if 'origin' in volumes.columns:
        origins = volumes['origin']
    else:
        origins = volumes.index.get_level_values('origin')
    from_quarter = origins.isin(quarters.index)
    for seg in segments:
        volumes.loc[from_quarter, seg] *= \
            1 - params['trip_reduction_suburban_quarters'][seg]

## Save

In [22]:
# Save volumes table
# Attach the volumes table (as is, without resetting its index) to the model object
sm.volumes = volumes
#sm.volumes = volumes.reset_index(drop=True)
# Write only the `volumes` attribute to zipped pickles under `<model_path>de_volumes`
sm.to_zippedpickles(model_path + 'de_volumes', only_attributes=['volumes'])

volumes: 100%|████████████████████████████████████████████████████| 9/9 [00:24<00:00,  2.75s/it]
