In [1]:
import sys # for automation and parallelisation
manual, scenario = (True, 'base') if 'ipykernel' in sys.argv[0] else (False, sys.argv[1])
if manual:
    %matplotlib inline

In [2]:
import pandas as pd
from quetzal.model import stepmodel
from quetzal.io import excel

PyTables is not installed. No support for HDF output.
SQLalchemy is not installed. No support for SQL output.


In [3]:
# Scenario-specific model folder, plus the input and output folders (relative paths)
model_path = '../model/{}/'.format(scenario)
input_path = '../input/transport_demand/'
output_path = '../output/'

In [4]:
# Read the parameters of the active scenario from the Excel parameter file
params = excel.read_var(
    file='../input/parameters.xls',
    scenario=scenario,
)

In [5]:
# Demand segments are stored as one ';'-separated string in the 'general' parameters
raw_segments = params['general']['demand_segments']
segments = list(map(str.strip, raw_segments.split(';')))
# Trip purposes used in the model
purposes = ['commuting', 'business', 'education', 'buy/execute', 'leisure', 'accompany']
# Numeric code per purpose (presumably the purpose codes of the survey data - TODO confirm)
purp_n_dict = {purpose: code for purpose, code in zip(purposes, (1, 2, 3, 4, 6, 7))}

# Calculate volumes
$P = \text{probability}(O, \text{distance class}, \text{segment})$ <br>
$Vol = (P*\text{population}_{\text{segment}}*n_{\text{expected trips per person, segment}})$ <br>


## Trips per person from MiD2017

(Ortuzar 2011 [S. 166], Daly 1997): Define groups of individuals and determine for each group the number of trips per purpose.

Here the group of individuals is given by car availability and the urbanisation degree of the home zone. The determination of the number of trips per purpose is done by taking the mean number of reported trips per person in MiD2017.

In [6]:
# Read the trip records of the MiD survey (only the columns needed here)
trip_columns = ['P_ID', 'W_HOCH', 'urbanisation', 'purpose_model', 'car_avail',
                'origin', 'destination']
trips = pd.read_csv(input_path + 'calibration_all_trips_MiD2017.csv', usecols=trip_columns)

# Read the person records of the MiD survey (semicolon-separated, decimal comma)
person_source_columns = ['HP_ID_Reg', 'P_HOCH', 'P_GEW', 'P_RBW_ANZ']
person_model_columns = ['P_ID', 'P_HOCH', 'weight', 'num_rbw']
persons = pd.read_csv(input_path + 'MiD2017_Regional_Personen.csv', sep=';', decimal=',',
                      usecols=person_source_columns)
# Fix the column order explicitly (usecols keeps the file's order), then rename
persons = persons[person_source_columns].rename(
    columns=dict(zip(person_source_columns, person_model_columns)))

# Attach the person attributes to every trip of that person (inner join on P_ID)
trips = trips.merge(persons, on='P_ID')

In [7]:
# Total trips per day in millions: sum of the trip expansion column W_HOCH.
# Only trips with specified origin and destination and other filters as in cal10
weighted_trip_sum = trips['W_HOCH'].sum()
total_trips_MiD = weighted_trip_sum / 1e6
total_trips_MiD

67.25827528747712

In [8]:
# Total population of Germany in 2017 in millions: sum of the person
# expansion column P_HOCH over all persons of the survey
weighted_person_sum = persons['P_HOCH'].sum()
total_pop_MiD = weighted_person_sum / 1e6
total_pop_MiD

82.17568400000002

In [9]:
# Drop trips with unreported car availability (keep only the values 0 and 1)
has_car_info = trips['car_avail'].isin([0, 1])
trips = trips.loc[has_car_info]
# Set unreported numbers of regular business trips
# (regelmäßige berufliche Wege, rbw) to 0
rbw_dict = dict.fromkeys([94, 99, 200, 607, 608], 0)
trips['num_rbw'] = trips['num_rbw'].replace(rbw_dict)
trips.shape

(260709, 10)

In [10]:
# Get trips per person for each segment (urbanisation x purpose x car availability)

# Weighted number of persons per urbanisation class. Every person is counted once
# (drop_duplicates on P_ID). The P_HOCH column is selected *before* summing so that
# no other (e.g. string) column is aggregated needlessly.
urban_pop_dict = (trips.drop_duplicates('P_ID')
                       .groupby('urbanisation')['P_HOCH']
                       .sum()
                       .to_dict())


def trips_per_person(g):
    """Return the weighted trips per person for one group of trips.

    Parameters
    ----------
    g : pandas.DataFrame
        Trips of one (urbanisation, purpose_model, car_avail) group as passed by
        ``groupby.apply``; must contain the column 'W_HOCH'. ``g.name`` is the
        group key, whose first element is the urbanisation class.

    Returns
    -------
    float
        Sum of 'W_HOCH' of the group divided by the weighted number of persons
        of the group's urbanisation class (``urban_pop_dict``).
    """
    # Take the urbanisation class from the group key instead of the grouping
    # column: newer pandas versions no longer pass grouping columns to apply
    urbanisation = g.name[0]
    return g['W_HOCH'].sum() / urban_pop_dict[urbanisation]


# Only the W_HOCH column is handed to the helper; the result is a Series indexed
# by (urbanisation, purpose_model, car_avail)
segmented_trips_pp = trips.groupby(['urbanisation', 'purpose_model', 'car_avail']
                                  )[['W_HOCH']].apply(trips_per_person)

In [11]:
# Show the trips per person as a table with one column per urbanisation class
segmented_trips_pp.unstack(level='urbanisation')

Unnamed: 0_level_0,urbanisation,1,2,3
purpose_model,car_avail,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
accompany,0,0.01568,0.003865,0.001045
accompany,1,0.197963,0.227818,0.225678
business,0,0.018381,0.0029,0.002503
business,1,0.079834,0.078985,0.078122
buy/execute,0,0.148107,0.030932,0.032978
buy/execute,1,0.574955,0.777193,0.797909
commuting,0,0.10861,0.018408,0.010831
commuting,1,0.396461,0.449155,0.4672
education,0,0.035237,0.003708,0.0018
education,1,0.117327,0.147167,0.153143


## Map generation to zones

In [12]:
# Read the zones model of the active scenario
sm = stepmodel.read_json('{}de_zones'.format(model_path))

In [13]:
# Put trip generation in zones table:
# trips per person (by segment and urbanisation) * zone population * days per year.
# The generation columns therefore hold yearly values; the checks below divide
# by 365 again to report trips per day.
DAYS_PER_YEAR = 365
for p in purposes:
    for c in [0, 1]:
        # Rates of this purpose/car-availability segment keyed by urbanisation class.
        # Looked up once per segment instead of three chained lookups per zone.
        rates = segmented_trips_pp.xs((p, c), level=['purpose_model', 'car_avail']).to_dict()
        sm.zones[str(((p, c), 'generation_MiD'))] = [
            rates[u] * pop * DAYS_PER_YEAR
            for u, pop in zip(sm.zones['urbanisation'], sm.zones['population'])]

In [14]:
# Sum of trips per day with model population (million):
# add up all generation columns, then scale to millions and to one day
generation_columns = [str(((p, c), 'generation_MiD'))
                      for p in purposes
                      for c in [0, 1]]
sm.zones[generation_columns].sum().sum() / 1e6 / 365

201.66808606211438

## Volumes from  destination choice probabilities
result: $P*\text{population}_{\text{segment, zone}}*n_{\text{expected trips per person, segment, zone}}$

In [15]:
# Read the destination choice model (probability results from the logit step)
gdm = stepmodel.read_json('{}de_destination_choice'.format(model_path))

In [16]:
# Extract probability values and create new table from it.
# After indexing by (origin, segment) and dropping the 'destination' and 'root'
# columns, the remaining columns are treated as destinations.
prob = gdm.probabilities.set_index(['origin', 'segment']).drop(columns=['destination', 'root'])
prob.columns.name = 'destination'
# Reshape to one row per (origin, destination) with one column per segment
volumes = prob.stack('destination').unstack('segment').reset_index()
volumes.columns.name = None
# Segments without probability results get 0. Iterate the configured list (not a
# set difference) so that the order of added columns is the same in every run.
for seg in segments:
    if seg not in volumes.columns:
        volumes[seg] = 0
volumes.sample(2)

Unnamed: 0,origin,destination,accompany_car,accompany_no_car,business_car,business_no_car,buy/execute_car,buy/execute_no_car,commuting_car,commuting_no_car,education_car,education_no_car,leisure_car,leisure_no_car
4348780,DED53_3,DE723_6,1.2e-09,7.623e-07,3e-06,5.1e-05,0.0,2.82e-08,0.0,3.7e-09,7.011906e-17,5e-10,7.8e-07,0.0
3696850,DEA57_2,DE724_2,0.01029781,0.01216236,0.00333,0.001004,0.0083,0.01039403,0.007072,0.01121597,0.006421545,0.008825549,0.009657903,0.0


In [17]:
# Create inner-zonal probabilities
in_mask = volumes['origin'] == volumes['destination']
if not in_mask.any():
    # The table holds no inner-zonal relations yet: add one row
    # (origin == destination) per zone. pd.concat replaces DataFrame.append, which
    # was removed in pandas 2.0. ignore_index=True gives a clean index and avoids
    # the stray 'index' column a plain reset_index() would add to the table.
    zone_ids = list(sm.zones.index)
    inner_rows = pd.DataFrame({'origin': zone_ids, 'destination': zone_ids})
    volumes = pd.concat([volumes, inner_rows], ignore_index=True, sort=False)
    in_mask = volumes['origin'] == volumes['destination']

# Segment names end with '_car' or '_no_car'; the token after the first '_' is the key
car_names = {'no': 0, 'car': 1}
urban_dict = sm.zones['urbanisation'].to_dict()
# Urbanisation class of every row's origin zone (same for all segments)
origin_urbanisation = volumes['origin'].map(urban_dict)
# Survey trips that start and end in the same zone
is_inner_trip = trips['origin'] == trips['destination']

for seg in segments:
    seg_parts = seg.split('_')
    purpose, car_key = seg_parts[0], seg_parts[1]
    # Calculate mean inner-zonal probability by segment and urbanisation from MiD2017:
    # share of the segment's survey trips that are inner-zonal
    mask = (trips['purpose_model'] == purpose) \
            & (trips['car_avail'] == car_names[car_key])
    all_counts = trips.loc[mask].groupby('urbanisation')['origin'].count()
    inner_counts = trips.loc[is_inner_trip & mask].groupby('urbanisation')['origin'].count()
    # A class with segment trips but no inner-zonal ones has probability 0; without
    # the reindex the division would give NaN there and propagate into the volumes
    probs = inner_counts.reindex(all_counts.index, fill_value=0) / all_counts
    # Correct probabilities in volumes: inner-zonal relations get the survey share,
    # all other relations are scaled by the remaining share (1 - probs)
    volumes.loc[in_mask, seg] = origin_urbanisation[in_mask].map(probs)
    volumes.loc[~in_mask, seg] = volumes.loc[~in_mask, seg] * \
                                 origin_urbanisation[~in_mask].map(1 - probs)

In [18]:
# Calculate number of trips: probability of each relation times the trip
# generation of its origin zone, separately for every segment
for s in segments:
    name_parts = s.split('_')
    generation_column = str(((name_parts[0], car_names[name_parts[1]]), 'generation_MiD'))
    generation_dict = sm.zones[generation_column].to_dict()
    origin_generation = volumes['origin'].map(generation_dict)
    volumes[s] = volumes[s] * origin_generation

In [19]:
# Check the sum: total trips over all segments, in millions per day
total_volume = volumes[segments].sum().sum()
total_volume / 1e6 / 365

199.9836865896257

## Save

In [20]:
# Attach the volumes table (with a fresh index) to the model and
# write only this attribute to the scenario's model folder
gdm.volumes = volumes.reset_index(drop=True)
gdm.to_json(
    model_path + 'de_volumes_endo',
    only_attributes=['volumes'],
    encoding='utf-8',
)

to_hdf(overwriting): 100%|█████████████████████████████████████████████████████████████| 14/14 [00:46<00:00,  3.33s/it]
