In [None]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from quetzal.model import stepmodel
from quetzal.io import excel

# Validate different scenarios against each other

In [None]:
# Name of the baseline scenario; all other scenarios are compared against it
scenario_ref = 'reference'
# Full scenario list, baseline first
scenarios = [scenario_ref, 'Avoid', 'Shift', 'Avoid+Shift']

In [None]:
# Directory layout, relative to the notebook location
input_path = '../input/'
network_path = '../input_static/'
output_path = '../output/'
model_path = '../model/'

# Parameter sheet values for the reference scenario
params = excel.read_var(file='../input/parameters.xls', scenario=scenario_ref)

# Demand segments are stored as a single ';'-separated string
segments = list(map(str.strip, params['general']['demand_segments'].split(';')))

# Purpose = text before the first underscore of every second segment name
# NOTE(review): presumably segments come in consecutive pairs per purpose
# (with / without car) -- verify against the parameter file
purposes = [seg.split('_')[0] for seg in segments[::2]]

In [None]:
# Load the zone model of the reference scenario
ref_zones_file = model_path + scenario_ref + '/de_zones'
ref = stepmodel.read_json(ref_zones_file)
# Wrap the zones table in a GeoDataFrame
ref.zones = gpd.GeoDataFrame(ref.zones)

In [None]:
# Load distances and key them by (origin, destination)
distances = (
    pd.read_csv(output_path + '/distances_centroids.csv')
    .set_index(['origin', 'destination'])
)

In [None]:
# Load the calibration dataset for validation
# (currently switched off: the condition below is hard-coded to False)
if False:
    mid2017 = pd.read_csv(input_path + 'transport_demand/calibration_all_trips_MiD2017.csv')
    # Sanity check only: the first origin must already carry a 'DE'-prefixed ID
    # (no LAU -> NUTS conversion is performed here)
    first_origin = str(mid2017.loc[0, 'origin'])
    assert first_origin.startswith('DE')
    # Rename modes and purposes
    mode_dict_mid = {
        1: 'rail_short',
        2: 'rail_long',
        3: 'coach',
        4: 'bus',
        5: 'air',
        6: 'car',
        7: 'walk',
    }
    car_avail_suffix = {1: '_car', 0: '_no_car', 9: '_no_car'}
    mid2017['mode_model'] = mid2017['mode_model'].map(mode_dict_mid)
    mid2017['purpose_model'] = mid2017['purpose_model'].apply(lambda s: s.split('_')[0])
    # Segment name = purpose + car availability suffix
    mid2017['segment'] = mid2017['purpose_model'] + mid2017['car_avail'].map(car_avail_suffix)
    keep_columns = ['mode_model', 'purpose_model', 'segment', 'origin', 'destination']
    mid2017 = mid2017[keep_columns]

# Composite cost

Perceived cost averaged over all modes by origin-destination pair

In [None]:
# Load CC
# One composite cost table per scenario, keyed by scenario name
cc_file = '/mode_choice_od_composite_cost.csv'
cc = dict()
for scenario in scenarios:
    scenario_dir = output_path + scenario
    cc[scenario] = pd.read_csv(scenario_dir + cc_file)

In [None]:
# Differences to reference scenario
# One row of heatmaps per non-reference scenario; the three columns show the
# min / mean / max over all value columns of the OD table, each minus the
# SAME aggregation of the reference scenario.
other_scenarios = scenarios[1:]
aggregations = ['min', 'mean', 'max']
# squeeze=False keeps `ax` two-dimensional even with a single comparison scenario
fig, ax = plt.subplots(nrows=len(other_scenarios), ncols=len(aggregations),
                       figsize=(15, len(other_scenarios)*5),
                       sharex='all', sharey='all', squeeze=False)
label_step = 90


def _od_matrix(table, how):
    """Aggregate all non-key columns per OD pair and pivot to origin x destination.

    table: DataFrame with 'origin' and 'destination' columns
    how: name of a row-wise DataFrame reduction ('min', 'mean' or 'max')
    Returns a DataFrame indexed by origin with one column per destination.
    """
    per_od = getattr(table.set_index(['origin', 'destination']), how)(axis=1)
    return per_od.unstack('destination')


# Reference matrices are computed once per aggregation
ref_matrices = {how: _od_matrix(cc[scenario_ref], how) for how in aggregations}

for i, scenario in enumerate(other_scenarios):
    # Bind `scenario` in this loop; relying on the leftover loop variable of a
    # previous cell would plot the same (last) scenario in every row
    ax[i,0].set_ylabel(scenario)
    for j, how in enumerate(aggregations):
        diff = (_od_matrix(cc[scenario], how) - ref_matrices[how]).fillna(0)
        ax[i,j].imshow(diff.values)
    # imshow draws matrix rows along y and matrix columns along x,
    # so y ticks come from the index (origins)
    ylabels = list(diff.index)
    ax[i,0].set_yticks(range(0, len(ylabels), label_step), labels=ylabels[::label_step], fontsize=8)
    if i == len(other_scenarios)-1:
        # Only the bottom row carries x tick labels (destinations) and column captions
        xlabels = list(diff.columns)
        for j, how in enumerate(aggregations):
            ax[i,j].set_xticks(range(0, len(xlabels), label_step), labels=xlabels[::label_step],
                               fontsize=8, rotation=90)
            ax[i,j].set_xlabel(how.capitalize())

In [None]:
# Define distance classes
bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 120, 150, 200, 300, 500, 1000]
labels = ['{}-{}km'.format(bins[i], bins[i+1]) for i in range(len(bins)-1)]
# (origin, destination) -> length lookup; scenario-independent, so build it once
od_length = distances['length'].to_dict()
for scenario in scenarios:
    od_pairs = cc[scenario].set_index(['origin', 'destination']).index
    cc[scenario]['dist'] = od_pairs.map(od_length)
    # Intervals are right-closed (pd.cut default): a distance of exactly 0
    # falls outside the first class and becomes NaN
    cc[scenario]['bins'] = pd.cut(cc[scenario]['dist'], bins=bins, labels=labels)

In [None]:
# Composite cost by distance class: one line per scenario, panels show the
# min / mean / max across segments of the per-class segment means
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,10),
                       sharex='all', sharey='all')
for scenario in scenarios:
    table = cc[scenario]
    # Keep only rows without any missing value
    complete_rows = table.loc[table.notna().all(axis=1)]
    # Compute the per-class segment means once and reuse them for all panels.
    # observed=False keeps every distance class (also empty ones) so that all
    # scenarios share the same x positions.
    class_means = complete_rows.groupby('bins', observed=False)[segments].mean()
    class_means.min(axis=1).plot.line(legend=True, label=scenario, ax=ax[0])
    class_means.mean(axis=1).plot.line(legend=True, label=scenario, ax=ax[1])
    class_means.max(axis=1).plot.line(legend=True, label=scenario, ax=ax[2])
ax[0].set_ylabel('Min')
ax[1].set_ylabel('Mean')
ax[2].set_ylabel('Max')

In [None]:
# Rebind `cc` to None: the composite cost tables are not referenced through
# this name any more from here on
cc = None

# Volumes

Number of trips per year

In [None]:
# Load volumes
# One model object per scenario, keyed by scenario name
vols = {
    name: stepmodel.read_zippedpickles(model_path + name + '/de_volumes')
    for name in scenarios
}

In [None]:
# Inner- and inter-zonal volumes by segment
# Rows: segments; columns: '<scenario>_inner' and '<scenario>_inter'
seg_df = pd.DataFrame()
for scenario in scenarios:
    vol = vols[scenario].volumes
    same_zone = vol['origin'] == vol['destination']
    other_zone = vol['origin'] != vol['destination']
    inner_total = vol.loc[same_zone, segments].sum()
    inter_total = vol.loc[other_zone, segments].sum()
    seg_df[scenario+'_inner'] = inner_total
    seg_df[scenario+'_inter'] = inter_total

In [None]:
# Compare inner- and inter-zonal volumes by segment
# Left panel: segments without car; right panel: all remaining segments
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12,4))
segments_no_car = [s for s in segments if '_no_car' in s]
segments_car = [s for s in segments if '_no_car' not in s]
for panel, rows in zip(ax, (segments_no_car, segments_car)):
    seg_df.loc[rows].plot.bar(legend=True, ax=panel)

### Distance distribution

In [None]:
# Define distance classes
bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 120, 150, 200, 300, 500, 1000]
labels = ['{}-{}km'.format(bins[i], bins[i+1]) for i in range(len(bins)-1)]
# (origin, destination) -> length lookup; scenario-independent, so build it once
od_length = distances['length'].to_dict()
for scenario in scenarios:
    # `vol` is the model's own volumes table, so the new columns are added in place
    vol = vols[scenario].volumes
    vol['dist'] = vol.set_index(['origin', 'destination']).index.map(od_length)
    # Intervals are right-closed (pd.cut default): a distance of exactly 0
    # falls outside the first class and becomes NaN
    vol['bins'] = pd.cut(vol['dist'], bins=bins, labels=labels)

In [None]:
# Sum up volumes
# Rows: distance classes; per scenario one column per segment plus three totals
vol_df = pd.DataFrame()
for scenario in scenarios:
    seg_cols = [scenario+' '+seg for seg in segments]
    car_cols = [scenario+' '+seg for seg in segments if '_no_car' not in seg]
    no_car_cols = [scenario+' '+seg for seg in segments if '_no_car' in seg]
    # observed=False keeps every distance class (also empty ones), so each
    # scenario yields the same row index when assigned into vol_df
    vol_df[seg_cols] = vols[scenario].volumes.groupby('bins', observed=False)[segments].sum()
    vol_df[scenario+' sum'] = vol_df[seg_cols].sum(axis=1)
    vol_df[scenario+' sum with car'] = vol_df[car_cols].sum(axis=1)
    vol_df[scenario+' sum without car'] = vol_df[no_car_cols].sum(axis=1)

In [None]:
# plot total
# Overlaid (non-stacked) area chart of the summed volumes, one area per scenario
total_cols = [s+' sum' for s in scenarios]
vol_df[total_cols].plot(kind='area', stacked=False, alpha=.4, figsize=(12,4))

In [None]:
# plot by car availability
# Left panel: totals without car; right panel: totals with car
fig, ax = plt.subplots(ncols=2, nrows=1, figsize=(12,4))
for panel, suffix in zip(ax, (' sum without car', ' sum with car')):
    panel_cols = [s+suffix for s in scenarios]
    vol_df[panel_cols].plot(kind='area', stacked=False, alpha=.4, ax=panel)

In [None]:
# plot by segment
# One panel per segment, each overlaying the scenarios as non-stacked areas.
# squeeze=False always returns a 2D array of axes, so a single segment
# does not break the iteration below.
fig, ax = plt.subplots(ncols=1, nrows=len(segments), figsize=(12,len(segments)*3),
                       sharex='all', squeeze=False)
for seg, seg_ax in zip(segments, ax[:, 0]):
    seg_cols = [scenario+' '+seg for scenario in scenarios]
    vol_df[seg_cols].plot(kind='area', stacked=False, alpha=.4, ax=seg_ax)