In [4]:
import sys
if 'ipykernel_launcher.py' not in sys.argv[0]:
    manual = False
    scenario = sys.argv[1]
else:
    manual = True
    scenario = 'intermodal'
    %matplotlib inline

In [10]:
sys.path.insert(0, r'../../../quetzal')
from quetzal.model import stepmodel
from quetzal.io import excel
from syspy.io.geojson_utils import gdf_to_geojson
import datetime
import winsound

import pandas as pd
import os
import json
import geopandas as gpd

import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style("whitegrid")

In [11]:
qpath = '../../'
mpath = qpath + 'model/'
ipath = qpath + 'inputs/'
opath = qpath + 'outputs/'

In [12]:
plt.rcParams.update({'font.size': 25})

# Define list of tests

In [13]:
if scenario == 'monomodal':
    fare_test = [
        ('base', 'fx', [25, 30, 35, 40, 45, 50, 55]),
        ('add', 'fx', [25, 30, 35, 40, 45, 50, 55])
    ]

if scenario == 'intermodal':
    fare_test = [
        ('base', 'fx', [20, 25, 30, 35, 40, 45, 50, 55]),
        ('add', 'fx', [5, 10, 15, 20])
    ]

if scenario == 'multimodal':
    fare_test = [
        ('base', 'fx', [45, 50, 55, 60, 65, 70, 80, 90])
    ]

all_tests = {
    'fares': fare_test
}

In [142]:
parameter_frame = pd.read_excel(qpath + r'inputs/parametros.xlsx').set_index(['category','parameter'])

for c in parameter_frame.columns:
    parent = parameter_frame[c][('general', 'parent')]
    try:
        parent = parent.iloc[0]
    except AttributeError:# 'str' object has no attribute 'iloc'
        pass
    parameter_frame[c] = parameter_frame[c].fillna(parameter_frame[parent])

base_scenario_parameter_frame = pd.DataFrame(parameter_frame[scenario]).dropna()

In [143]:
import itertools
import shutil

def create_test_parameters_dataframe(
    test, test_name, reference_parameters_dataframe=base_scenario_parameter_frame
):
    # Create scenario parameters combination dataframe
    parameter_values = [v[2] for v in test]
    data = list(itertools.product(*parameter_values))

    if scenario == 'monomodal':
        
        def monomodal(i):
            if i[0]==i[1]:
                return True
            else:
                return False

        it = data.copy()

        for d in it:
            if not monomodal(d):
                data.remove(d)

    if scenario == 'intermodal':
        
        def intermodal(i):
            if i[0]==i[1]:
                return False
            else:
                return True

        it = data.copy()

        for d in it:
            if not intermodal(d):
                data.remove(d)

    df = pd.DataFrame(
        data=data,
        columns=pd.MultiIndex.from_arrays([[v[0] for v in test], [v[1] for v in test]]),
        index=[test_name +'_'+ str(i) for i in range(len(data))]
    )

    df.T.index.names = reference_parameters_dataframe.index.names  # Set index name
    
    # Merge with reference scenario
    scenario_parameter_frame = reference_parameters_dataframe.merge(
        df.T, left_index=True, right_index=True, how='outer'
    )

    scenario_parameter_frame['general', 'parent'] = scenario  # Set parent
    
    # Fill reference values
    for c in scenario_parameter_frame.columns:
        parent = scenario_parameter_frame[c][('general', 'parent')][0]
        scenario_parameter_frame[c] = scenario_parameter_frame[c].fillna(scenario_parameter_frame[parent])

    # Drop duplicates
    scenario_parameter_frame = scenario_parameter_frame.apply(
        pd.to_numeric, axis=1, errors='ignore'
    )
    temp = scenario_parameter_frame.T.drop_duplicates()

    return temp.T

def run_tests(
    all_tests_dict, reference_parameters_dataframe, clean=True, df=None,
):
    for test_name, test_values in all_tests_dict.items():
        print(test_name)
        # Create parameter frame
        if df is None:
            df = create_test_parameters_dataframe(test_values, test_name, reference_parameters_dataframe)

        df.reset_index().to_excel(qpath + r'inputs/parametres.xlsx', index=False)
        # run
        %run 0_python_launcher.ipynb

        if clean:
            # Delete intermediate model steps, except for base
            reference_scenario_name = reference_parameters_dataframe.loc[('general', 'parent')].values[0]
            import os
            models = list(
                (set(os.listdir(qpath + r'models/')).intersection(df.columns)).difference({reference_scenario_name})
            )
            for m in models:
                for step in ['links', 'pt_pathfinder', 'assigned']:
                    try:
                        f = qpath + r'model/{}/{}.zip'.format(m, step)
                        shutil.rmtree(f)
                    except FileNotFoundError:
                        pass


def read_result(scenario, i):
    try:
        temp = pd.read_csv(mpath + r'{}/{}/calibration_results_{}.csv'.format(scenario, tp, scenario), index_col=0)
        try:
            r = temp[i][0]
        except KeyError:
            r = None
    except FileNotFoundError:
        r = None
    return r
                    
                    
def read_tests(all_tests_dict, reference_parameters_dataframe):
    all_results = {}
    temp = pd.read_csv(mpath + r'{}/{}/calibration_results_{}.csv'.format('base', tp, 'base'), index_col=0).T
    for test_name, test_values in all_tests_dict.items():
        df = create_test_parameters_dataframe(test_values, test_name, reference_parameters_dataframe)
        for i in temp.index:
            # Append results
            results = pd.concat(
                [
                    df.apply(
                        lambda x: read_result(x.name, i)
                    )
                ],
                keys=[('results', i)]
            )
            # results = results.apply(lambda x: round(x, 1))
            df = df.append(results.unstack())
        all_results.update({test_name: df})
    return all_results

In [144]:
df = create_test_parameters_dataframe(fare_test, 'fare')
print(len(df.columns))

KeyError: 'ambicioso_sitp_30'

In [141]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,ref_18,ambicioso_sitp_30,monomodal,intermodal,multimodal,fare_0,fare_1,fare_2,fare_3,fare_4,...,fare_21,fare_22,fare_23,fare_24,fare_25,fare_26,fare_27,fare_28,fare_29,fare_30
category,parameter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
add,fx,10,15,100,15,0,5,10,15,5,10,...,15,20,5,10,15,20,5,10,15,20
base,fx,1000,25,35,35,60,20,20,20,25,25,...,45,45,50,50,50,50,55,55,55,55
fare_id,bus,bus,bus,f0,"f0, f1, f2, f3",fn,"f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3",...,"f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3"
fare_id,concho,concho,concho,concho,concho,concho,concho,concho,concho,concho,concho,...,concho,concho,concho,concho,concho,concho,concho,concho,concho,concho
fare_id,express_bus,integrated,integrated,f0,"f0, f1, f2, f3",fn,"f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3",...,"f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3","f0, f1, f2, f3"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
transfers,express_bus,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
transfers,integrated,10,10,10,10,10,10,10,10,10,10,...,10,10,10,10,10,10,10,10,10,10
transfers,minibus,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
transfers,opret,10,10,10,10,10,10,10,10,10,10,...,10,10,10,10,10,10,10,10,10,10


# Read results

In [10]:
all_results = read_tests(all_tests, base_scenario_parameter_frame)

In [11]:
import seaborn as sns
import matplotlib.pyplot as plt
# from syspy.syspy_utils import data_visualization as dv
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (16, 5)
plt.rcParams['font.size']=14

In [12]:
def plot_2d(r, indicator_kwargs={}):
    fig, axes = plt.subplots(1, len(indicator_kwargs))
    axe_i = 0
    for indicator, kwargs in indicator_kwargs.items():
        temp = r[indicator].applymap(float)
        plot_kwargs = kwargs.get('plot_kwargs',{})
        ax=sns.heatmap(temp.applymap(float), ax=axes[axe_i], **plot_kwargs)
        for i in range(temp.shape[0]):
            for j in range(temp.shape[1]):
                text = axes[axe_i].text(
                    j+0.5, i+0.5, round(temp.iloc[i,j],2),
                    ha="center", va="center", color="k"
                )
        axes[axe_i].set_title(kwargs.get('plot_title', ''))
        
        axe_i += 1

    return fig, axes

def plot_1d(r, indicator_kwargs={}):
    fig, axes = plt.subplots(1, len(indicator_kwargs))
    axe_i = 0
    for indicator, kwargs in indicator_kwargs.items():
        temp = r.loc[indicator].sort_index()
        plot_kwargs = kwargs.get('plot_kwargs',{})
        temp.plot(ax=axes[axe_i], **plot_kwargs)
       
        axes[axe_i].set_title(kwargs.get('plot_title', ''))
        axe_i += 1

    
    return fig, axes

In [13]:
# Calibration tests
for test_name in all_tests.keys():
    params = [tuple([v[0], v[1]]) for v in all_tests[test_name]]
    n_params = len(all_tests[test_name])
    variable_name = ' '.join([x for x in test_name.split('_')[:-1] if x!='lrt'])

    result_names = [
        ('results', 'main_lines_geh'), ('results', 'geh_10_share'),
        ('results', 'score_4'), ('results', 'score_5'),
        ('results', 'transfer_rate'), ('results', 'walk_volumes')
    ]

    r = all_results[test_name].loc[
             params + result_names
    ].T.set_index(params).unstack()

    if n_params==1:
        plot_1d(
            r,
            indicator_kwargs={
                ('results', 'main_lines_geh'): {
                    'plot_title': 'main lines geh wrt \n {}'.format(variable_name),
                    'plot_kwargs': {'ylim': [0, 100]}
                },
                 ('results', 'geh_10_share'): {
                    'plot_title': 'geh_10_share wrt \n {}'.format(variable_name),
                    'plot_kwargs': {'ylim': [0, 100]}
                },
                 ('results', 'score_4'): {
                    'plot_title': 'score_4 wrt \n {}'.format(variable_name),
                    'plot_kwargs': {'ylim': [0, 100]}
                },
                ('results', 'score_5'): {
                    'plot_title': 'score_5 wrt \n {}'.format(variable_name),
                    'plot_kwargs': {'ylim': [0, 100]}
                },
                ('results', 'transfer_rate'): {
                    'plot_title': 'transfer_rate wrt \n{}'.format(variable_name),
                    'plot_kwargs': {'ylim': [0.8, 1.5]}
                },
                ('results', 'walk_volumes'): {
                    'plot_title': 'walk_volumes wrt \n{}'.format(variable_name),
                    'plot_kwargs': {'ylim': [0, 1500]}
                }
            }
        )

    if n_params==2:
        plot_2d(
            r,
            indicator_kwargs={
              ('results', 'main_lines_geh'): {
                    'plot_title': 'main_lines_geh with respect to {}'.format(variable_name),
                    'plot_kwargs': {
                        'vmin': 0, 'vmax': 100,
                        'cmap': 'RdYlGn_r', 'square': True,
                    }
                },
                 ('results', 'geh_10_share'): {
                    'plot_title': 'geh_10_share with respect to {}'.format(variable_name),
                    'plot_kwargs': {
                        'vmin': 0, 'vmax': 100,
                        'cmap': 'RdYlGn_r', 'square': True,
                    }
                }
            }
        )
        plt.tight_layout()   
        plot_2d(
            r,
            indicator_kwargs={
                 ('results', 'score_4'): {
                    'plot_title': 'score_4 with respect to {}'.format(variable_name),
                    'plot_kwargs': {
                        'vmin': 0, 'vmax': 100,
                        'cmap': 'RdYlGn', 'square': True,
                    }
                },
                ('results', 'score_5'): {
                    'plot_title': 'score_5 with respect to {}'.format(variable_name),
                    'plot_kwargs': {
                        'vmin': 0, 'vmax': 100,
                        'cmap': 'RdYlGn', 'square': True,
                    }
                }
            }
        )
        plt.tight_layout() 
        plot_2d(
            r,
            indicator_kwargs={
                 ('results', 'transfer_rate'): {
                    'plot_title': 'transfer_rate with respect to {}'.format(variable_name),
                    'plot_kwargs': {
                        'vmin': 1, 'vmax': 2,
                        'cmap': 'RdYlGn_r', 'square': True,
                    }
                },
                ('results', 'walk_volumes'): {
                    'plot_title': 'walk_volumes with respect to {}'.format(variable_name),
                    'plot_kwargs': {
                        'vmin': 500, 'vmax': 2000,
                        'cmap': 'RdYlGn_r', 'square': True,
                    }
                }
            }
        )

    plt.tight_layout()    

<Figure size 1152x360 with 0 Axes>

# results insight

In [14]:
# all_results['all'].to_excel(opath + r'calibration_results_3000.xlsx')

In [15]:
all_results['all'].loc[('results', 'score')].fillna(1000000).describe()

count     2500.000000
mean     10049.400303
std        209.799293
min       9844.724911
25%       9905.757454
50%       9970.445401
75%      10123.342190
max      11076.168264
Name: (results, score), dtype: float64

In [16]:
filtered_results = [k for k,v in (all_results['all'].loc[('results', 'score')].fillna(0).map(int) < 9880).to_dict().items() if v == True]
filtered_results

['base',
 'all_28',
 'all_30',
 'all_31',
 'all_32',
 'all_33',
 'all_35',
 'all_36',
 'all_37',
 'all_38',
 'all_39',
 'all_40',
 'all_41',
 'all_42',
 'all_43',
 'all_44',
 'all_45',
 'all_46',
 'all_47',
 'all_48',
 'all_49',
 'all_55',
 'all_56',
 'all_57',
 'all_58',
 'all_59',
 'all_60',
 'all_61',
 'all_62',
 'all_63',
 'all_64',
 'all_65',
 'all_66',
 'all_67',
 'all_68',
 'all_69',
 'all_73',
 'all_74',
 'all_75',
 'all_80',
 'all_81',
 'all_82',
 'all_83',
 'all_84',
 'all_150',
 'all_151',
 'all_152',
 'all_153',
 'all_155',
 'all_156',
 'all_157',
 'all_158',
 'all_160',
 'all_161',
 'all_162',
 'all_163',
 'all_165',
 'all_166',
 'all_167',
 'all_169',
 'all_170',
 'all_171',
 'all_172',
 'all_173',
 'all_174',
 'all_175',
 'all_180',
 'all_181',
 'all_182',
 'all_270',
 'all_271',
 'all_275',
 'all_276',
 'all_277',
 'all_280',
 'all_281',
 'all_282',
 'all_283',
 'all_285',
 'all_286',
 'all_287',
 'all_288',
 'all_390',
 'all_391',
 'all_392',
 'all_395',
 'all_396',
 '

In [17]:
df = all_results['all'].copy()
filtered_results = [k for k,v in (df.loc[('results', 'score_4')]>77).to_dict().items() if v == True]
filtered_results

['all_58',
 'all_324',
 'all_438',
 'all_683',
 'all_2321',
 'all_2420',
 'all_2446',
 'all_2471',
 'all_2472',
 'all_2473']

In [18]:
[x[:2] for x in all_tests['all']] + [('results', )]
tuple_results = [('results', x) for x in ['geh_10_share', 'main_lines_geh', 'score', 'score_4', 'score_5', 'transfer_rate','walk_volumes']]
results_parameters = [x[:2] for x in all_tests['all']] + tuple_results

In [19]:
all_results['all'][filtered_results].loc[results_parameters]

Unnamed: 0_level_0,Unnamed: 1_level_0,all_58,all_324,all_438,all_683,all_2321,all_2420,all_2446,all_2471,all_2472,all_2473
category,parameter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
clustering,mean_distance_threshold,50.0,50.0,50.0,100.0,200.0,200.0,200.0,200.0,200.0,200.0
general,boarding_time,150.0,210.0,240.0,150.0,240.0,270.0,270.0,270.0,270.0,270.0
footpaths,footpath_speed,1.75,1.75,1.75,1.75,1.75,1.5,1.75,2.0,2.0,2.0
strategy,alpha,0.7,1.0,0.8,0.7,1.0,1.0,1.0,1.0,1.0,1.0
penalties,tramway_duration,0.8,0.85,0.8,0.8,0.7,0.6,0.7,0.7,0.75,0.8
results,geh_10_share,9.12,6.58,6.58,9.12,6.58,6.58,6.58,6.58,6.58,6.58
results,main_lines_geh,10.9,6.66,8.24,10.42,4.92,5.31,4.24,4.09,4.1,4.1
results,score,9860.123139,9967.454594,9967.590492,9860.741517,9878.464821,9868.47385,9920.98824,10008.501325,10010.630472,10013.741036
results,score_4,77.22,78.62,80.41,77.22,78.62,80.41,78.62,78.62,78.62,78.62
results,score_5,77.22,82.03,80.41,77.22,89.53,82.67,87.74,87.74,87.74,87.74


In [20]:
all_results['all'].to_excel(mpath + 'calibration_all_20220313.xlsx')

In [None]:
selected_scenarios = ['all_24', 'all_268', 'all_1796']

In [None]:
all_results['all'][selected_scenarios].to_excel(ipath + r'parameters_selected.xlsx')

KeyError: "None of [Index(['all_24', 'all_268', 'all_1796'], dtype='object')] are in the [columns]"