## Imports

In [1]:
from CU_POLARIS_Postprocessor.config import PostProcessingConfig
from CU_POLARIS_Postprocessor import prerun, parallel
from CU_POLARIS_Postprocessor.power_bi_processing import prep_utils
from pathlib import Path




## Config

In [23]:
# Post-processing configuration for the demo case folders shipped with the package.
config_wtp = PostProcessingConfig(
    # All reset / force switches are off for this run.
    fresh_start=False,
    reset_sql=False,
    reset_csvs=False,
    reset_stops=False,
    force_skims=False,
    # Anchor the demo folder to an absolute path when the config is built.
    # A relative path is re-interpreted against whatever the working directory
    # happens to be later on, which yields "cannot find the path specified"
    # errors if the working directory is not the one this cell was run from.
    base_dir=Path('CU_POLARIS_Postprocessor/demo_files/').absolute(),
    scenario_file_names=[
        'scenario_abm.modified.json',
        'scenario_abm_trajectories.modified.json',
    ],
    fleet_model_file_names=['SAEVFleetModel_optimization.json'],
    db_names=['campo', 'greenville'],
    pooling_model_file=['PoolingModel.json'],
    # Output name -> (name, kwargs) pair.
    # NOTE(review): presumably the processing routine to call and its keyword
    # arguments -- confirm against the package.
    postprocessing_definitions={
        'requests_sum': ('process_solo_equiv_fare', {'force_skims': False}),
        'closest_stops': ('process_nearest_stops', {}),
        'tnc_skim_demo': ('process_elder_request_agg', {}),
        'tnc_stat_summary': ('process_tnc_stat_summary', {}),
    },
    # Output name -> output kind ('sql', 'sql_helper', 'postprocessing' or
    # 'postprocessing_helper'). Entry order is kept exactly as originally written.
    desired_outputs={
        'transit_trip_max_load_helper': 'sql',
        'attach': 'sql_helper',
        'transit_trip_max_load': 'sql',
        'mode_Distribution_ADULT_Counts': 'sql',
        'mode_Distribution_ADULT_Distance': 'sql',
        'bus_avo': 'sql',
        'pr_avo': 'sql',
        'fare_sensitivity_results': 'sql',
        'mode_Distribution_ADULT': 'sql',
        'distance_tnc_dist': 'sql',
        'fare_sensitivity_results_zonal': 'sql',
        'fare_sensitivity_demograpic_tnc_stats': 'sql',
        'fare_sensitivity_results_vo': 'sql',
        'tnc_results_discount': 'sql',
        'elder_demo': 'sql',
        'requests': 'postprocessing_helper',
        'requests_sum_helper': 'postprocessing_helper',
        'requests_sum': 'postprocessing',
        'closest_stops': 'postprocessing_helper',
        'tnc_stat_summary_helper': 'postprocessing_helper',
        'tnc_stat_summary': 'postprocessing',
        'tnc_skim_demo': 'postprocessing',
    },
    output_h5=True,
)

## Preprocessing

In [24]:
# Run the package's pre-run checks for this configuration before processing.
# The call is the cell's last expression, so its return value is displayed.
# NOTE(review): the following cell reads config_wtp.csvs, presumably populated
# by this call -- confirm against the package.
prerun.pre_run_checks(config_wtp)


True

### Show output of prerun

In [25]:
import pandas as pd

# Flatten the per-output entries stored on the config into one row each:
# (output name, type, exists flag, path). Missing fields come back as None.
csvs = config_wtp.csvs
demo = [
    (name, info.get('type'), info.get('exists'), info.get('path'))
    for name, info in csvs.items()
]

# Tabulate the rows for display as the cell output.
df = pd.DataFrame(demo, columns=['Key', 'Type', 'Exists', 'Location'])
df

Unnamed: 0,Key,Type,Exists,Location
0,transit_trip_max_load_helper,sql,False,C:\Users\jpaul4\Box\Research\Papers\4_WTP Fact...
1,transit_trip_max_load,sql,False,C:\Users\jpaul4\Box\Research\Papers\4_WTP Fact...
2,mode_Distribution_ADULT_Counts,sql,False,C:\Users\jpaul4\Box\Research\Papers\4_WTP Fact...
3,mode_Distribution_ADULT_Distance,sql,False,C:\Users\jpaul4\Box\Research\Papers\4_WTP Fact...
4,bus_avo,sql,False,C:\Users\jpaul4\Box\Research\Papers\4_WTP Fact...
5,pr_avo,sql,False,C:\Users\jpaul4\Box\Research\Papers\4_WTP Fact...
6,fare_sensitivity_results,sql,False,C:\Users\jpaul4\Box\Research\Papers\4_WTP Fact...
7,mode_Distribution_ADULT,sql,False,C:\Users\jpaul4\Box\Research\Papers\4_WTP Fact...
8,distance_tnc_dist,sql,False,C:\Users\jpaul4\Box\Research\Papers\4_WTP Fact...
9,fare_sensitivity_results_zonal,sql,False,C:\Users\jpaul4\Box\Research\Papers\4_WTP Fact...


## Process

Processing these output folders from scratch takes about 10 minutes. On later runs it resumes any incomplete processing and reuses existing results where they are available.

In [26]:
# Process the case folders described by config_wtp.
# NOTE(review): presumably folders are handled in parallel, as the function
# name suggests -- confirm worker behaviour in the package.
parallel.parallel_process_folders(config_wtp)


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'C:\\Users\\jpaul4\\Box\\Research\\Papers\\4_WTP Factors Paper\\cases_only_critical\\gvl_heur\\greenville_iteration_7\\CU_POLARIS_Postprocessor\\demo_files'

### Show Output of processing

In [6]:
# List the name of every result held on the config, one per line.
for result_name, _result in config_wtp.results.items():
    print(result_name)

transit_trip_max_load
mode_Distribution_ADULT_Counts
mode_Distribution_ADULT_Distance
bus_avo
pr_avo
fare_sensitivity_results
mode_Distribution_ADULT
distance_tnc_dist
fare_sensitivity_results_zonal
fare_sensitivity_demograpic_tnc_stats
fare_sensitivity_results_vo
tnc_results_discount
elder_demo
requests_sum
tnc_stat_summary
tnc_skim_demo


### Power BI Post Processing

In [19]:
# Case names handed to the t-test step below.
# NOTE(review): presumably these are the baseline cases the other cases are
# compared against -- confirm against prep_utils.
base_cases = ['atx_du_7','gvl_du_7']
prep_utils.process_tnc_ttests(config_wtp,base_cases)
prep_utils.process_folder_names(config_wtp)
# Last expression of the cell, so its return value is shown as the cell output.
prep_utils.update_h5(config_wtp)

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block2_values] [items->Index(['mode', 'type', 'folder', 'City', 'Iteration', 'Strategy', 'WTP_Type'], dtype='object')]

  store[key] = df


True

### Load in final results

In [1]:
import pandas as pd

# Location of the HDF5 results store inside the configured base directory.
# `as_posix` is a method: it has to be called, otherwise the expression tries
# to add a bound method to a string and raises TypeError. Joining with `/`
# first also avoids hand-building the separator.
h5_path = (config_wtp.base_dir / 'results.h5').as_posix()

# Load each stored table into a DataFrame named after its HDF5 key.
mode_Distribution_ADULT_Distance = pd.read_hdf(h5_path, key='mode_Distribution_ADULT_Distance')
mode_Distribution_ADULT_Counts = pd.read_hdf(h5_path, key='mode_Distribution_ADULT_Counts')
bus_avo = pd.read_hdf(h5_path, key='bus_avo')
pr_avo = pd.read_hdf(h5_path, key='pr_avo')
fare_sensitivity_results = pd.read_hdf(h5_path, key='fare_sensitivity_results')
mode_Distribution_ADULT = pd.read_hdf(h5_path, key='mode_Distribution_ADULT')
distance_tnc_dist = pd.read_hdf(h5_path, key='distance_tnc_dist')
fare_sensitivity_results_zonal = pd.read_hdf(h5_path, key='fare_sensitivity_results_zonal')
fare_sensitivity_demograpic_tnc_stats = pd.read_hdf(h5_path, key='fare_sensitivity_demograpic_tnc_stats')
fare_sensitivity_results_vo = pd.read_hdf(h5_path, key='fare_sensitivity_results_vo')
tnc_results_discount = pd.read_hdf(h5_path, key='tnc_results_discount')
elder_demo = pd.read_hdf(h5_path, key='elder_demo')
requests_sum = pd.read_hdf(h5_path, key='requests_sum')
tnc_stat_summary = pd.read_hdf(h5_path, key='tnc_stat_summary')
tnc_skim_demo = pd.read_hdf(h5_path, key='tnc_skim_demo')
# NOTE(review): 'tnc_ttests' is presumably written by prep_utils.process_tnc_ttests -- confirm.
tnc_ttests = pd.read_hdf(h5_path, key='tnc_ttests')
# NOTE(review): the result keys printed earlier in this notebook list
# 'transit_trip_max_load' but not 'transit_trip_max_load_helper'; verify this
# key exists in results.h5, otherwise this read fails with a KeyError.
transit_trip_max_load_helper = pd.read_hdf(h5_path, key='transit_trip_max_load_helper')

NameError: name 'pd' is not defined