In [3]:
import os
import pandas as pd

# Base vs Scenario

In [48]:
def produce_single_output(run_dir, run_name, output_path):
    # Load input and lookup data
    trip = pd.read_csv(os.path.join(run_dir, r'outputs\_trip.tsv'), sep='\t')
#     trip = trip.iloc[0:100000]

    hh = pd.read_csv(os.path.join(run_dir, r'outputs\_household.tsv'), sep='\t')

    # Merge hhtaz to the trip records
    trip = pd.merge(trip, hh[['hhno','hhparcel']], on='hhno', how='left')
                
    # Load parcel-census lookup
    parcel_census = pd.read_csv(r'R:\Brice\gis\parcels_urbansim_census.txt')
    buffered_parcels = pd.read_csv(os.path.join(run_dir,r'inputs\buffered_parcels.txt'), sep=' ')
    buffered_parcels = pd.merge(buffered_parcels,parcel_census[['TRACTCE10','parcelid']], on='parcelid', how='left')
                     
    # Commute trip cost
    # Merge census tract to trip records
    trip = pd.merge(trip, parcel_census[['TRACTCE10','parcelid']], left_on='hhparcel', 
                    right_on='parcelid', how='left')
                     
    # Trip cost, auto commute, direct from home to work
    commute_trips = trip[(trip['dpurp'] == 1) & (trip['opurp'] == 0) & (trip['mode'].isin([3,4,5]))]

    # Average commute costs by car (SOV, HOV2, HOV3+)
    commute_trips = commute_trips.groupby(['TRACTCE10']).mean()['travcost']
                  
    # Transit access
    # NOT including local buses... should we be?? (dist_lbus)

    # Indicate if closest transit stop is within a half mile
    buffered_parcels.ix[buffered_parcels[['dist_ebus','dist_crt','dist_fry','dist_lrt','dist_lbus']].min(axis=1) <= 0.5, 'half_mile_transit'] =  1
    buffered_parcels['half_mile_transit'] = buffered_parcels['half_mile_transit'].fillna(0)
    transit_access = buffered_parcels.groupby('TRACTCE10').mean()['half_mile_transit']
                     
    # Transit Cost
    # average cost for all (?) transit trips
    transit_trips = trip[trip['mode'] == 6]
    transit_cost = transit_trips.groupby(['TRACTCE10']).mean()['travcost']
                     
    # Non-motorized mode share
    # % of all commutes by walk or bike
    
    trip.ix[trip['mode'].isin([1,2]),'nmt'] = 1
    trip['nmt'] = trip['nmt'].fillna(0)
    nmt_share = trip.groupby(['TRACTCE10']).mean()['nmt']
                     
    # Write results to CSV
    df = pd.DataFrame([nmt_share.index, commute_trips, transit_cost,transit_access,nmt_share]).T
    df.columns = ['tract','driving_commute_cost','transit_cost','transit_access','nmt_share']
    df['source'] = run_name
    
    # Write tract with leading zeros to match census shapefile
    df['tract'] = df['tract'].fillna(0).astype('int').astype('str').apply(lambda x: x.zfill(6))
    
    return df

In [None]:
# Results for census tracts
output_path = r'J:\Projects\Soundcast\opportunity_index\opportunity_index.txt'

run_dir = r'U:\Stefan\soundcast_2014'
run_name = '2014'
df_base = produce_single_output(run_dir, run_name, output_path)

run_dir = r'S:\Stefan\soundcast_2040_plan'
run_name = '2040 Plan'
df_scen = produce_single_output(run_dir, run_name, output_path)

In [80]:
def produce_single_output_taz(run_dir, run_name, output_path):
    # Load input and lookup data
    trip = pd.read_csv(os.path.join(run_dir, r'outputs\_trip.tsv'), sep='\t')
#     trip = trip.iloc[0:100000]

    hh = pd.read_csv(os.path.join(run_dir, r'outputs\_household.tsv'), sep='\t')

    # Merge hhtaz to the trip records
    trip = pd.merge(trip, hh[['hhno','hhparcel','hhtaz']], on='hhno', how='left')
                
    # Load parcel-census lookup
    buffered_parcels = pd.read_csv(os.path.join(run_dir,r'inputs\buffered_parcels.txt'), sep=' ')
                     
    # Commute trip cost
    # Merge census tract to trip records
                     
    # Trip cost, auto commute, direct from home to work
    commute_trips = trip[(trip['dpurp'] == 1) & (trip['opurp'] == 0) & (trip['mode'].isin([3,4,5]))]

    # Average commute costs by car (SOV, HOV2, HOV3+)
    commute_trips = commute_trips.groupby(['hhtaz']).mean()['travcost']
                  
    # Transit access
    # NOT including local buses... should we be?? (dist_lbus)

    # Indicate if closest transit stop is within a half mile
    buffered_parcels.ix[buffered_parcels[['dist_ebus','dist_crt','dist_fry','dist_lrt','dist_lbus']].min(axis=1) <= 0.5, 'half_mile_transit'] =  1
    buffered_parcels['half_mile_transit'] = buffered_parcels['half_mile_transit'].fillna(0)
    transit_access = buffered_parcels.groupby('taz_p').mean()['half_mile_transit']
                     
    # Transit Cost
    # average cost for all (?) transit trips
    transit_trips = trip[trip['mode'] == 6]
    transit_cost = transit_trips.groupby(['hhtaz']).mean()['travcost']
                     
    # Non-motorized mode share
    # % of all commutes by walk or bike
    
    trip.ix[trip['mode'].isin([1,2]),'nmt'] = 1
    trip['nmt'] = trip['nmt'].fillna(0)
    nmt_share = trip.groupby(['hhtaz']).mean()['nmt']
                     
    # Write results to CSV
    df = pd.DataFrame([nmt_share.index, commute_trips, transit_cost,transit_access,nmt_share]).T
    df.columns = ['taz','driving_commute_cost','transit_cost','transit_access','nmt_share']
    df['source'] = run_name
        
    return df

In [81]:
# Results using TAZ
output_path = r'J:\Projects\Soundcast\opportunity_index\opportunity_index_taz.txt'

run_dir = r'U:\Stefan\soundcast_2014'
run_name = '2014'
df_base = produce_single_output_taz(run_dir, run_name, output_path)

run_dir = r'S:\Stefan\soundcast_2040_plan'
run_name = '2040 Plan'
df_scen = produce_single_output_taz(run_dir, run_name, output_path)

### Compare scenario versus a baseline, using baseline bounds


In [82]:
# Calculate z scores
# calculate z zcores
df_base['transit_cost_z_score'] = (df_base['transit_cost'] - df_base['transit_cost'].mean())/df_base['transit_cost'].std()
df_base['driving_commute_cost_z_score'] = (df_base['driving_commute_cost'] - df_base['driving_commute_cost'].mean())/df_base['driving_commute_cost'].std()
df_base['transit_access_z_score'] = (df_base['transit_access'] - df_base['transit_access'].mean())/df_base['transit_access'].std()
df_base['nmt_share_z_score'] = (df_base['nmt_share'] - df_base['nmt_share'].mean())/df_base['nmt_share'].std()

# Some factors are negative, convert (transit cost, commute cost are good as negative values,
# but transit access and nmt share are good as positive)
df_base['transit_cost_z_score'] = df_base['transit_cost_z_score']*-1
df_base['driving_commute_cost_z_score'] = df_base['driving_commute_cost_z_score']*-1

# calculate opportunity index as unweighted average of all z scores
df_base['opportunity_index'] = df_base[['transit_cost_z_score','driving_commute_cost_z_score',
                             'transit_access_z_score','nmt_share_z_score']].mean(axis=1)

# 
df_scen['transit_cost_z_score'] = (df_scen['transit_cost'] - df_base['transit_cost'].mean())/df_base['transit_cost'].std()
df_scen['driving_commute_cost_z_score'] = (df_scen['driving_commute_cost'] - df_base['driving_commute_cost'].mean())/df_base['driving_commute_cost'].std()
df_scen['transit_access_z_score'] = (df_scen['transit_access'] - df_base['transit_access'].mean())/df_base['transit_access'].std()
df_scen['nmt_share_z_score'] = (df_scen['nmt_share'] - df_base['nmt_share'].mean())/df_base['nmt_share'].std()

# Some factors are negative, convert (transit cost, commute cost are good as negative values,
# but transit access and nmt share are good as positive)
df_scen['transit_cost_z_score'] = df_scen['transit_cost_z_score']*-1
df_scen['driving_commute_cost_z_score'] = df_scen['driving_commute_cost_z_score']*-1

# calculate opportunity index as unweighted average of all z scores
df_scen['opportunity_index'] = df_scen[['transit_cost_z_score','driving_commute_cost_z_score',
                             'transit_access_z_score','nmt_share_z_score']].mean(axis=1)

In [91]:
# Cut the base data in quintiles
df_base['opportunity_index_group'] = pd.qcut(df_base['opportunity_index'],5,labels=['lowest','low','moderate','high','highest'])
bins = pd.qcut(df_base['opportunity_index'],5,retbins=True)[1]

# Use these bins to slice the scenario results
df_scen['opportunity_index_group'] = pd.cut(x=df_scen['opportunity_index'],bins=bins,labels=['lowest','low','moderate','high','highest'])
# df_scen['opportunity_index_group'] = pd.cut(df_scen['opportunity_index'],5,labels=['lowest','low','moderate','high','highest'])

In [73]:
# Write to file
df = df_scen.append(df_base)
df.to_csv(output_path, index=False)

In [98]:
# Show a difference map
compare_dir = r'J:\Projects\Soundcast\opportunity_index\opportunity_index_taz_compare.txt'
df = pd.merge(df_base,df_scen,on='taz',suffixes=['_2014','_2040'])
df['taz'] = df['taz'].fillna(0).astype('int').astype('str')
df.to_csv(compare_dir)

0         1
1         2
2         3
3         4
4         5
5         6
6         7
7         8
8         9
9        10
10       11
11       12
12       13
13       14
14       15
15       16
16       17
17       18
18       19
19       20
20       21
21       22
22       23
23       24
24       25
25       26
26       27
27       28
28       29
29       30
         ..
62815     0
62816     0
62817     0
62818     0
62819     0
62820     0
62821     0
62822     0
62823     0
62824     0
62825     0
62826     0
62827     0
62828     0
62829     0
62830     0
62831     0
62832     0
62833     0
62834     0
62835     0
62836     0
62837     0
62838     0
62839     0
62840     0
62841     0
62842     0
62843     0
62844     0
Name: taz, dtype: object

In [33]:
df_base['transit_access_z_score'].mean()

1.8298546328148307e-15

In [34]:
df_scen['transit_access_z_score'].mean()

0.06683609549617174

In [35]:
df_base.transit_access.mean()

0.6276665577660727

In [36]:
df_scen.transit_access.mean()

0.6524064927220737

In [42]:
df_base.driving_commute_cost.mean()

2.5901478070332957

In [43]:
df_scen.driving_commute_cost.mean()

2.5560056886498046

In [44]:
df_base.transit_cost.mean()

1.3962454711585979

In [45]:
df_scen.transit_cost.mean()

1.4733573590086115