In [60]:
import os, sys, time
import warnings
import re
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import pandana as pdna
pd.options.mode.chained_assignment = None
import h5py
sys.path.append(os.path.join(os.getcwd(),r'..\..\..'))
from input_configuration import base_year

%matplotlib inline

from IPython.display import display, HTML

if (sys.version_info < (3, 0)):
    HTML('''<script>
    code_show=true; 
    function code_toggle() {
     if (code_show){
     $('div.input').hide();
     } else {
     $('div.input').show();
     }
     code_show = !code_show
    } 
    $( document ).ready(code_toggle);
    </script>
    <form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')


In [2]:
CSS = """
.output {
    flex-direction: row;
}
"""

HTML('<style>{}</style>'.format(CSS))

# Relative path between notebooks and goruped output directories
relative_path = '../../../outputs'
survey_path = '../../../inputs/base_year/survey'

In [3]:
pd.options.display.float_format = '{:0,.0f}'.format

In [4]:
def load_network_summary(filepath):
    """Load network-level results using a standard procedure. """
    df = pd.read_csv(filepath)

    # Congested network components by time of day
    df.columns

    # Get freeflow from 20to5 period

    # Exclude trips taken on non-designated facilities (facility_type == 0)
    # These are artificial (weave lanes to connect HOV) or for non-auto uses 
    df = df[df['data3'] != 0]    # data3 represents facility_type

    # calculate total link VMT and VHT
    df['VMT'] = df['@tveh']*df['length']
    df['VHT'] = df['@tveh']*df['auto_time']/60

    # Define facility type
    df.loc[df['data3'].isin([1,2]), 'facility_type'] = 'highway'
    df.loc[df['data3'].isin([3,4,6]), 'facility_type'] = 'arterial'
    df.loc[df['data3'].isin([5]), 'facility_type'] = 'connector'

    # Calculate delay
    # Select links from overnight time of day
    delay_df = df.loc[df['tod'] == '20to5'][['ij','auto_time']]
    delay_df.rename(columns={'auto_time':'freeflow_time'}, inplace=True)

    # Merge delay field back onto network link df
    df = pd.merge(df, delay_df, on='ij', how='left')

    # Calcualte hourly delay
    df['total_delay'] = ((df['auto_time']-df['freeflow_time'])*df['@tveh'])/60    # sum of (volume)*(travtime diff from freeflow)

    df['county'] =df['@countyid'].map({33: 'King',
                                      35: 'Kitsap',
                                      53: 'Pierce',
                                      61: 'Snohomish'})
    
    return df

# Trips per Day by Resident
County, Center, Regional Geography

Youth, elderly, LEP, Race & Poverty for Region Work/Non-Work

In [5]:
pd.options.display.float_format = '{:0,.1f}'.format
df_trip = pd.read_csv(r'..\..\..\outputs\agg\dash\person_trips.csv')
df_person = pd.read_csv(r'..\..\..\outputs\agg\dash\person_geog.csv')

### County and Region

In [6]:
def trips_per_day(geog): 
    df1 = df_person.groupby(geog).sum()[['psexpfac']]
    df2 = df_trip.groupby(geog).sum()[['trexpfac']]
    df = df1.merge(df2, left_index=True, right_index=True)
    df.loc['Region', 'psexpfac'] = df_person.sum()['psexpfac']
    df.loc['Region', 'trexpfac'] = df_trip.sum()['trexpfac']
    df['Total Trips per Day'] = df['trexpfac']/df['psexpfac']

    df_trip.loc[df_trip['dpurp'] != 'Work', 'trip_type'] = 'Non-Work'
    df_trip.loc[df_trip['dpurp'] == 'Work', 'trip_type'] = 'Work'
    for purp in ['Work','Non-Work']:
        _df = df_trip[df_trip['trip_type'] == purp].groupby(geog).sum()[['trexpfac']]
        _df.rename(columns={'trexpfac': purp + ' Trips'}, inplace=True)

        # Add regional total to bottom of table
        _df.loc['Region', purp + ' Trips'] = df_trip[df_trip['trip_type'] == purp].sum()['trexpfac']
        df = df.merge(_df, left_index=True, right_index=True)

    df['Work Trips per Day'] = df['Work Trips']/df['psexpfac']
    df['Non-Work Trips per Day'] = df['Non-Work Trips']/df['psexpfac']
    df[['Work Trips per Day','Non-Work Trips per Day','Total Trips per Day']]
    
    return df[['Work Trips per Day', 'Non-Work Trips per Day','Total Trips per Day']]

In [7]:
df = trips_per_day('hh_county')
df.to_csv('rtp_person_1_trips_per_day_county.csv')
df

Unnamed: 0_level_0,Work Trips per Day,Non-Work Trips per Day,Total Trips per Day
hh_county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
King,0.7,3.5,4.2
Kitsap,0.5,3.4,3.9
Pierce,0.5,3.3,3.8
Snohomish,0.6,3.4,3.9
Region,0.6,3.4,4.0


### Regional Growth Centers

In [8]:
df = trips_per_day('hh_rgc')
df.to_csv('rtp_person_2_trips_per_day_rgc.csv')
df

Unnamed: 0_level_0,Work Trips per Day,Non-Work Trips per Day,Total Trips per Day
hh_rgc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Auburn,0.6,3.5,4.0
Ballard-Interbay,1.0,3.6,4.6
Bellevue,0.9,3.6,4.6
Bothell Canyon Park,0.8,3.3,4.1
Bremerton,0.6,3.5,4.1
Burien,0.7,3.4,4.1
Cascade,0.5,3.3,3.8
Duwamish,0.8,3.5,4.3
Everett,0.6,3.5,4.2
Frederickson,0.4,3.2,3.7


### Regional Geographies

In [9]:
df = trips_per_day('hh_rg_proposed')
df.to_csv('rtp_person_3_trips_per_day_reg_geog.csv')
df

Unnamed: 0_level_0,Work Trips per Day,Non-Work Trips per Day,Total Trips per Day
hh_rg_proposed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CitiesTowns,0.5,3.4,3.9
Core,0.6,3.4,4.0
HCT,0.6,3.4,4.0
Metro,0.7,3.6,4.3
Rural,0.5,3.2,3.7
UU,0.5,3.3,3.8
Region,0.6,3.4,4.0


Equity Geographies

In [10]:
# Summary by equity geographies
# Calculate person totals
results_df = pd.DataFrame()
equity_geogs = ['hh_youth','hh_elderly','hh_english','hh_racial','hh_poverty']
for equity_geog in equity_geogs:
    for geog_type in ['_reg','_50']:
        val = df_person[df_person[equity_geog+geog_type] == 1].sum()['psexpfac']
        results_df.loc[equity_geog+geog_type,'psexpfac'] = val
        
# Calculate total work trips per each group
work_results_df = pd.DataFrame()
equity_geogs = ['hh_youth','hh_elderly','hh_english','hh_racial','hh_poverty']
for equity_geog in equity_geogs:
    for geog_type in ['_reg','_50']:
        for purp in ['Work','Non-Work']:
            _df = df_trip[df_trip['trip_type'] == purp].groupby('hh_county').sum()[['trexpfac']]

In [11]:
results_df

Unnamed: 0,psexpfac
hh_youth_reg,2198562.0
hh_youth_50,0.0
hh_elderly_reg,1875028.0
hh_elderly_50,4531.0
hh_english_reg,1555161.0
hh_english_50,0.0
hh_racial_reg,1822876.0
hh_racial_50,851908.0
hh_poverty_reg,1684701.0
hh_poverty_50,94821.0


FIXME: add dpurp in addition to mode; remove some of the unused fields like RGC, city

In [12]:
df_trip[df_trip['hh_racial_reg'] == 1].groupby('mode').sum()[['trexpfac']]

Unnamed: 0_level_0,trexpfac
mode,Unnamed: 1_level_1
Bike,112491.0
HOV2,1686086.0
HOV3+,1196733.0
SOV,2960185.0
School Bus,154755.0
TNC,80483.0
Transit,253321.0
Walk,998331.0


In [13]:
join_cols

NameError: name 'join_cols' is not defined

In [14]:
df.groupby('hh_racial_50').sum()

KeyError: 'hh_racial_50'

In [15]:
df['psexpfac'].sum()

KeyError: 'psexpfac'

# Miles Driven per Day by Resident
County, Center, Regional Geography

Youth, elderly, LEP, Race & Poverty for Region Work/Non-Work

In [16]:
df_vmt = pd.read_csv(r'..\..\..\outputs\agg\dash\person_vmt.csv')
df_person = pd.read_csv(r'..\..\..\outputs\agg\dash\person_geog.csv')

# Select only drivers (dorp = 1) and auto trips
df_vmt = df_vmt[df_vmt['mode'].isin(['SOV','HOV2','HOV3+']) & (df_vmt['dorp'] == 1)]

def vmt_per_person(df_vmt, df_person, geog):
    _df_vmt = df_vmt.groupby(geog).sum()[['travdist_wt']]
    _df_person = df_person.groupby(geog).sum()[['psexpfac']]

    df = _df_vmt.merge(_df_person, left_index=True, right_index=True)
    df.loc['Region',:] = df.sum(axis=0)
    df['Average Miles per Person'] = df['travdist_wt']/df['psexpfac']
    
    return df[['Average Miles per Person']]

### County

In [17]:
df = vmt_per_person(df_vmt, df_person, 'hh_county')
df.to_csv('rtp_person_5_vmt_per_day_county.csv')
df

Unnamed: 0_level_0,Average Miles per Person
hh_county,Unnamed: 1_level_1
King,15.8
Kitsap,15.1
Pierce,17.7
Snohomish,19.7
Region,16.9


### Regional Growth Centers

In [18]:
df = vmt_per_person(df_vmt, df_person, 'hh_rgc')
df.to_csv('rtp_person_6_vmt_per_day_rgc.csv')
df

Unnamed: 0_level_0,Average Miles per Person
hh_rgc,Unnamed: 1_level_1
Auburn,12.5
Ballard-Interbay,11.1
Bellevue,7.2
Bothell Canyon Park,21.0
Bremerton,5.4
Burien,13.9
Cascade,17.4
Duwamish,10.9
Everett,9.2
Frederickson,19.5


### Regional Geography

In [19]:
df = vmt_per_person(df_vmt, df_person, 'hh_rg_proposed')
df.to_csv('rtp_person_5_vmt_per_day_reg_geog.csv')
df

Unnamed: 0_level_0,Average Miles per Person
hh_rg_proposed,Unnamed: 1_level_1
CitiesTowns,21.6
Core,16.2
HCT,17.3
Metro,11.6
Rural,25.9
UU,19.3
Region,16.9


### Equity Geographies

# Miles Walking and Biking per Day by Resident
County, Center, Regional Geography

Youth, elderly, LEP, Race & Poverty for Region Work/Non-Work

In [20]:
df_vmt = pd.read_csv(r'..\..\..\outputs\agg\dash\person_vmt.csv')
df_person = pd.read_csv(r'..\..\..\outputs\agg\dash\person_geog.csv')



def walk_bike_per_person(df_vmt, df_person, geog):
    
    results_df = pd.DataFrame()
    for mode in ['Walk','Bike']:
        
        _df_vmt = df_vmt[df_vmt['mode'] == mode]
        _df_vmt = _df_vmt.groupby(geog).sum()[['travdist_wt']]
        _df_person = df_person.groupby(geog).sum()[['psexpfac']]

        df = _df_vmt.merge(_df_person, left_index=True, right_index=True)
        df.loc['Region',:] = df.sum(axis=0)
        df['Average Miles per Person'] = df['travdist_wt']/df['psexpfac']
        results_df[mode] = df['Average Miles per Person']
    
    _df_vmt = df_vmt[df_vmt['mode'].isin(['Walk','Bike'])]
    _df_vmt = _df_vmt.groupby(geog).sum()[['travdist_wt']]
    _df_person = df_person.groupby(geog).sum()[['psexpfac']]

    df = _df_vmt.merge(_df_person, left_index=True, right_index=True)
    df.loc['Region',:] = df.sum(axis=0)
    df['Average Miles per Person'] = df['travdist_wt']/df['psexpfac']
    results_df['Walk and Bike'] = df['Average Miles per Person']
    
    return results_df

### County

In [21]:
df = walk_bike_per_person(df_vmt, df_person, 'hh_county')
df.to_csv('rtp_person_7_walkbike_per_day_county.csv')
df

Unnamed: 0_level_0,Walk,Bike,Walk and Bike
hh_county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
King,0.5,0.3,0.8
Kitsap,0.4,0.2,0.7
Pierce,0.4,0.3,0.7
Snohomish,0.4,0.3,0.6
Region,0.5,0.3,0.8


### Regional Growth Center 

In [22]:
df = walk_bike_per_person(df_vmt, df_person, 'hh_rgc')
df.to_csv('rtp_person_7_walkbike_per_day_rgc.csv')
df

Unnamed: 0_level_0,Walk,Bike,Walk and Bike
hh_rgc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Auburn,0.6,0.4,0.9
Ballard-Interbay,0.8,0.5,1.3
Bellevue,1.0,0.3,1.2
Bothell Canyon Park,0.5,0.6,1.1
Bremerton,0.8,0.3,1.1
Burien,0.5,0.5,1.0
Cascade,0.4,0.2,0.7
Duwamish,0.8,0.3,1.1
Everett,0.8,0.3,1.1
Frederickson,0.3,0.2,0.5


### Regional Geography

In [23]:
df = walk_bike_per_person(df_vmt, df_person, 'hh_rg_proposed')
df.to_csv('rtp_person_7_walkbike_per_day_reg_geog.csv')
df

Unnamed: 0_level_0,Walk,Bike,Walk and Bike
hh_rg_proposed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CitiesTowns,0.4,0.3,0.6
Core,0.4,0.3,0.7
HCT,0.4,0.3,0.7
Metro,0.7,0.3,1.0
Rural,0.3,0.3,0.6
UU,0.4,0.2,0.6
Region,0.5,0.3,0.8


### Equity Geographies

# Mode Share by Resident
County, Center, Regional Geography

Youth, elderly, LEP, Race & Poverty for Region Work/Non-Work

# Jobs Accessible within 45 Minutes of Transit

In [94]:
#### NOTE FIX me
### Move all the parcel loads up here

In [96]:
def get_average_jobs_transit(transit_data, geo_attr, parcel_attributes_list):
    """ Calculate the weighted average number of jobs available across a geography. """

    for attr in parcel_attributes_list: 
        # print 'process attribute: ', attr
        
        # Calculated weight values
        weighted_attr = 'HHweighted_' + attr
        transit_data[weighted_attr] = transit_data['HH_P']*transit_data[attr]
    
    # Group results by geographic defintion
    transit_data_groupby = transit_data.groupby([geo_attr]).sum()
    transit_data_groupby.reset_index(inplace = True)
    for attr in parcel_attributes_list: 
        weighted_attr = 'HHweighted_' + attr
        averaged_attr = 'HHaveraged_' + attr
        transit_data_groupby[averaged_attr] = transit_data_groupby[weighted_attr]/transit_data_groupby['HH_P']
    return transit_data_groupby

def get_transit_information(bank):
    """Extract transit travel times from skim matrices, between all zones"""

    # Bus and rail travel times are the sum of access, wait time, and in-vehicle times; Bus and rail have separate paths
    bus_time = bank.matrix('auxwa').get_numpy_data() + bank.matrix('twtwa').get_numpy_data() + bank.matrix('ivtwa').get_numpy_data() 
    rail_time = bank.matrix('auxwr').get_numpy_data() + bank.matrix('twtwr').get_numpy_data() + bank.matrix('ivtwr').get_numpy_data() 
    
    # Take the shortest transit time between bus or rail
    transit_time = np.minimum(bus_time, rail_time)
    transit_time = transit_time[0:3700, 0:3700]
    transit_time_df = pd.DataFrame(transit_time)
    transit_time_df['from'] = transit_time_df.index
    transit_time_df = pd.melt(transit_time_df, id_vars= 'from', value_vars=list(transit_time_df.columns[0:3700]), var_name = 'to', value_name='travel_time')

    # Join with parcel data; add 1 to get zone ID because emme matrices are indexed starting with 0
    transit_time_df['to'] = transit_time_df['to'] + 1 
    transit_time_df['from'] = transit_time_df['from'] + 1

    return transit_time_df

def process_transit_attribute(transit_time_data, time_max,  attr_list, origin_df, dest_df, tract_dict, county_dict, taz_dict):
    # get transit information
    transit = transit_time_data[transit_time_data.travel_time <= time_max]
    # delete transit opportunities for internal zone travel, we assume all people won't take transit if it is internal zone
    transit = transit[transit['from'] != transit['to']]
    #prepare orgin and destination information
    dest_transit = transit.merge(dest_df, left_on = 'to', right_on = 'TAZ_P', how = 'left')
    dest_transit = pd.DataFrame(dest_transit.groupby(dest_transit['from'])['EMPTOT_P'].sum())
    dest_transit.reset_index(inplace=True)
    origin_dest = origin_df.merge(dest_transit, left_on = 'taz_id', right_on = 'from', how = 'left') 
    # groupby destination information by origin geo id 
    origin_dest_emp = pd.DataFrame(origin_dest.groupby('parcel_id')[attr_list].sum())
    origin_dest_emp.reset_index(inplace=True)
    # get the origin geo level household info
    transit_hh = pd.DataFrame(origin_df.groupby('parcel_id')['HH_P'].sum())
    transit_hh.reset_index(inplace=True)
    # print '2', 'total household: ', transit_hh['HH_P'].sum()
    transit_hh_emp = transit_hh.merge(origin_dest_emp, on = 'parcel_id', how='left')
    transit_hh_emp['census_tract'] = transit_hh_emp['parcel_id'].map(tract_dict)
    transit_hh_emp['county_id'] = transit_hh_emp['parcel_id'].map(county_dict)
    transit_hh_emp['region_id'] = 1
    return transit_hh_emp

In [101]:
geo_df = parcel_df.copy()
geo_df['parcel_id'] = geo_df['PARCELID']

In [102]:
""" Calculate weighted average numbers of jobs available to a parcel by mode, within a max distance."""
# tract_dict = geo_df.set_index(['parcel_id']).to_dict()['census_tract']
taz_dict = geo_df.set_index(['parcel_id']).to_dict()['TAZ_P']
# county_dict = geo_df.set_index(['parcel_id']).to_dict()['county_id']

# organize origin information
origin_df = pd.DataFrame(geo_df.groupby(['parcel_id'])['HH_P'].sum())
origin_df.reset_index(inplace=True)
origin_df['taz_id'] = origin_df['parcel_id'].map(taz_dict) #need TAZ to join with transit time table 

# organize destination information
dest_df = pd.DataFrame(geo_df.groupby(['TAZ_P'])[parcel_attributes_list].sum())
dest_df.reset_index(inplace=True)
dest_df['TAZ_P'] = dest_df['TAZ_P'].astype('object')

# extract transit travel time from emme matrices from AM time period
bank = _eb.Emmebank(os.path.join(model_path, 'Banks/7to8/emmebank'))
transit_time_df = get_transit_information(bank)

NameError: name 'parcel_attributes_list' is not defined

In [None]:

transit_hh_emp = process_transit_attribute(transit_time_df, time_max, parcel_attributes_list, origin_df, dest_df, tract_dict, county_dict, taz_dict)

# flag the minority tracts
# transit_hh_emp = transit_hh_emp.merge(minority_df, left_on = 'census_tract', right_on = 'GEOID10', how = 'left')

In [25]:
average_jobs_df = get_average_jobs_transit(transit_hh_emp, geo_boundry[geo], parcel_attributes_list) 

# Jobs Accessible within 1 Mile Walk and 3 Mile Bike
Note that this is not using the bike network, but is instead using the all-streets network

In [35]:
####################
# New work starts here

In [29]:
parcel_df = pd.read_csv(r'../../../inputs/scenario/landuse/parcels_urbansim.txt', delim_whitespace=True)
nodes = pd.read_csv(r'../../../inputs/base_year/all_streets_nodes.csv')
nodes.set_index('node_id', inplace= True)   # Index required for pandana in py3
links = pd.read_csv(r'../../../inputs/base_year/all_streets_links.csv',index_col=None)

parcel_geog = pd.read_sql_table('parcel_'+base_year+'_geography', 'sqlite:///../../../inputs/db/soundcast_inputs.db')
parcel_df = parcel_df.merge(parcel_geog,left_on='PARCELID', right_on='ParcelID')

distances = { # miles to feet; 
             1: 5280, # 1 mile
             3: 15840 # 3 miles
             }

In [89]:
def assign_nodes_to_dataset(dataset, network, column_name, x_name, y_name):
    """Adds an attribute node_ids to the given dataset."""
    dataset[column_name] = network.get_node_ids(dataset[x_name].values, dataset[y_name].values)
    
def process_net_attribute(network, attr, fun, distances):
    # print "Processing %s" % attr
    newdf = None
    for dist_index, dist in distances.items():        
        res_name = "%s_%s" % (re.sub("_?p$", "", attr), dist_index) # remove '_p' if present
        # print res_name
        res_name_list.append(res_name)
        aggr = network.aggregate(dist, type=fun, decay="flat", name=attr)
        if newdf is None:
            newdf = pd.DataFrame({res_name: aggr, "node_ids": aggr.index.values})
        else:
            newdf[res_name] = aggr
    return newdf

# Get the average jobs available for each household
def get_average_jobs(household_data, geo_boundry, new_columns_name):
    data = household_data.groupby([geo_boundry]).sum()
    data.reset_index(inplace = True)
    for res_name in res_name_list: 
         weighted_res_name = 'HHweighted_' + res_name
         averaged_res_name = new_columns_name + res_name
         data[averaged_res_name] = data[weighted_res_name]/data['HH_P']
    return data

def get_weighted_jobs(household_data, new_column_name):
    for res_name in res_name_list:
          weighted_res_name = new_column_name + res_name
          household_data[weighted_res_name] = household_data[res_name]*household_data['HH_P']
          # print weighted_res_name
    return household_data

In [62]:
parcel_attributes = {"sum": ['EMPTOT_P']}

global res_name_list
res_name_list = []

# assign impedance
imp = pd.DataFrame(links.Shape_Length)
imp = imp.rename(columns = {'Shape_Length':'distance'})

# create pandana network
net = pdna.network.Network(nodes.x, nodes.y, links.from_node_id, links.to_node_id, imp)

for dist in distances:
    # print dist
    net.precompute(dist)
    
# assign network nodes to parcels, for buffer variables
assign_nodes_to_dataset(parcel_df, net, 'node_ids', 'XCOORD_P', 'YCOORD_P')
x, y = parcel_df.XCOORD_P, parcel_df.YCOORD_P
parcel_df['node_ids'] = net.get_node_ids(x, y)

# start processing attributes
newdf = None
for fun, attrs in parcel_attributes.items():    
    for attr in attrs:
        net.set(parcel_df.node_ids, variable=parcel_df[attr], name=attr)    
        res = process_net_attribute(net, attr, fun, distances)
        if newdf is None:
            newdf = res
        else:
            newdf = pd.merge(newdf, res, on="node_ids", copy=False)

In [65]:
new_parcel_df = pd.merge(newdf, parcel_df, on="node_ids", copy=False)
new_parcel_df = get_weighted_jobs(new_parcel_df, 'HHweighted_')
df = get_average_jobs(new_parcel_df, 'racial_geog_vs_reg_total', 'HHweighted_')[['racial_geog_vs_reg_total','HHweighted_EMPTOT_P_1','HHweighted_EMPTOT_P_3']]

In [88]:
df

Unnamed: 0,racial_geog_vs_reg_total,HHweighted_EMPTOT_P_1,HHweighted_EMPTOT_P_3
0,-1,,
1,0,6269.5,42700.6
2,1,13831.4,61519.8


In [73]:
# Clean up this process and the scripts
# Loop through all equity geographies

HH_P    1605263
dtype: int64

In [75]:
res_name_list

['EMPTOT_P_1', 'EMPTOT_P_3']