In [None]:
# --- File locations ---------------------------------------------------------
# Root of the model run; the Emme bank is opened from <model_path>/Banks/<bank_tod>/emmebank.
model_path = r'\\aws-model04\e$\soundcast_root\src\2050'
# Folder the input files below are resolved against and the result CSVs are written to.
# Written as a raw string: the previous literal mixed escaped '\\' with the
# unescaped '\j' and '\s', which only worked because those are not recognised
# escape sequences (newer interpreters warn about them). The value is unchanged.
output_path = r'S:\angela\job_housing\soundcast_2050'
# Input files, relative to output_path.
parcel_file_name = 'inputs\\accessibility\\parcels_urbansim.txt'
geo_file_name = 'inputs\\accessibility\\parcels_suzanne.csv'
nodes_file_name = 'inputs\\accessibility\\all_streets_nodes_2014.csv'
links_file_name = 'inputs\\accessibility\\all_streets_links_2014.csv'
#transit_stop_name = 'inputs\\accessibility\\transit_stops_2014.csv'

In [None]:
import inro.emme.database.emmebank as _eb
import pandas as pd
import numpy as np
import pandana as pdna
import os
import re 
import sys
from pyproj import Proj, transform
sys.path.append(os.getcwd())

In [None]:
# --- Run settings -----------------------------------------------------------
# Scenario year; only used to label the output file names.
year = 2050
# Aggregation level of the output; must be one of the keys of `geo_boundry`.
geo = "region"
# Upper bound applied to the `transit_time` column when selecting reachable zones.
transit_time_max = 60
# Name of the sub-folder under <model_path>/Banks whose emmebank is read.
bank_tod = "7to8"
# Accumulates the result column names produced by process_net_attribute().
res_name_list = list()

In [None]:
# Network buffer radii used by the walk/bike analysis.
#   key   -> suffix appended to the resulting column name ("<attr>_<key>")
#   value -> search radius handed to the pandana network
# The values are in FEET (2640 ft = 0.5 mile), not meters as an earlier comment
# claimed. They have to be in the same unit as the link impedance
# (links.Shape_Length) -- TODO confirm that column is in feet as well.
distances = {
    1: 2640,     # 0.5 mile
    # 2: 5280,   # 1 mile
    3: 15840,    # 3 miles
}

# Maps the `geo` setting to the column the results are grouped by.
geo_boundry = {'county': 'county_id',
               'city': 'city_id',
               'taz': 'TAZ_P',
               'region': 'region_id'}

# Parcel columns to accumulate, keyed by aggregation type. In the walk/bike
# cell the key is passed to pandana as the aggregation `type`; the transit
# cell only reads the "sum" list.
parcel_attributes = {
    "sum": ['EMPTOT_P'],
    # "ave": ["PPRICDYP", "PPRICHRP"]
}

# Full attribute set, kept for reference. (Previously stored as a bare
# triple-quoted string, which is an expression and was echoed as cell output.)
#
# parcel_attributes = {
#               "sum": ["HH_P", "STUGRD_P", "STUHGH_P", "STUUNI_P",
#                       "EMPMED_P", "EMPOFC_P", "EMPEDU_P", "EMPFOO_P", "EMPGOV_P", "EMPIND_P",
#                       "EMPSVC_P", "EMPOTH_P", "EMPTOT_P", "EMPRET_P",
#                       "PARKDY_P", "PARKHR_P", "NPARKS", "APARKS", "daily_weighted_spaces", "hourly_weighted_spaces"],
#               "ave": [ "PPRICDYP", "PPRICHRP"],
#               }

In [None]:
def get_transit_information(bank, max_zone=3700):
    """Build a long-format zone-to-zone transit time table from an Emme bank.

    Two candidate travel times are assembled by adding three full matrices
    each ('auxwa' + 'twtwa' + 'ivtwa' and 'auxwr' + 'twtwr' + 'ivtwr'); per the
    original note these cover walking to the stop, waiting/transferring and
    time on the vehicle. The element-wise minimum of the two is kept.

    Parameters
    ----------
    bank : object
        Anything exposing ``matrix(name).get_numpy_data()`` that returns a 2-D
        numpy array (an open Emmebank in this notebook).
    max_zone : int, optional
        Only the first ``max_zone`` rows and columns are kept. Defaults to
        3700, the cut-off that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns ``from``, ``to`` and ``transit_time``; one row per retained
        origin/destination pair. ``from``/``to`` are the matrix positions
        plus one.
    """
    bus_time = (bank.matrix('auxwa').get_numpy_data()
                + bank.matrix('twtwa').get_numpy_data()
                + bank.matrix('ivtwa').get_numpy_data())
    rail_time = (bank.matrix('auxwr').get_numpy_data()
                 + bank.matrix('twtwr').get_numpy_data()
                 + bank.matrix('ivtwr').get_numpy_data())
    transit_time = np.minimum(bus_time, rail_time)
    transit_time = transit_time[0:max_zone, 0:max_zone]

    transit_time_df = pd.DataFrame(transit_time)
    # Capture the destination columns BEFORE adding 'from'. Slicing
    # columns[0:3700] afterwards (as the code used to) drags the 'from' column
    # into value_vars whenever the matrix has fewer zones than the cut-off.
    destination_columns = list(transit_time_df.columns)
    transit_time_df['from'] = transit_time_df.index
    transit_time_df = pd.melt(transit_time_df,
                              id_vars='from',
                              value_vars=destination_columns,
                              var_name='to',
                              value_name='transit_time')
    # Matrix positions are 0-based; add 1 so they line up with the zone ids
    # used in the parcel data (assumes ids are consecutive from 1 -- TODO confirm).
    transit_time_df['to'] = transit_time_df['to'] + 1
    transit_time_df['from'] = transit_time_df['from'] + 1
    return transit_time_df

In [None]:
def process_transit_attribute(transit_time_data, transit_time_max, parcel_taz_id, transit_taz_id, attr_list, origin_df, dest_df):
    """Sum destination attributes reachable from each origin zone within a time limit.

    Parameters
    ----------
    transit_time_data : pandas.DataFrame
        Long table with at least ``to``, ``transit_time`` and the column named
        by ``transit_taz_id``.
    transit_time_max : number
        Rows with ``transit_time`` above this value are discarded.
    parcel_taz_id : str
        Zone id column in ``origin_df`` and ``dest_df``.
    transit_taz_id : str
        Origin zone column in ``transit_time_data`` (grouping key).
    attr_list : list of str
        Columns of ``dest_df`` to accumulate.
    origin_df, dest_df : pandas.DataFrame
        Per-zone origin and destination attributes.

    Returns
    -------
    pandas.DataFrame
        One row per origin zone present in both the accumulated table and
        ``origin_df`` (inner merge), holding the summed attributes next to the
        origin columns.
    """
    # keep only the zone pairs within the time budget and attach destination attributes
    reachable = transit_time_data[transit_time_data.transit_time <= transit_time_max]
    reachable = reachable.merge(dest_df, how='left', left_on='to', right_on=parcel_taz_id)
    # accumulate the destination attributes per origin zone
    transit_emp = pd.DataFrame(reachable.groupby(transit_taz_id)[attr_list].sum())
    print(transit_emp.head())
    transit_emp.reset_index(inplace=True)
    print(transit_emp.head())
    # attach the origin-side attributes
    transit_df = pd.merge(transit_emp, origin_df, left_on=transit_taz_id, right_on=parcel_taz_id)
    print(transit_df.head())
    return transit_df

In [None]:
def assign_nodes_to_dataset(dataset, network, column_name, x_name, y_name):
    """Store, in ``dataset[column_name]``, the network node id for every row.

    The ids come from ``network.get_node_ids`` called with the raw values of
    the ``x_name`` and ``y_name`` columns. ``dataset`` is modified in place
    and nothing is returned.
    """
    x_values = dataset[x_name].values
    y_values = dataset[y_name].values
    node_ids = network.get_node_ids(x_values, y_values)
    dataset[column_name] = node_ids

In [None]:
def process_net_attribute(network, attr, fun, dist_map=None, names_out=None):
    """Aggregate one network variable at every configured radius.

    Parameters
    ----------
    network : object
        Network exposing ``aggregate(distance, type=, decay=, name=)`` that
        returns a pandas Series indexed by node id.
    attr : str
        Name of the variable previously registered on the network.
    fun : str
        Aggregation type forwarded as ``type=``.
    dist_map : dict, optional
        ``{suffix: radius}``. Defaults to the module-level ``distances``.
    names_out : list, optional
        List that receives every generated column name. Defaults to the
        module-level ``res_name_list``.

    Returns
    -------
    pandas.DataFrame or None
        One column per radius plus ``node_ids``; ``None`` if ``dist_map`` is
        empty.
    """
    # Fall back to the notebook-level configuration so existing three-argument
    # calls behave exactly as before.
    if dist_map is None:
        dist_map = distances
    if names_out is None:
        names_out = res_name_list

    print("Processing %s" % attr)
    newdf = None
    for dist_index, dist in dist_map.items():
        # Strips a trailing 'p' / '_p' from the attribute name. The pattern is
        # case-sensitive, so upper-case names such as 'EMPTOT_P' are left
        # untouched and yield 'EMPTOT_P_<suffix>'. Kept as is because changing
        # it would rename the output columns.
        res_name = "%s_%s" % (re.sub("_?p$", "", attr), dist_index)
        print(res_name)
        names_out.append(res_name)
        aggr = network.aggregate(dist, type=fun, decay="flat", name=attr)
        if newdf is None:
            newdf = pd.DataFrame({res_name: aggr, "node_ids": aggr.index.values})
        else:
            # aligned on the node-id index shared by all aggregate results
            newdf[res_name] = aggr
    return newdf

In [None]:
def get_weighted_jobs(df, res_name_list):
    """Add household-weighted versions of the given columns.

    For every name in ``res_name_list`` a column ``'HHweighted_' + name`` is
    added, equal to that column multiplied by ``HH_P``. ``df`` is modified in
    place and also returned.
    """
    for res_name in res_name_list:
        weighted_res_name = 'HHweighted_' + res_name
        df[weighted_res_name] = df[res_name] * df['HH_P']
        print(weighted_res_name)
    print(df.head())
    return df

In [None]:
def get_average_jobs(df, geo, res_name_list):
    """Aggregate to ``geo`` and compute household-weighted averages.

    All columns of ``df`` are summed per value of the ``geo`` column. For each
    name in ``res_name_list`` a column ``'HHaveraged_' + name`` is then added:
    the summed ``'HHweighted_' + name`` divided by the summed ``HH_P``.

    Returns the grouped frame with ``geo`` as a regular column.
    """
    df_groupby = df.groupby([geo]).sum()
    df_groupby.reset_index(inplace=True)
    for res_name in res_name_list:
        weighted_res_name = 'HHweighted_' + res_name
        averaged_res_name = 'HHaveraged_' + res_name
        # groups whose HH_P total is zero produce inf/NaN here
        df_groupby[averaged_res_name] = df_groupby[weighted_res_name] / df_groupby['HH_P']
    print(df_groupby.head())
    return df_groupby

In [None]:
def check_missing_data(df, col):
    """Print a warning for every column of ``df`` whose values sum to zero.

    The column named ``col`` is skipped. The function only reports: despite
    the wording of the message it neither raises nor exits, and it returns
    ``None``.
    """
    for col_name in df.columns:
        # daysim does not use EMPRSC_P
        if col_name == col:
            continue
        if df[col_name].sum() == 0:
            print(col_name + ' column sum is zero! Exiting program.')


In [None]:
# ---------------------------------------------------------------------------
# Jobs reachable within `transit_time_max` of transit travel time.
# If you want to compute jobs within XXX transit time, please only run the
# code within this box.
# ---------------------------------------------------------------------------

output_file_name = geo + '_transit_' + str(year) + '_' + str(transit_time_max) + '_' + 'min.csv'

# read data
# (pd.read_csv replaces DataFrame.from_csv, which is deprecated and was removed
# in pandas 1.0; with index_col=None the two read the file the same way)
parcel_df = pd.read_csv(os.path.join(output_path, parcel_file_name), sep=' ')
geo_df = pd.read_csv(os.path.join(output_path, geo_file_name), sep=',')
check_missing_data(parcel_df, 'EMPRSC_P')

# organize origin TAZ information
geo_df = pd.merge(parcel_df, geo_df, left_on='PARCELID', right_on='parcel_id')
# TAZ -> geography lookups. Only the needed column is converted to a dict
# (the whole merged frame used to be converted three times). When a TAZ holds
# parcels with different values the last parcel wins, exactly as before.
taz_indexed = geo_df.set_index('TAZ_P')
city_dict = taz_indexed['city_id'].to_dict()
county_dict = taz_indexed['county_id'].to_dict()
city_name_dict = taz_indexed['city_name'].to_dict()
origin_df = pd.DataFrame(geo_df.groupby(['TAZ_P'])['HH_P'].sum())
origin_df.reset_index(inplace=True)
origin_df['city_id'] = origin_df['TAZ_P'].map(city_dict)
origin_df['county_id'] = origin_df['TAZ_P'].map(county_dict)
origin_df['city_name'] = origin_df['TAZ_P'].map(city_name_dict)
origin_df['region_id'] = 1

# organize destination TAZ information
parcel_attributes_list = parcel_attributes['sum']
print(parcel_attributes_list)
dest_df = pd.DataFrame(geo_df.groupby(['TAZ_P'])[parcel_attributes_list].sum())
dest_df.reset_index(inplace=True)

# process transit time
bank = _eb.Emmebank(os.path.join(model_path, 'Banks', bank_tod, 'emmebank'))
try:
    transit_time_df = get_transit_information(bank)
finally:
    # release the database handle even if reading the matrices fails
    bank.dispose()
transit_df = process_transit_attribute(transit_time_df, transit_time_max, 'TAZ_P', 'from', parcel_attributes_list, origin_df, dest_df)

# jobs by household
weighted_jobs_df = get_weighted_jobs(transit_df, parcel_attributes_list)
average_jobs_df = get_average_jobs(weighted_jobs_df, geo_boundry[geo], parcel_attributes_list)

average_jobs_df.to_csv(os.path.join(output_path, output_file_name), index=False)
print('transit done')

In [None]:
# ---------------------------------------------------------------------------
# Jobs reachable within the network distances configured in `distances`.
# If you want to compute jobs within XXX walk/bike distances, please only run
# the code within this box.
# ---------------------------------------------------------------------------

output_file_name = geo + str(year) + '_0.5_3_miles.csv'

# read data
# (pd.read_csv replaces DataFrame.from_csv, which is deprecated and was removed
# in pandas 1.0; from_csv used the first column as index unless told otherwise,
# hence index_col=0 for the nodes file)
parcel_df = pd.read_csv(os.path.join(output_path, parcel_file_name), sep=' ')
nodes = pd.read_csv(os.path.join(output_path, nodes_file_name), sep=',', index_col=0)
links = pd.read_csv(os.path.join(output_path, links_file_name), sep=',')
geo_df = pd.read_csv(os.path.join(output_path, geo_file_name), sep=',')
check_missing_data(parcel_df, 'EMPRSC_P')

new_parcel_df = pd.merge(parcel_df, geo_df, left_on='PARCELID', right_on='parcel_id', how='left')
new_parcel_df['region_id'] = 1

# assign impedance
imp = pd.DataFrame(links.Shape_Length)
imp = imp.rename(columns={'Shape_Length': 'distance'})
# create pandana network
net = pdna.network.Network(nodes.x, nodes.y, links.from_node_id, links.to_node_id, imp)

# Pre-compute the range queries once, for the largest radius. The loop used to
# iterate over the dict itself, i.e. over the suffix keys (1, 3) rather than
# the radii, so the cache never covered the distances actually aggregated.
# NOTE(review): this now caches a much larger radius than before and may need
# noticeably more memory/time; results are unaffected by pre-computation.
max_dist = max(distances.values())
print(max_dist)
net.precompute(max_dist)

# assign network nodes to parcels, for buffer variables
# (a second, redundant get_node_ids call that recomputed the same column was removed)
assign_nodes_to_dataset(new_parcel_df, net, 'node_ids', 'XCOORD_P', 'YCOORD_P')

# start processing attributes
# Empty the shared name list in place so re-running this cell does not
# accumulate duplicate column names.
del res_name_list[:]
newdf = None
for fun, attrs in parcel_attributes.items():
    for attr in attrs:
        net.set(new_parcel_df.node_ids, variable=parcel_df[attr], name=attr)
        res = process_net_attribute(net, attr, fun)
        if newdf is None:
            newdf = res
        else:
            newdf = pd.merge(newdf, res, on="node_ids", copy=False)
print(res_name_list)

# jobs by household
parcel_net_df = pd.merge(newdf, new_parcel_df[['node_ids', 'HH_P', geo_boundry[geo]]], on="node_ids", copy=False)
parcel_net_df = get_weighted_jobs(parcel_net_df, res_name_list)
parcel_net_df_groupby = get_average_jobs(parcel_net_df, geo_boundry[geo], res_name_list)


parcel_net_df_groupby.to_csv(os.path.join(output_path, output_file_name), index=False)
print('walk/bike done')