# Analyzing AV Results from Urbansim
### AV Scenario Results for 2050

In [1]:
import pandas as pd

In [263]:
# Point the Opus data explorer at the 2050 output of the integrated run.
# The script prints the available tables and its usage hint refers to
# `ex.get_dataset(...)`, so it leaves an explorer object named `ex` in the
# notebook namespace for the cells below.
%run opusgit/urbansim/opus_core/tools/explore_data.py -d opusgit/urbansim_data/data/psrc_parcel/runs/integrated.run_4.run_2018_04_27_10_34/2050
# NOTE(review): presumably the Opus packages searched when resolving dataset
# definitions -- confirm against explore_data.py
ex.set_dataset_pool(['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'])

Exploring data in opusgit/urbansim_data/data/psrc_parcel/runs/integrated.run_4.run_2018_04_27_10_34/2050
Available tables:
['buildings', 'building_sqft_per_job', 'building_types', 'choice_set', 'development_constraints', 'development_project_proposals', 'development_project_proposal_components', 'development_templates', 'development_template_components', 'employment_adhoc_sector_groups', 'employment_adhoc_sector_group_definitions', 'employment_sectors', 'fazes', 'generic_land_use_types', 'gridcells', 'home_based_status', 'households', 'jobs', 'land_use_types', 'large_areas', 'parcels', 'persons', 'schools', 'target_vacancies', 'travel_data', 'urbansim_constants', 'zones']

Use ex.get_dataset(dataset_name) to access data. Optionally, pass an argument id_name for non-standard name of unique identifier.


In [264]:
# Lookup table of zone (TAZ) areas; the 'acres' column is the denominator
# for the households-per-acre density computed further down
taz_path = r'R:\SoundCastDocuments\AVsTravelandLandUse\model results\taz2010nowater.txt'
taz = pd.read_csv(taz_path)

In [265]:
# Pull the household, building and parcel tables for the run year currently
# loaded in the explorer and convert each one to a DataFrame
hh = ex.get_dataset('household')
hh_df = hh.to_dataframe()

bldg = ex.get_dataset('building')
bldg_df = bldg.to_dataframe()

parcel = ex.get_dataset('parcel')
parcel_df = parcel.to_dataframe()

# Attach parcel attributes to every building (left join keeps all buildings)
bldg_parcel = bldg_df.merge(parcel_df, how='left', on='parcel_id')

# Attach building + parcel attributes to every household
# (left join keeps all households, whether or not a building matches)
hh_df = hh_df.merge(bldg_parcel, how='left', on='building_id')

Calculate density (hh/acre)

In [266]:
# Households per zone, as a frame with columns zone_id / total_hh
tot_hh = (
    hh_df.groupby('zone_id')[['household_id']].count()
    .reset_index()
    .rename(columns={'household_id': 'total_hh'})
)

# Bring in zone acreage and compute households per acre for each zone
df = tot_hh.merge(taz[['TAZ', 'acres']], how='left', left_on='zone_id', right_on='TAZ')
df['hh_per_acre'] = df['total_hh']/df['acres']

# Put the zone density on every household record, so that statistics taken
# over households are weighted by the number of households in each zone
hh_df = hh_df.merge(df[['zone_id', 'hh_per_acre']], how='left', on='zone_id')

# Keep the 2050 result under its own name for the comparison with 2040 below
hh_df_50 = hh_df

### Load 2040 results to compare differences

In [267]:
# Re-point the Opus data explorer at the 2040 output of the same integrated
# run. From here on `ex` serves 2040 tables, so the 2050 household frame must
# already have been saved under its own name before this cell runs.
%run opusgit/urbansim/opus_core/tools/explore_data.py -d opusgit/urbansim_data/data/psrc_parcel/runs/integrated.run_4.run_2018_04_27_10_34/2040
# NOTE(review): presumably the Opus packages searched when resolving dataset
# definitions -- confirm against explore_data.py
ex.set_dataset_pool(['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'])

Exploring data in opusgit/urbansim_data/data/psrc_parcel/runs/integrated.run_4.run_2018_04_27_10_34/2040
Available tables:
['buildings', 'building_sqft_per_job', 'building_types', 'choice_set', 'development_constraints', 'development_project_proposals', 'development_project_proposal_components', 'development_templates', 'development_template_components', 'employment_adhoc_sector_groups', 'employment_adhoc_sector_group_definitions', 'employment_sectors', 'fazes', 'generic_land_use_types', 'gridcells', 'home_based_status', 'households', 'jobs', 'land_use_types', 'large_areas', 'parcels', 'persons', 'schools', 'target_vacancies', 'travel_data', 'urbansim_constants', 'zones']

Use ex.get_dataset(dataset_name) to access data. Optionally, pass an argument id_name for non-standard name of unique identifier.


In [268]:
# Same extraction as for 2050, now against the 2040 data loaded in the explorer:
# household, building and parcel tables, each converted to a DataFrame
hh = ex.get_dataset('household')
hh_df = hh.to_dataframe()

bldg = ex.get_dataset('building')
bldg_df = bldg.to_dataframe()

parcel = ex.get_dataset('parcel')
parcel_df = parcel.to_dataframe()

# Attach parcel attributes to every building (left join keeps all buildings)
bldg_parcel = bldg_df.merge(parcel_df, how='left', on='parcel_id')

# Attach building + parcel attributes to every household
# (left join keeps all households, whether or not a building matches)
hh_df = hh_df.merge(bldg_parcel, how='left', on='building_id')

In [269]:
# Households per zone, as a frame with columns zone_id / total_hh
tot_hh = (
    hh_df.groupby('zone_id')[['household_id']].count()
    .reset_index()
    .rename(columns={'household_id': 'total_hh'})
)

# Bring in zone acreage and compute households per acre for each zone
df = tot_hh.merge(taz[['TAZ', 'acres']], how='left', left_on='zone_id', right_on='TAZ')
df['hh_per_acre'] = df['total_hh']/df['acres']

# Put the zone density on every household record, so that statistics taken
# over households are weighted by the number of households in each zone
hh_df = hh_df.merge(df[['zone_id', 'hh_per_acre']], how='left', on='zone_id')

# Keep the 2040 result under its own name
hh_df_40 = hh_df

# Evaluate Density Changes

------------
The household-weighted density of the region increases between 2040 and 2050, probably because of natural increase and in-migration.
Increase of about 16% (see the relative change computed below).

In [270]:
# Mean household-weighted density (hh/acre): 2040 first, then 2050
for year_df in (hh_df_40, hh_df_50):
    print(year_df['hh_per_acre'].mean())

9.64083824255
11.1737369885


In [271]:
# Relative change in mean household-weighted density from 2040 to 2050
mean_40 = hh_df_40['hh_per_acre'].mean()
mean_50 = hh_df_50['hh_per_acre'].mean()
(mean_50 - mean_40)/mean_40

0.1590005668946726

In [272]:
# Highest zone density experienced by any household: 2040 first, then 2050
for year_df in (hh_df_40, hh_df_50):
    print(year_df['hh_per_acre'].max())

149.310966912
152.209356002


In [273]:
# Median household-weighted density (hh/acre): 2040 first, then 2050
for year_df in (hh_df_40, hh_df_50):
    print(year_df['hh_per_acre'].median())

3.86396875332
4.31032787003


### Urban Growth Boundary

In [274]:
# Share of 2040 households for each value of the is_inside_urban_growth_boundary flag
ugb_counts_40 = hh_df_40.groupby('is_inside_urban_growth_boundary')['household_id'].count()
ugb_counts_40/hh_df_40['household_id'].count()

is_inside_urban_growth_boundary
0    0.10974
1    0.89026
Name: household_id, dtype: float64

In [275]:
# Share of 2050 households for each value of the is_inside_urban_growth_boundary flag
ugb_counts_50 = hh_df_50.groupby('is_inside_urban_growth_boundary')['household_id'].count()
ugb_counts_50/hh_df_50['household_id'].count()

is_inside_urban_growth_boundary
0    0.102045
1    0.897955
Name: household_id, dtype: float64

# Evaluate Movers' Behavior

In [276]:
# Identify households that existed in 2040 and had moved by 2050.
# A household counts as a mover when its parcel_id differs between the
# 2040 and 2050 records for the same household_id.

In [277]:
# Inner join on household_id keeps only households present in both years;
# every other column gets a _40 / _50 suffix identifying its source year
hh_df = hh_df_40.merge(hh_df_50, on='household_id', suffixes=['_40','_50'])

In [278]:
# A mover is a household whose parcel differs between 2040 and 2050.
# The household frames are built with left joins, so parcel_id can be missing
# when a household's building_id has no match. NaN != anything evaluates to
# True, so without the guard below such households would be counted as movers
# even though their location in one of the years is simply unknown.
has_both_parcels = hh_df['parcel_id_40'].notnull() & hh_df['parcel_id_50'].notnull()
changed_parcel = hh_df['parcel_id_40'] != hh_df['parcel_id_50']
movers_df = hh_df[has_both_parcels & changed_parcel]

In [279]:
# Number of households classified as movers
len(movers_df.index)

214752

In [280]:
# Mean zone density for movers at their 2040 location, then their 2050 location
for density_col in ('hh_per_acre_40', 'hh_per_acre_50'):
    print(movers_df[density_col].mean())

10.8620551193
14.2858086701


In [281]:
# Median zone density for movers at their 2040 location, then their 2050 location
for density_col in ('hh_per_acre_40', 'hh_per_acre_50'):
    print(movers_df[density_col].median())

4.62143343853
6.810813357


In [282]:
# Maximum zone density for movers at their 2040 location, then their 2050 location
for density_col in ('hh_per_acre_40', 'hh_per_acre_50'):
    print(movers_df[density_col].max())

149.310966912
152.209356002


In [283]:
# Minimum zone density for movers at their 2040 location, then their 2050 location
for density_col in ('hh_per_acre_40', 'hh_per_acre_50'):
    print(movers_df[density_col].min())

0.000788267967579
0.00021106726622


### Movement within Urban Growth Boundary

In [284]:
# Share of movers for each value of the urban growth boundary flag at their 2040 location
mover_ugb_counts_40 = movers_df.groupby('is_inside_urban_growth_boundary_40')['household_id'].count()
mover_ugb_counts_40/movers_df['household_id'].count()

is_inside_urban_growth_boundary_40
0    0.080423
1    0.919577
Name: household_id, dtype: float64

In [285]:
# Share of movers for each value of the urban growth boundary flag at their 2050 location
mover_ugb_counts_50 = movers_df.groupby('is_inside_urban_growth_boundary_50')['household_id'].count()
mover_ugb_counts_50/movers_df['household_id'].count()

is_inside_urban_growth_boundary_50
0    0.047166
1    0.952834
Name: household_id, dtype: float64

### External Migrations
Did people moving from outside the region choose different types of locations?

In [286]:
# Outer join keeps households found in either year. indicator=True adds a
# '_merge' column recording whether each household_id came from 2040 only
# ('left_only'), 2050 only ('right_only') or both years.
inmigrants_df = pd.merge(hh_df_40, hh_df_50, on='household_id', how='outer',
                         suffixes=['_40','_50'], indicator=True)

# In-migrants are the households that exist only in 2050. Testing the merge
# indicator is more reliable than testing parcel_id_40 for null: a household
# that exists in 2040 but has no parcel (its building_id found no match in the
# left joins) would also have a null parcel_id_40 and be misclassified.
# The helper column is dropped so the frame has the same columns as movers_df.
inmigrants_df = inmigrants_df[inmigrants_df['_merge'] == 'right_only'].drop('_merge', axis=1)

In [287]:
# Number of households classified as in-migrants
print(len(inmigrants_df.index))

247188


In [288]:
# Mean 2050 zone density at in-migrants' locations
print(inmigrants_df['hh_per_acre_50'].mean())

13.6200378631


In [289]:
# Maximum 2050 zone density at in-migrants' locations
print(inmigrants_df['hh_per_acre_50'].max())

152.209356002


In [290]:
# Minimum 2050 zone density at in-migrants' locations
print(inmigrants_df['hh_per_acre_50'].min())

0.00021106726622


In [291]:
# Median 2050 zone density at in-migrants' locations
print(inmigrants_df['hh_per_acre_50'].median())

6.35020763118


In [292]:
# Share of in-migrants for each value of the urban growth boundary flag in 2050
inmigrant_ugb_counts = inmigrants_df.groupby('is_inside_urban_growth_boundary_50')['household_id'].count()
inmigrant_ugb_counts/inmigrants_df['household_id'].count()

is_inside_urban_growth_boundary_50
0    0.0633
1    0.9367
Name: household_id, dtype: float64

### All Movers (internal + in-migrants)

In [293]:
# Join inmigrants and movers dataframes
df = movers_df.append(inmigrants_df)

In [294]:
# Total number of relocating households (internal movers + in-migrants)
len(df.index)

461940

In [295]:
# In-migrants have no 2040 record, so for the combined group only the 2050
# density can be summarised (a 2040-vs-2050 comparison is not possible)
print(df['hh_per_acre_50'].mean())

13.9295490818


In [296]:
# Maximum 2050 zone density across all relocating households
print(df['hh_per_acre_50'].max())

152.209356002


In [297]:
# Minimum 2050 zone density across all relocating households
print(df['hh_per_acre_50'].min())

0.00021106726622


In [298]:
# Median 2050 zone density across all relocating households
print(df['hh_per_acre_50'].median())

6.55217248114


In [299]:
# Share of all relocating households for each value of the urban growth boundary flag in 2050
all_mover_ugb_counts = df.groupby('is_inside_urban_growth_boundary_50')['household_id'].count()
all_mover_ugb_counts/df['household_id'].count()

is_inside_urban_growth_boundary_50
0    0.055799
1    0.944201
Name: household_id, dtype: float64

### Inside Regional Growth Center?

# Compare Before and After Parcel Info
 - land price?
 - distance to work?
 - distance to CBD?

In [247]:
# List every column available on the movers frame (each attribute appears
# with a _40 / _50 suffix, except the household_id join key)
list(movers_df.columns)

['age_of_head_40',
 'building_id_40',
 'children_40',
 'household_id',
 'income_40',
 'is_inmigrant_40',
 'persons_40',
 'tenure_40',
 'workers_40',
 'building_type_id_40',
 'improvement_value_40',
 'job_capacity_40',
 'land_area_40',
 'non_residential_sqft_40',
 'not_demolish_40',
 'parcel_id_40',
 'residential_units_40',
 'sqft_per_unit_40',
 'stories_40',
 'template_id_40',
 'year_built_40',
 '_init_error_ln_land_value_40',
 'census_block_group_id_40',
 'census_block_id_40',
 'city_id_40',
 'county_id_40',
 'elem_id_40',
 'faz_group_id_40',
 'faz_id_40',
 'grid_id_40',
 'growth_center_id_40',
 'hschool_id_40',
 'is_in_transit_zone_40',
 'is_inside_urban_growth_boundary_40',
 'is_waterfront_40',
 'land_use_type_id_40',
 'land_value_40',
 'large_area_id_40',
 'mix_split_id_40',
 'mschool_id_40',
 'parcel_sqft_40',
 'parking_price_daily_40',
 'parking_price_hourly_40',
 'parking_space_daily_40',
 'parking_space_hourly_40',
 'plan_type_id_40',
 'regional_geography_id_40',
 'school_distr

In [306]:
# Mean building sqft per unit for households present in both years,
# at their 2040 location and then at their 2050 location
for sqft_col in ('sqft_per_unit_40', 'sqft_per_unit_50'):
    print(hh_df[sqft_col].mean())

1488.36279751
1478.08234906


In [None]:
# NOTE(review): unfinished placeholder -- the column name is empty. Unless
# hh_df really has a column named '', this lookup raises KeyError when the
# notebook is run top to bottom. TODO: fill in the intended column or remove.
hh_df['']

# Other Questions
- What types of households tended to move further out?
- Worker households? High income? Lower income?