In [2]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon

# Household Records
Attach geography IDs (tract, block, block group, PUMA, RGC name, TAZ) to each record based on its latitude/longitude values.

In [149]:
# Load the household records from the '1-Household' worksheet.
# `sheet_name` is the keyword pandas documents for read_excel; it also matches
# the `sheet_name=` spelling the to_excel calls in this notebook already use.
hh = pd.read_excel(r'J:\Projects\Surveys\HHTravel\Survey2017\Data\Geolayers\1-Household-v3-Public.xlsx',
                   sheet_name='1-Household')

### Geography of previous household (HH) location

In [150]:
# Build one point per household from the previous-home longitude/latitude columns.
prev_home_points = [Point((float(lng), float(lat)))
                    for lng, lat in zip(hh['prev_home_lng'], hh['prev_home_lat'])]
geog = gpd.GeoSeries(prev_home_points, index=hh.index)
geog.crs = {'init': 'epsg:4326'}    # WGS 84

# Keep the unprojected points for reference ...
hh['lat_lon_geog'] = geog
# ... and store the points reprojected to EPSG:2285 as 'geometry' so they match the shapefiles.
hh['geometry'] = geog.to_crs(epsg='2285')
# NOTE(review): hh comes from pd.read_excel, so this presumably sets a plain attribute
# on a pandas DataFrame rather than GeoDataFrame metadata -- confirm.
hh.crs = {'init': 'epsg:2285'}

In [151]:
# Keep only households that have coordinates for a previous home location.
# The mask is selected with notnull() rather than negating isnull() with unary
# minus, which is not the conventional way to invert a boolean mask.
_hh = hh[hh['prev_home_lat'].notnull()]
_hh.crs = {'init': 'epsg:2285'}

In [152]:
# Load the geography layers that get joined to the survey points.
# Every layer except PUMA is tagged directly as EPSG:2285 after reading.
tract = gpd.read_file(r'W:\geodata\census\Tract\tract2010.shp')
tract.crs = {'init' :'epsg:2285'}

block = gpd.read_file(r'W:\geodata\census\Block\block2010.shp')
block.crs = {'init' :'epsg:2285'}

block_group = gpd.read_file(r'R:\Brice\blockgrp2010.shp')
block_group.crs = {'init' :'epsg:2285'}

# PUMA is not a projected shapefile; need to project.
# It is tagged as WGS 84 first, its geometry is reprojected to EPSG:2285,
# and the frame's CRS is then updated to match.
puma = gpd.read_file(r'W:\geodata\census\PUMAs\reg10puma.shp')
puma.crs = {'init' :'epsg:4326'}
puma['geometry'] = puma['geometry'].to_crs(epsg='2285')
puma.crs = {'init': 'epsg:2285'}

rgc = gpd.read_file(r'R:\Brice\urbcen.shp')
rgc.crs = {'init' :'epsg:2285'}

taz = gpd.read_file(r'W:\geodata\forecast\taz2010.shp')
taz.crs = {'init' :'epsg:2285'}

In [153]:
def spatial_join(gdf1, gdf2, keep_field, rename_field, crs):
    """Left spatial join (intersects) of gdf2 onto gdf1, keeping one gdf2 attribute.

    Args:
        gdf1: Left/base frame passed to gpd.sjoin with how="left".
        gdf2: Right frame; only its 'geometry' column and `keep_field` are used.
        keep_field: Name of the gdf2 column to carry over to the result.
        rename_field: Name given to `keep_field` in the result.
        crs: Value assigned to the result's `crs` attribute.

    Returns:
        The joined frame with `keep_field` renamed to `rename_field` and the
        'index_right' column from the join removed, so the result can be fed
        into another call without a leftover 'index_right' column.

    NOTE(review): a gdf1 record that intersects more than one gdf2 feature
    presumably comes back as multiple rows -- confirm against the sjoin docs.
    """
    right_subset = gdf2[['geometry', keep_field]]
    joined = gpd.sjoin(gdf1, right_subset, how="left", op='intersects')
    joined = joined.rename(columns={keep_field: rename_field}).drop(['index_right'], axis=1)
    joined.crs = crs
    return joined

In [154]:
# Attach each geography ID to the previous-home points, one join per layer.
# Each entry is (layer, field to keep from the layer, output column name);
# the order matches the sequence tract -> block -> block group -> puma -> rgc -> taz.
prev_home_layers = [
    (tract, 'GEOID10', 'prev_home_tract'),
    (block, 'GEOID10', 'prev_home_block'),
    (block_group, 'GEOID10', 'prev_home_bg'),
    (puma, 'PUMACE10', 'prev_home_puma10'),
    (rgc, 'NAME', 'prev_home_rgcname'),
    (taz, 'TAZ', 'prev_home_taz'),
]

df = _hh
for layer, keep_field, rename_field in prev_home_layers:
    df = spatial_join(gdf1=df, gdf2=layer, keep_field=keep_field, rename_field=rename_field, crs=hh.crs)

In [156]:
# Merge the joined geography columns back onto the full household table.
# A left merge keeps households that were filtered out for lacking
# previous-home coordinates.
prev_home_cols = ['hhid','prev_home_tract','prev_home_block','prev_home_bg',
                  'prev_home_puma10','prev_home_rgcname','prev_home_taz']
df_hh = hh.merge(df[prev_home_cols], on='hhid', how='left')

# Drop geometry fields
df_hh = df_hh.drop(['lat_lon_geog','geometry'], axis=1)

# Export to Excel
df_hh.to_excel(r'J:\Projects\Surveys\HHTravel\Survey2017\Data\Geolayers\geolayers-added\1-Household-v3-Public.xlsx',
               index=False, sheet_name='1-Household')

# Person File

In [157]:
# Load the person records from the '2-Person' worksheet, skipping the first row.
# `sheet_name` is the keyword pandas documents for read_excel; it also matches
# the `sheet_name=` spelling the to_excel calls in this notebook already use.
person = pd.read_excel(r'J:\Projects\Surveys\HHTravel\Survey2017\Data\Geolayers\2-Person-v3-Public.xlsx',
                       sheet_name='2-Person', skiprows=1)

### Current Work Location

In [158]:
# Build one point per person from the current-work longitude/latitude columns.
work_points = [Point((float(lng), float(lat)))
               for lng, lat in zip(person['work_lng'], person['work_lat'])]
geog = gpd.GeoSeries(work_points, index=person.index)
geog.crs = {'init': 'epsg:4326'}    # WGS 84

# Keep the unprojected points for reference ...
person['lat_lon_geog'] = geog
# ... and store the points reprojected to EPSG:2285 as 'geometry' so they match the shapefiles.
person['geometry'] = geog.to_crs(epsg='2285')
person.crs = {'init': 'epsg:2285'}

In [159]:
# Keep only persons that have coordinates for a work location.
# The mask is selected with notnull() rather than negating isnull() with unary
# minus, which is not the conventional way to invert a boolean mask.
_person = person[person['work_lat'].notnull()]
_person.crs = {'init': 'epsg:2285'}

In [160]:
# Attach each geography ID to the work-location points, one join per layer.
# Each entry is (layer, field to keep from the layer, output column name).
work_layers = [
    (tract, 'GEOID10', 'work_tract'),
    (block, 'GEOID10', 'work_block'),
    (block_group, 'GEOID10', 'work_bg'),
    (puma, 'PUMACE10', 'work_puma10'),
    (rgc, 'NAME', 'work_rgcname'),
    (taz, 'TAZ', 'work_taz'),
]

df = _person
for layer, keep_field, rename_field in work_layers:
    df = spatial_join(gdf1=df, gdf2=layer, keep_field=keep_field, rename_field=rename_field, crs=person.crs)

In [161]:
# Merge the work-location geography columns back onto the full person table.
# A left merge keeps persons without work coordinates.
work_cols = ['personid','work_tract','work_block','work_bg',
             'work_puma10','work_rgcname','work_taz']
df_person = person.merge(df[work_cols], on='personid', how='left')

# Drop geometry fields so they are not carried into the output table
df_person = df_person.drop(['lat_lon_geog','geometry'], axis=1)

### Previous Work Location

In [162]:
# Build one point per person from the previous-work longitude/latitude columns.
prev_work_points = [Point((float(lng), float(lat)))
                    for lng, lat in zip(person['prev_work_lng'], person['prev_work_lat'])]
geog = gpd.GeoSeries(prev_work_points, index=person.index)
geog.crs = {'init': 'epsg:4326'}    # WGS 84

# This overwrites the 'lat_lon_geog' and 'geometry' columns on person with the
# previous-work points (unprojected and reprojected to EPSG:2285 respectively).
person['lat_lon_geog'] = geog
person['geometry'] = geog.to_crs(epsg='2285')
person.crs = {'init': 'epsg:2285'}

In [163]:
# Keep only persons that have coordinates for a previous work location.
# The mask is selected with notnull() rather than negating isnull() with unary
# minus, which is not the conventional way to invert a boolean mask.
_person = person[person['prev_work_lat'].notnull()]
_person.crs = {'init': 'epsg:2285'}

In [164]:
# Attach each geography ID to the previous-work points, one join per layer.
# Each entry is (layer, field to keep from the layer, output column name).
prev_work_layers = [
    (tract, 'GEOID10', 'prev_work_tract'),
    (block, 'GEOID10', 'prev_work_block'),
    (block_group, 'GEOID10', 'prev_work_bg'),
    (puma, 'PUMACE10', 'prev_work_puma10'),
    (rgc, 'NAME', 'prev_work_rgcname'),
    (taz, 'TAZ', 'prev_work_taz'),
]

df = _person
for layer, keep_field, rename_field in prev_work_layers:
    df = spatial_join(gdf1=df, gdf2=layer, keep_field=keep_field, rename_field=rename_field, crs=person.crs)

In [165]:
# Add the previous-work geography columns to the accumulated person output table.
# A left merge keeps persons without previous-work coordinates.
prev_work_cols = ['personid','prev_work_tract','prev_work_block','prev_work_bg',
                  'prev_work_puma10','prev_work_rgcname','prev_work_taz']
df_person = df_person.merge(df[prev_work_cols], on='personid', how='left')

### School Location

In [166]:
# Build one point per person from the school longitude/latitude columns.
school_points = [Point((float(lng), float(lat)))
                 for lng, lat in zip(person['school_loc_lng'], person['school_loc_lat'])]
geog = gpd.GeoSeries(school_points, index=person.index)
geog.crs = {'init': 'epsg:4326'}    # WGS 84

# This overwrites the 'lat_lon_geog' and 'geometry' columns on person with the
# school points (unprojected and reprojected to EPSG:2285 respectively).
person['lat_lon_geog'] = geog
person['geometry'] = geog.to_crs(epsg='2285')
person.crs = {'init': 'epsg:2285'}

In [167]:
# Keep only persons that have coordinates for a school location.
# The mask is selected with notnull() rather than negating isnull() with unary
# minus, which is not the conventional way to invert a boolean mask.
_person = person[person['school_loc_lat'].notnull()]
_person.crs = {'init': 'epsg:2285'}

In [168]:
# Attach each geography ID to the school-location points, one join per layer.
# Each entry is (layer, field to keep from the layer, output column name).
school_layers = [
    (tract, 'GEOID10', 'school_tract'),
    (block, 'GEOID10', 'school_block'),
    (block_group, 'GEOID10', 'school_bg'),
    (puma, 'PUMACE10', 'school_puma10'),
    (rgc, 'NAME', 'school_rgcname'),
    (taz, 'TAZ', 'school_taz'),
]

df = _person
for layer, keep_field, rename_field in school_layers:
    df = spatial_join(gdf1=df, gdf2=layer, keep_field=keep_field, rename_field=rename_field, crs=person.crs)

In [169]:
# Add the school geography columns to the accumulated person output table.
# A left merge keeps persons without school coordinates.
school_cols = ['personid','school_tract','school_block','school_bg',
               'school_puma10','school_rgcname','school_taz']
df_person = df_person.merge(df[school_cols], on='personid', how='left')

In [170]:
# Write the person table (with work, previous-work and school geography columns) to Excel
person_out_path = r'J:\Projects\Surveys\HHTravel\Survey2017\Data\Geolayers\geolayers-added\2-Person-v3-Public.xlsx'
df_person.to_excel(person_out_path, index=False, sheet_name='2-Person')

# Trip File

In [186]:
# Load the trip records from the '5-Trip' worksheet, skipping the first row.
# `sheet_name` is the keyword pandas documents for read_excel; it also matches
# the `sheet_name=` spelling the to_excel calls in this notebook already use.
trip = pd.read_excel(r'J:\Projects\Surveys\HHTravel\Survey2017\Data\Geolayers\5-Trip-v2-Public.xlsx',
                     sheet_name='5-Trip', skiprows=1)

### Trip Origin

In [187]:
# Build one point per trip from the origin longitude/latitude columns.
origin_points = [Point((float(lng), float(lat)))
                 for lng, lat in zip(trip['origin_lng'], trip['origin_lat'])]
geog = gpd.GeoSeries(origin_points, index=trip.index)
geog.crs = {'init': 'epsg:4326'}    # WGS 84

# Keep the unprojected points for reference ...
trip['lat_lon_geog'] = geog
# ... and store the points reprojected to EPSG:2285 as 'geometry' so they match the shapefiles.
trip['geometry'] = geog.to_crs(epsg='2285')
trip.crs = {'init': 'epsg:2285'}

In [188]:
# Attach each geography ID to the trip-origin points, one join per layer.
# The CRS handed to spatial_join is trip.crs: these are trip records, so this
# cell should not depend on the person table having been processed first.
# Each entry is (layer, field to keep from the layer, output column name).
origin_layers = [
    (tract, 'GEOID10', 'o_tract'),
    (block, 'GEOID10', 'o_block'),
    (block_group, 'GEOID10', 'o_bg'),
    (puma, 'PUMACE10', 'o_puma10'),
    (rgc, 'NAME', 'o_rgcname'),
    (taz, 'TAZ', 'o_taz'),
]

df = trip
for layer, keep_field, rename_field in origin_layers:
    df = spatial_join(gdf1=df, gdf2=layer, keep_field=keep_field, rename_field=rename_field, crs=trip.crs)

In [189]:
# Merge the origin geography columns back onto the trip table by trip ID.
origin_cols = ['tripid','o_tract','o_block','o_bg',
               'o_puma10','o_rgcname','o_taz']
df_trip = trip.merge(df[origin_cols], on='tripid', how='left')

### Trip Destination

In [190]:
# Build one point per trip from the destination longitude/latitude columns.
dest_points = [Point((float(lng), float(lat)))
               for lng, lat in zip(trip['dest_lng'], trip['dest_lat'])]
geog = gpd.GeoSeries(dest_points, index=trip.index)
geog.crs = {'init': 'epsg:4326'}    # WGS 84

# This overwrites the 'lat_lon_geog' and 'geometry' columns on trip with the
# destination points (unprojected and reprojected to EPSG:2285 respectively).
trip['lat_lon_geog'] = geog
trip['geometry'] = geog.to_crs(epsg='2285')
trip.crs = {'init': 'epsg:2285'}

In [191]:
# Attach each geography ID to the trip-destination points, one join per layer.
# The CRS handed to spatial_join is trip.crs: these are trip records, so this
# cell should not depend on the person table having been processed first.
# Each entry is (layer, field to keep from the layer, output column name).
dest_layers = [
    (tract, 'GEOID10', 'd_tract'),
    (block, 'GEOID10', 'd_block'),
    (block_group, 'GEOID10', 'd_bg'),
    (puma, 'PUMACE10', 'd_puma10'),
    (rgc, 'NAME', 'd_rgcname'),
    (taz, 'TAZ', 'd_taz'),
]

df = trip
for layer, keep_field, rename_field in dest_layers:
    df = spatial_join(gdf1=df, gdf2=layer, keep_field=keep_field, rename_field=rename_field, crs=trip.crs)

In [192]:
# Add the destination geography columns to the accumulated trip output table by trip ID.
dest_cols = ['tripid','d_tract','d_block','d_bg',
             'd_puma10','d_rgcname','d_taz']
df_trip = df_trip.merge(df[dest_cols], on='tripid', how='left')

In [193]:
# Drop geometry fields so they are not written to the workbook
df_trip = df_trip.drop(['lat_lon_geog','geometry'], axis=1)

# Write the trip table (with origin and destination geography columns) to Excel.
# NOTE(review): the input workbook is 5-Trip-v2-Public.xlsx but the output is
# named 5-Trip-v3-Public.xlsx -- confirm the version bump is intended.
trip_out_path = r'J:\Projects\Surveys\HHTravel\Survey2017\Data\Geolayers\geolayers-added\5-Trip-v3-Public.xlsx'
df_trip.to_excel(trip_out_path, index=False, sheet_name='5-Trip')