Land use amenities to collect for each census tract (ACS variable codes listed where applicable):
- [x] % white: DP05_0037PE
- [x] median home value: B25077_001E
- [x] median # rooms: B25018_001E
- [ ] transit access
- [x] access to jobs
- [x] median income: B06011_001E
- [x] median life expectancy
- [x] % bach degree: S1501_C02_012E
- [ ] avg lot size
- [x] % poverty: S1701_C03_001E

In [113]:
import pandas as pd
import dask.dataframe as dd
from census import Census
import requests
import pandana as pdna
import geopandas as gpd
from shapely.geometry import Point
import numpy as np
%matplotlib inline

### Load Land Use Data

Census Tract Geographies

In [92]:
# Load the census tract polygons from the TIGER/Line shapefile.
# `gpd.read_file` has no `dtype` parameter: extra keyword arguments are passed
# straight to the underlying I/O engine, which does not use them for column
# typing. The INTPTLAT/INTPTLON columns the old argument named are not used
# anywhere below (only GEOID, ALAND and the computed centroid are kept).
tracts = gpd.read_file('../data/tl_2017_06_tract.shp')

In [94]:
# Store each tract polygon's centroid as its own column.
# NOTE(review): the centroid is computed in the shapefile's native CRS,
# presumably geographic lon/lat — confirm this is accurate enough for the
# distance calculation further down.
tracts['centroid'] = tracts.geometry.centroid

In [95]:
# Keep only the tract identifier, land area and centroid; the polygon geometry
# and all other attribute columns are dropped here.
tracts = tracts[['GEOID', 'ALAND', 'centroid']]

MTC Data

In [2]:
# Parcel table, keyed by primary_id; block_id is read as text.
parcels = pd.read_csv(
    '../data/parcels.csv',
    dtype={'block_id': str},
    index_col='primary_id',
)

# Drive-network nodes (keyed by osmid) and edges (keyed by uniqueid).
nodes = pd.read_csv('../data/drive_nodes.csv', index_col='osmid')
edges = pd.read_csv('../data/drive_edges.csv', index_col='uniqueid')

# Per-node accessibility variables; joined to parcels via `osmid` later on.
access_vars = pd.read_csv('../data/drive_net_vars.csv')

  mask |= (ar1 == a)


In [3]:
# Build the pandana network from node coordinates and directed edges
# (u -> v), using edge length as the impedance column.
net = pdna.Network(
    nodes['x'],
    nodes['y'],
    edges['u'],
    edges['v'],
    edges[['length']],
    twoway=False,
)

In [4]:
# Attach to every parcel the id of the network node returned by
# `net.get_node_ids` for its x/y coordinates.
parcels['node_id'] = net.get_node_ids(parcels['x'], parcels['y'])

In [5]:
# Bring the node-level accessibility variables onto each parcel.
# This is an inner join, so parcels whose node has no row in `access_vars`
# are dropped, and the original `primary_id` index is not preserved.
parcels = parcels.merge(access_vars, left_on='node_id', right_on='osmid')

In [6]:
# The tract id is the first 11 characters of the block id.
parcels['tract_id'] = parcels['block_id'].str.slice(stop=11)

In [7]:
# Restrict parcels to the grouping key and the three variables that get
# aggregated to tract level in the next step.
parcel_cols = ['tract_id', 'acres', 'jobs_10000', 'pop_jobs_ratio_10000']
parcels = parcels.loc[:, parcel_cols]

In [8]:
# Tract-level medians of parcel size and the accessibility variables
# (result is indexed by tract_id).
parcels_jobs_access = parcels.groupby('tract_id').agg('median')

In [9]:
# Mark every column as a median with a `med_` prefix, then give the acreage
# column its more descriptive name.
parcels_jobs_access = (
    parcels_jobs_access
    .add_prefix('med_')
    .rename(columns={'med_acres': 'med_parcel_size'})
)

CDC Data

In [10]:
# Tract-level life expectancy file (CA_A.CSV) downloaded from the CDC server.
# 'Tract ID' is read as text rather than parsed as a number.
life_exp = pd.read_csv(
    'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NVSS/USALEEP/CSV/CA_A.CSV',
    dtype={'Tract ID': str},
)

In [11]:
# Align column names with the rest of the notebook: the tract key becomes
# `tract_id` and the e(0) column becomes `life_exp`.
life_exp = life_exp.rename(
    columns={'Tract ID': 'tract_id', 'e(0)': 'life_exp'},
)

ACS Data

In [12]:
# One entry per ACS product: the URL suffix selecting the product's endpoint
# and the comma-separated variable codes to request from it.
tables_dict = {
    'data profiles': dict(suburl='/profile', tables='DP05_0037PE'),
    'subject tables': dict(suburl='/subject', tables='S1501_C02_012E,S1701_C03_001E'),
    'detailed tables': dict(suburl='', tables='B25077_001E,B25018_001E,B06011_001E'),
}

# URL template: {0} is the product suffix, {1} the variable list. The query
# requests every tract in the listed counties of state 06.
base_url = (
    'https://api.census.gov/data/2017/acs/acs5{0}?get='
    '{1}&for=tract:*&in=state:06&in=county:001,013,041,055,075,081,085,095,097'
)

In [13]:
# The Census API reports "estimate not available" through large negative
# sentinel codes instead of nulls. Left as-is they would be treated as real
# values in every delta computed later, so they are turned into NaN here.
ACS_MISSING_CODES = [
    -222222222, -333333333, -555555555,
    -666666666, -888888888, -999999999,
]
ACS_ID_COLS = ['state', 'county', 'tract']

for table in tables_dict.keys():
    print('Getting data from {0}'.format(table))
    url = base_url.format(tables_dict[table]['suburl'], tables_dict[table]['tables'])
    raw = requests.get(url)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    raw.raise_for_status()

    # The payload is a list of rows whose first row is the header; parse once.
    rows = raw.json()
    df = pd.DataFrame(rows[1:], columns=rows[0])

    # Identifier columns stay as text (leading zeros matter); every other
    # column is a numeric estimate.
    for col in df.columns:
        if col not in ACS_ID_COLS:
            df[col] = (
                pd.to_numeric(df[col])
                .astype(float)
                .replace(ACS_MISSING_CODES, np.nan)
            )
    tables_dict[table]['data'] = df

Getting data from data profiles
Getting data from subject tables
Getting data from detailed tables


In [14]:
# Empty frame holding only the geography key columns; it seeds the
# successive merges of the downloaded ACS tables.
acs_all = pd.DataFrame(columns=['state','county','tract'])

In [15]:
# Fold each downloaded table into `acs_all` on the geography keys.
# The right join keeps every row of the table being added.
geo_keys = ['state', 'county', 'tract']
for spec in tables_dict.values():
    acs_all = acs_all.merge(spec['data'], on=geo_keys, how='right')

In [16]:
# Replace the ACS variable codes with readable column names.
acs_col_names = {
    'DP05_0037PE': 'pct_white',
    'S1501_C02_012E': 'pct_bach',
    'S1701_C03_001E': 'pct_poverty',
    'B25077_001E': 'med_home_val',
    'B25018_001E': 'med_num_rooms',
    'B06011_001E': 'med_income',
}
acs_all = acs_all.rename(columns=acs_col_names)

In [22]:
# Concatenate state + county + tract codes into a single tract identifier,
# then discard the three component columns.
geo_parts = ['state', 'county', 'tract']
acs_all['tract_id'] = acs_all[geo_parts[0]] + acs_all[geo_parts[1]] + acs_all[geo_parts[2]]
acs_all = acs_all.drop(columns=['state', 'county', 'tract'])

In [23]:
# Preview the first rows of the combined ACS table.
acs_all.head()

Unnamed: 0,pct_white,pct_bach,pct_poverty,med_home_val,med_num_rooms,med_income,tract_id
0,79.6,40.4,6.9,1854600.0,4.4,73762.0,6081612500
1,83.2,20.8,3.0,798700.0,5.9,36394.0,6081613501
2,46.9,15.9,24.0,722600.0,3.1,24035.0,6081610201
3,46.3,19.5,26.3,342100.0,2.9,26061.0,6081610202
4,43.8,36.2,3.5,1232800.0,5.7,104221.0,6081610303


In [101]:
# Start the tract-level land use table: ACS variables plus life expectancy.
# Inner join — tracts missing from either source are dropped.
land_use = acs_all.merge(life_exp[['tract_id', 'life_exp']], on='tract_id')

In [102]:
# Add the tract-level medians of parcel size and job accessibility
# (inner join on tract_id).
land_use = land_use.merge(parcels_jobs_access, on='tract_id')

In [103]:
# Add land area and centroid for each tract; `GEOID` is the tract key on the
# shapefile side (inner join).
land_use = land_use.merge(tracts, left_on='tract_id', right_on='GEOID')

### Create OD matrix

In [104]:
# Every ordered (from, to) pair of tracts, including same-tract pairs.
mix = pd.MultiIndex.from_product(
    [land_use['tract_id']] * 2,
    names=['from', 'to'],
)

In [105]:
# Flatten the pair index into a two-column frame ('from', 'to').
ods = mix.to_frame(index=False)

In [106]:
# Attach the origin tract's land use attributes to every pair.
ods = pd.merge(ods, land_use, left_on='from', right_on='tract_id')

In [107]:
# Attach the destination tract's attributes. Columns that now exist for both
# ends are disambiguated with `_from` / `_to` suffixes.
ods = pd.merge(
    ods,
    land_use,
    left_on='to',
    right_on='tract_id',
    suffixes=('_from', '_to'),
)

In [108]:
# Preview the origin-destination table after both attribute merges.
ods.head()

Unnamed: 0,from,to,pct_white_from,pct_bach_from,pct_poverty_from,med_home_val_from,med_num_rooms_from,med_income_from,tract_id_from,life_exp_from,...,med_num_rooms_to,med_income_to,tract_id_to,life_exp_to,med_parcel_size_to,med_jobs_10000_to,med_pop_jobs_ratio_10000_to,GEOID_to,ALAND_to,centroid_to
0,6081612500,6081612500,79.6,40.4,6.9,1854600.0,4.4,73762.0,6081612500,84.9,...,4.4,73762.0,6081612500,84.9,0.186643,125491.0,1.841444,6081612500,1993832,POINT (-122.17782 37.45570)
1,6081613501,6081612500,83.2,20.8,3.0,798700.0,5.9,36394.0,6081613501,83.4,...,4.4,73762.0,6081612500,84.9,0.186643,125491.0,1.841444,6081612500,1993832,POINT (-122.17782 37.45570)
2,6081610201,6081612500,46.9,15.9,24.0,722600.0,3.1,24035.0,6081610201,80.6,...,4.4,73762.0,6081612500,84.9,0.186643,125491.0,1.841444,6081612500,1993832,POINT (-122.17782 37.45570)
3,6081610202,6081612500,46.3,19.5,26.3,342100.0,2.9,26061.0,6081610202,77.4,...,4.4,73762.0,6081612500,84.9,0.186643,125491.0,1.841444,6081612500,1993832,POINT (-122.17782 37.45570)
4,6081610303,6081612500,43.8,36.2,3.5,1232800.0,5.7,104221.0,6081610303,81.2,...,4.4,73762.0,6081612500,84.9,0.186643,125491.0,1.841444,6081612500,1993832,POINT (-122.17782 37.45570)


In [109]:
# List the available `_from` / `_to` columns before computing deltas.
ods.columns

Index(['from', 'to', 'pct_white_from', 'pct_bach_from', 'pct_poverty_from',
       'med_home_val_from', 'med_num_rooms_from', 'med_income_from',
       'tract_id_from', 'life_exp_from', 'med_parcel_size_from',
       'med_jobs_10000_from', 'med_pop_jobs_ratio_10000_from', 'GEOID_from',
       'ALAND_from', 'centroid_from', 'pct_white_to', 'pct_bach_to',
       'pct_poverty_to', 'med_home_val_to', 'med_num_rooms_to',
       'med_income_to', 'tract_id_to', 'life_exp_to', 'med_parcel_size_to',
       'med_jobs_10000_to', 'med_pop_jobs_ratio_10000_to', 'GEOID_to',
       'ALAND_to', 'centroid_to'],
      dtype='object')

### OD Deltas

In [116]:
# Destination-minus-origin comparisons for each tract attribute.
# Each spec is (output column, attribute stem, relative?). Relative specs are
# expressed as a fraction of the origin value; the others are plain differences.
# NOTE(review): relative differences are inf/NaN when the origin value is 0.
delta_specs = [
    ('pct_white_delta', 'pct_white', False),
    ('pct_bach_delta', 'pct_bach', False),
    ('pct_poverty_delta', 'pct_poverty', False),
    ('med_home_val_pct_diff', 'med_home_val', True),
    ('med_num_rooms_delta', 'med_num_rooms', False),
    ('med_income_pct_diff', 'med_income', True),
    ('life_exp_delta', 'life_exp', False),
    ('jobs_10km_pct_diff', 'med_jobs_10000', True),
    ('pop_jobs_ratio_delta', 'med_pop_jobs_ratio_10000', False),
    ('med_parcel_size_pct_diff', 'med_parcel_size', True),
]
for out_col, stem, relative in delta_specs:
    origin = ods[stem + '_from']
    diff = ods[stem + '_to'] - origin
    ods[out_col] = diff / origin if relative else diff


# distance
def _project_centroids(points):
    """Wrap centroid points in a GeoSeries and reproject them to EPSG:2768.

    The points are declared as EPSG:4326 lon/lat, matching the original
    notebook. The CRS is given as an authority string because the
    ``{'init': ...}`` dict form is deprecated.
    NOTE(review): the centroids come from the tract shapefile, whose native
    CRS may not be exactly EPSG:4326 — confirm.
    """
    return gpd.GeoSeries(points, crs='EPSG:4326').to_crs(epsg=2768)


# Centroid-to-centroid distance in the units of the projected CRS.
ods['distance'] = _project_centroids(ods['centroid_from']).distance(
    _project_centroids(ods['centroid_to']))

# A tract's distance to itself would be 0; use half the square root of its
# land area (ALAND) as a stand-in for the within-tract distance.
same_tract = ods['from'] == ods['to']
ods.loc[same_tract, 'distance'] = np.sqrt(ods.loc[same_tract, 'ALAND_from']) / 2

In [142]:
# Preview the origin-destination table.
# NOTE(review): the saved output shows only the trimmed delta columns, so this
# cell appears to have been run after the later column-selection cell.
ods.head()

Unnamed: 0,tract_id_from,tract_id_to,pct_white_delta,pct_bach_delta,pct_poverty_delta,med_home_val_pct_diff,med_num_rooms_delta,med_income_pct_diff,life_exp_delta,jobs_10km_pct_diff,pop_jobs_ratio_delta,med_parcel_size_pct_diff,distance
0,6081612500,6081612500,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,706.015581
1,6081613501,6081612500,-3.6,19.6,3.9,1.322023,-1.5,1.026763,1.5,27.168575,-2.34464,0.127392,22653.899784
2,6081610201,6081612500,32.7,24.5,-17.1,1.566565,1.3,2.068941,4.3,0.004539,0.019333,0.589057,4533.923986
3,6081610202,6081612500,33.3,20.9,-19.4,4.421222,1.5,1.83036,7.5,-0.01615,0.056896,-0.198132,5925.981557
4,6081610303,6081612500,35.8,4.2,3.4,0.50438,-1.3,-0.292254,3.7,0.340931,0.25893,0.822274,10953.13879


### Load mover data

In [119]:
# Load the mover records. The FIPS and census-tract code columns are read as
# text so they can be concatenated into block ids later.
code_cols = [
    'PROP_FIPSCD_from', 'PROP_CENSUSTRACT_from',
    'PROP_FIPSCD_to', 'PROP_CENSUSTRACT_to',
]
movers = pd.read_csv(
    '../data/movers.csv',
    index_col=0,
    dtype={name: str for name in code_cols},
)

In [120]:
# Keep moves whose `to_effdate` lies strictly between 201012 and 201901.
# NOTE(review): the bounds look like YYYYMM values, i.e. Jan 2011 through
# Dec 2018 — confirm.
# `.copy()` makes the result an independent frame so the column assignments in
# the following cells do not raise SettingWithCopyWarning.
in_window = (movers['to_effdate'] > 201012) & (movers['to_effdate'] < 201901)
movers = movers[in_window].copy()

In [121]:
# Derive block/tract identifiers and price per square foot for both ends of
# each move. The block id is FIPS code + census-tract code; the tract id is
# its first 11 characters.
movers = movers.assign(
    block_id_from=lambda d: d['PROP_FIPSCD_from'] + d['PROP_CENSUSTRACT_from'],
    tract_id_from=lambda d: d['block_id_from'].str[:11],
    block_id_to=lambda d: d['PROP_FIPSCD_to'] + d['PROP_CENSUSTRACT_to'],
    tract_id_to=lambda d: d['block_id_to'].str[:11],
    unit_price_from=lambda d: d['PROP_VALCALC_from'] / d['PROP_UNVBLDSQFT_from'],
    unit_price_to=lambda d: d['PROP_VALCALC_to'] / d['PROP_UNVBLDSQFT_to'],
)

### Compute mover deltas

In [122]:
# property quality
movers['qlty_decrease'] = 0
movers.loc[
    (movers['PROP_QLTY_from'] == 'QLU') &
    (movers['PROP_QLTY_to'].isin(['QEX', 'QGO', 'QAV', 'QFA', 'QPO'])), 'qlty_decrease'] = 1
movers.loc[
    (movers['PROP_QLTY_from'] == 'QEX') &
    (movers['PROP_QLTY_to'].isin(['QGO', 'QAV', 'QFA', 'QPO'])), 'qlty_decrease'] = 1
movers.loc[
    (movers['PROP_QLTY_from'] == 'QGO') &
    (movers['PROP_QLTY_to'].isin(['QAV', 'QFA', 'QPO'])), 'qlty_decrease'] = 1
movers.loc[
    (movers['PROP_QLTY_from'] == 'QAV') &
    (movers['PROP_QLTY_to'].isin(['QFA', 'QPO'])), 'qlty_decrease'] = 1
movers.loc[
    (movers['PROP_QLTY_from'] == 'QFA') &
    (movers['PROP_QLTY_to'].isin(['QPO'])), 'qlty_decrease'] = 1

# property value
movers['prop_val_pct_chg'] = (movers['PROP_VALCALC_to'] - movers['PROP_VALCALC_from']) / movers['PROP_VALCALC_from']

# property size
movers['prop_size_pct_chg'] = (
    movers['PROP_UNVBLDSQFT_to'] - movers['PROP_UNVBLDSQFT_from']) / movers['PROP_UNVBLDSQFT_from']

# unit price
movers['price_sqft_pct_chg'] = (movers['unit_price_to'] - movers['unit_price_from']) / movers['unit_price_from']

# bedrooms
movers['bedrooms_delta'] = movers['PROP_BEDRMS_to'] - movers['PROP_BEDRMS_from']

In [123]:
# List the mover columns, including the derived id, price and delta fields.
movers.columns

Index(['from_addrid', 'from_lat', 'from_lon', 'PROP_FIPSCD_from',
       'PROP_CENSUSTRACT_from', 'PROP_MUNINAME_from', 'PROP_OWNEROCC_from',
       'PROP_QLTY_from', 'PROP_VALCALC_from', 'PROP_UNVBLDSQFT_from',
       'PROP_BEDRMS_from', 'to_addrid', 'to_lat', 'to_lon', 'PROP_FIPSCD_to',
       'PROP_CENSUSTRACT_to', 'PROP_MUNINAME_to', 'PROP_OWNEROCC_to',
       'PROP_QLTY_to', 'PROP_VALCALC_to', 'PROP_UNVBLDSQFT_to',
       'PROP_BEDRMS_to', 'to_effdate', 'distance', 'AGE', 'LOR', 'HOMEOWNERCD',
       'EHI', 'DWELLTYPE', 'PCTB', 'PCTW', 'PCTH', 'PCTA', 'MHV', 'MEDSCHL',
       'PCTOCCW', 'PCTOCCB', 'block_id_from', 'tract_id_from', 'block_id_to',
       'tract_id_to', 'unit_price_from', 'unit_price_to', 'qlty_decrease',
       'prop_val_pct_chg', 'prop_size_pct_chg', 'price_sqft_pct_chg',
       'bedrooms_delta'],
      dtype='object')

### Merge renters on land use ODs

In [135]:
# Keep only movers whose HOMEOWNERCD is 'R', with the tract ids, household
# attributes and property-change measures needed for the merge.
renter_cols = [
    'tract_id_from', 'tract_id_to', 'AGE', 'EHI', 'PCTW', 'MEDSCHL', 'PCTOCCW',
    'qlty_decrease', 'prop_val_pct_chg',
    'prop_size_pct_chg', 'price_sqft_pct_chg', 'bedrooms_delta',
]
is_renter = movers['HOMEOWNERCD'].eq('R')
renters = movers.loc[is_renter, renter_cols]

In [130]:
# List the OD columns again, now including the delta and distance fields.
ods.columns

Index(['from', 'to', 'pct_white_from', 'pct_bach_from', 'pct_poverty_from',
       'med_home_val_from', 'med_num_rooms_from', 'med_income_from',
       'tract_id_from', 'life_exp_from', 'med_parcel_size_from',
       'med_jobs_10000_from', 'med_pop_jobs_ratio_10000_from', 'GEOID_from',
       'ALAND_from', 'centroid_from', 'pct_white_to', 'pct_bach_to',
       'pct_poverty_to', 'med_home_val_to', 'med_num_rooms_to',
       'med_income_to', 'tract_id_to', 'life_exp_to', 'med_parcel_size_to',
       'med_jobs_10000_to', 'med_pop_jobs_ratio_10000_to', 'GEOID_to',
       'ALAND_to', 'centroid_to', 'pct_white_delta', 'pct_bach_delta',
       'pct_poverty_delta', 'med_home_val_pct_diff', 'med_num_rooms_delta',
       'med_income_pct_diff', 'life_exp_delta', 'jobs_10km_pct_diff',
       'pop_jobs_ratio_delta', 'med_parcel_size_pct_diff', 'distance'],
      dtype='object')

In [143]:
# Reduce the OD table to the pair keys plus the computed deltas and distance,
# and rename the destination key to `tract_id_alt` in the same expression.
# Chaining a non-in-place `rename` avoids calling `rename(inplace=True)` on a
# column-sliced frame, which triggers SettingWithCopyWarning.
# NOTE: this cell is not idempotent — after it runs, `ods` no longer has
# `tract_id_to`, so running it a second time raises KeyError.
od_keep_cols = [
    'tract_id_from', 'tract_id_to', 'pct_white_delta', 'pct_bach_delta', 'pct_poverty_delta',
    'med_home_val_pct_diff', 'med_num_rooms_delta', 'med_income_pct_diff', 'life_exp_delta',
    'jobs_10km_pct_diff', 'pop_jobs_ratio_delta', 'med_parcel_size_pct_diff', 'distance'
]
ods = ods[od_keep_cols].rename(columns={'tract_id_to': 'tract_id_alt'})

In [144]:
# Preview the trimmed OD table (destination key is now `tract_id_alt`).
ods.head()

Unnamed: 0,tract_id_from,tract_id_alt,pct_white_delta,pct_bach_delta,pct_poverty_delta,med_home_val_pct_diff,med_num_rooms_delta,med_income_pct_diff,life_exp_delta,jobs_10km_pct_diff,pop_jobs_ratio_delta,med_parcel_size_pct_diff,distance
0,6081612500,6081612500,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,706.015581
1,6081613501,6081612500,-3.6,19.6,3.9,1.322023,-1.5,1.026763,1.5,27.168575,-2.34464,0.127392,22653.899784
2,6081610201,6081612500,32.7,24.5,-17.1,1.566565,1.3,2.068941,4.3,0.004539,0.019333,0.589057,4533.923986
3,6081610202,6081612500,33.3,20.9,-19.4,4.421222,1.5,1.83036,7.5,-0.01615,0.056896,-0.198132,5925.981557
4,6081610303,6081612500,35.8,4.2,3.4,0.50438,-1.3,-0.292254,3.7,0.340931,0.25893,0.822274,10953.13879


In [147]:
# Join each renter to every OD row that shares their origin tract.
# The join key is the origin only, so a renter gets one row per `tract_id_alt`
# available from that origin; the renter's actual destination stays in
# `tract_id_to`. Inner join — renters whose origin tract is not in `ods` are
# dropped.
mct = renters.merge(ods, on='tract_id_from')

### Compute deltas

#### infutor data

### Save data

In [151]:
# Write the merged renter / alternative-tract table to disk (the row index is
# written as the first column).
mct.to_csv('../data/renter_moves_mct.csv')