Land use amenities to get:
- [x] % white: DP05_0037PE
- [x] median home value: B25077_001E
- [x] median # rooms: B25018_001E
- [ ] transit access
- [x] access to jobs
- [x] median income: B06011_001E
- [x] median life expectancy
- [x] % bach degree: S1501_C02_012E
- [ ] avg lot size
- [x] % poverty: S1701_C03_001E

In [268]:
import pandas as pd
import dask.dataframe as dd
from census import Census
import requests
import pandana as pdna
%matplotlib inline

### Load Land Use Data

MTC Data

In [347]:
# Parcel records keyed by primary_id; block_id is read as text.
parcels = pd.read_csv(
    '../data/parcels.csv',
    index_col='primary_id',
    dtype={'block_id': str},
)

# Drive-network nodes and edges, indexed by their id columns.
nodes = pd.read_csv('../data/drive_nodes.csv', index_col='osmid')
edges = pd.read_csv('../data/drive_edges.csv', index_col='uniqueid')

# Node-level variables; joined onto parcels by node id in a later cell.
access_vars = pd.read_csv('../data/drive_net_vars.csv')

  mask |= (ar1 == a)


In [269]:
# Build the pandana network from node coordinates, edge endpoints (u, v) and
# the edge 'length' column; twoway=False is passed through unchanged.
node_x, node_y = nodes['x'], nodes['y']
edge_from, edge_to = edges['u'], edges['v']
edge_weights = edges[['length']]
net = pdna.Network(node_x, node_y, edge_from, edge_to, edge_weights, twoway=False)

In [348]:
# Look up a network node id for every parcel from its x/y coordinates.
parcel_node_ids = net.get_node_ids(parcels['x'], parcels['y'])
parcels['node_id'] = parcel_node_ids

In [349]:
# Join the node-level variables onto parcels (parcel node_id == osmid).
# Default inner join: parcels without a matching node row are dropped.
parcels = parcels.merge(access_vars, left_on='node_id', right_on='osmid')

In [350]:
# The tract id is taken as the first 11 characters of the block id.
block_ids = parcels['block_id']
parcels['tract_id'] = block_ids.str[:11]

In [351]:
# Keep only the tract key and the three parcel attributes summarised per tract below.
keep_cols = ['tract_id', 'acres', 'jobs_10000', 'pop_jobs_ratio_10000']
parcels = parcels[keep_cols]

In [381]:
# Tract-level median of every remaining parcel column (result is indexed by tract_id).
parcels_by_tract = parcels.groupby('tract_id')
parcels_jobs_access = parcels_by_tract.median()

In [382]:
# Prefix every column with 'med_'; the acreage column ends up as 'med_parcel_size'.
renamed_cols = {}
for col in parcels_jobs_access.columns:
    prefixed = 'med_' + col
    renamed_cols[col] = 'med_parcel_size' if prefixed == 'med_acres' else prefixed
parcels_jobs_access.rename(columns=renamed_cols, inplace=True)

CDC Data

In [4]:
# Read the USALEEP CA_A.CSV file from the CDC site; 'Tract ID' is kept as text.
life_exp_url = 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NVSS/USALEEP/CSV/CA_A.CSV'
life_exp = pd.read_csv(life_exp_url, dtype={'Tract ID': str})

In [123]:
# Use the same key name as the other tables and a readable name for e(0).
life_exp_column_names = {'Tract ID': 'tract_id', 'e(0)': 'life_exp'}
life_exp.rename(columns=life_exp_column_names, inplace=True)

ACS Data

In [98]:
# ACS variables to request, grouped by the API sub-path that serves them.
# Each entry holds the URL sub-path and a comma-separated variable list:
#   DP05_0037PE     % white
#   S1501_C02_012E  % bachelor's degree      S1701_C03_001E  % poverty
#   B25077_001E     median home value        B25018_001E     median # rooms
#   B06011_001E     median income
tables_dict = {}
tables_dict['data profiles'] = {'suburl': '/profile', 'tables': 'DP05_0037PE'}
tables_dict['subject tables'] = {'suburl': '/subject', 'tables': 'S1501_C02_012E,S1701_C03_001E'}
tables_dict['detailed tables'] = {'suburl': '', 'tables': 'B25077_001E,B25018_001E,B06011_001E'}

# URL template: {0} is the sub-path, {1} the variable list. The geography
# clause requests every tract in the listed counties of state 06.
base_url = (
    'https://api.census.gov/data/2017/acs/acs5{0}?get='
    '{1}&for=tract:*&in=state:06&in=county:001,013,041,055,075,081,085,095,097'
)

In [372]:
# Values the Census API returns in place of an estimate that could not be
# computed or displayed. Left as-is they would be treated as real (hugely
# negative) estimates and corrupt the deltas and percent differences below.
# NOTE(review): confirm this list against the Census "estimate and annotation
# values" documentation for the vintage being requested.
ACS_MISSING_VALUES = [
    -999999999, -888888888, -666666666, -555555555, -333333333, -222222222]

# Geography identifiers stay as text; every other column is an estimate.
GEO_COLS = ['state', 'county', 'tract']

# Download each ACS table group and store it under tables_dict[<group>]['data'].
for table, table_info in tables_dict.items():
    print('Getting data from {0}'.format(table))
    url = base_url.format(table_info['suburl'], table_info['tables'])

    # Fail loudly on HTTP errors instead of trying to parse an error page,
    # and do not hang forever if the API is unreachable.
    raw = requests.get(url, timeout=60)
    raw.raise_for_status()

    # The payload is a list of rows whose first row is the header. Building
    # the frame directly (rather than via pd.read_json) parses the body once
    # and leaves the geography codes as strings with their leading zeros.
    rows = raw.json()
    df = pd.DataFrame(rows[1:], columns=rows[0])

    for col in df.columns:
        if col not in GEO_COLS:
            # to_numeric turns JSON nulls into NaN but still raises on text
            # that is not a number.
            values = pd.to_numeric(df[col])
            df[col] = values.where(~values.isin(ACS_MISSING_VALUES)).astype(float)

    table_info['data'] = df

Getting data from data profiles
Getting data from subject tables
Getting data from detailed tables


In [373]:
# Empty frame holding only the geography key columns; the ACS tables are
# merged onto it in the next cell.
geo_key_cols = ['state', 'county', 'tract']
acs_all = pd.DataFrame(columns=geo_key_cols)

In [374]:
# Combine the downloaded ACS tables into one frame keyed by state/county/tract.
#
# The key-only frame is rebuilt here so that this cell is idempotent:
# re-running it on an already-populated acs_all would merge the same tables a
# second time and produce duplicated estimate columns.
acs_all = pd.DataFrame(columns=['state', 'county', 'tract'])
for table in tables_dict.keys():
    # how='right' keeps the rows of the table being merged in, so the first
    # pass simply adopts that table's rows.
    acs_all = pd.merge(acs_all, tables_dict[table]['data'], on=['state', 'county', 'tract'], how='right')

In [375]:
# Replace the ACS variable codes with readable column names.
acs_column_names = {
    'DP05_0037PE': 'pct_white',
    'S1501_C02_012E': 'pct_bach',
    'S1701_C03_001E': 'pct_poverty',
    'B25077_001E': 'med_home_val',
    'B25018_001E': 'med_num_rooms',
    'B06011_001E': 'med_income',
}
acs_all.rename(columns=acs_column_names, inplace=True)

In [376]:
# Concatenate the state, county and tract codes into a single tract id.
state_code, county_code, tract_code = (
    acs_all[part] for part in ('state', 'county', 'tract'))
acs_all['tract_id'] = state_code + county_code + tract_code

In [377]:
# Preview the first rows of the combined ACS table.
acs_all.head(n=5)

Unnamed: 0,pct_white,state,county,tract,pct_bach,pct_poverty,med_home_val,med_num_rooms,med_income,tract_id
0,79.6,6,81,612500,40.4,6.9,1854600.0,4.4,73762.0,6081612500
1,83.2,6,81,613501,20.8,3.0,798700.0,5.9,36394.0,6081613501
2,46.9,6,81,610201,15.9,24.0,722600.0,3.1,24035.0,6081610201
3,46.3,6,81,610202,19.5,26.3,342100.0,2.9,26061.0,6081610202
4,43.8,6,81,610303,36.2,3.5,1232800.0,5.7,104221.0,6081610303


In [383]:
# Add life expectancy to the ACS table (default inner join on tract_id).
life_exp_by_tract = life_exp[['tract_id', 'life_exp']]
land_use = acs_all.merge(life_exp_by_tract, on='tract_id')

In [384]:
# Add the tract-level parcel medians (default inner join). tract_id is a
# column on the left and the index of parcels_jobs_access on the right.
land_use = land_use.merge(parcels_jobs_access, on='tract_id')

In [385]:
# Preview the first rows of the combined tract-level land use table.
land_use.head(n=5)

Unnamed: 0,pct_white,state,county,tract,pct_bach,pct_poverty,med_home_val,med_num_rooms,med_income,tract_id,life_exp,med_parcel_size,med_jobs_10000,med_pop_jobs_ratio_10000
0,79.6,6,81,612500,40.4,6.9,1854600.0,4.4,73762.0,6081612500,84.9,0.186643,125491.0,1.841444
1,83.2,6,81,613501,20.8,3.0,798700.0,5.9,36394.0,6081613501,83.4,0.165552,4455.0,4.186083
2,46.9,6,81,610201,15.9,24.0,722600.0,3.1,24035.0,6081610201,80.6,0.117455,124924.0,1.82211
3,46.3,6,81,610202,19.5,26.3,342100.0,2.9,26061.0,6081610202,77.4,0.23276,127551.0,1.784548
4,43.8,6,81,610303,36.2,3.5,1232800.0,5.7,104221.0,6081610303,81.2,0.102423,93585.0,1.582513


### Load mover data

In [386]:
# Read the movers table; the FIPS and census-tract code columns are read as
# text because they are concatenated and sliced as strings further down.
code_cols = [
    'PROP_FIPSCD_from', 'PROP_CENSUSTRACT_from',
    'PROP_FIPSCD_to', 'PROP_CENSUSTRACT_to',
]
movers = pd.read_csv(
    '../data/movers.csv',
    index_col=0,
    dtype={code_col: str for code_col in code_cols},
)

In [387]:
# Keep moves whose to_effdate lies strictly between 201012 and 201901
# (presumably YYYYMM values, i.e. 2011 through 2018 -- confirm against the data).
effdate = movers['to_effdate']
in_window = (effdate > 201012) & (effdate < 201901)
movers = movers[in_window]

In [388]:
# For both ends of the move, build the block id (FIPS code + census tract code)
# and take its first 11 characters as the tract id.
for side in ('from', 'to'):
    block_col = 'block_id_' + side
    movers[block_col] = movers['PROP_FIPSCD_' + side] + movers['PROP_CENSUSTRACT_' + side]
    movers['tract_id_' + side] = movers[block_col].str[:11]

### Merge moves on "from" and "to" land use

In [389]:
# Attach tract-level land use attributes for both the origin ("_from") and
# the destination ("_to") tract of every move. Both joins are inner joins, so
# moves whose origin or destination tract is missing from land_use are dropped.
#
# The land use columns are suffixed *before* merging so that each join is on a
# single shared key column. Merging the unsuffixed table twice with
# suffixes=('_from', '_to') renames the overlapping 'tract_id' columns to
# 'tract_id_from' / 'tract_id_to', which already exist in movers: that yields
# duplicate column names on older pandas and a MergeError on pandas >= 2.0.
land_use_from = land_use.rename(columns=lambda name: name + '_from')
land_use_to = land_use.rename(columns=lambda name: name + '_to')

movers = pd.merge(movers, land_use_from, on='tract_id_from')
movers = pd.merge(movers, land_use_to, on='tract_id_to')

### Compute deltas

#### infutor data

In [390]:
# property quality
movers['qlty_decrease'] = 0
movers.loc[
    (movers['PROP_QLTY_from'] == 'QLU') &
    (movers['PROP_QLTY_to'].isin(['QEX', 'QGO', 'QAV', 'QFA', 'QPO'])), 'qlty_decrease'] = 1
movers.loc[
    (movers['PROP_QLTY_from'] == 'QEX') &
    (movers['PROP_QLTY_to'].isin(['QGO', 'QAV', 'QFA', 'QPO'])), 'qlty_decrease'] = 1
movers.loc[
    (movers['PROP_QLTY_from'] == 'QGO') &
    (movers['PROP_QLTY_to'].isin(['QAV', 'QFA', 'QPO'])), 'qlty_decrease'] = 1
movers.loc[
    (movers['PROP_QLTY_from'] == 'QAV') &
    (movers['PROP_QLTY_to'].isin(['QFA', 'QPO'])), 'qlty_decrease'] = 1
movers.loc[
    (movers['PROP_QLTY_from'] == 'QFA') &
    (movers['PROP_QLTY_to'].isin(['QPO'])), 'qlty_decrease'] = 1

# property value
movers['prop_val_pct_chg'] = (movers['PROP_VALCALC_to'] - movers['PROP_VALCALC_from']) / movers['PROP_VALCALC_from']

# property size
movers['prop_size_pct_chg'] = (
    movers['PROP_UNVBLDSQFT_to'] - movers['PROP_UNVBLDSQFT_from']) / movers['PROP_UNVBLDSQFT_from']

# bedrooms
movers['bedrooms_delta'] = movers['PROP_BEDRMS_to'] - movers['PROP_BEDRMS_from']

#### acs data

In [391]:
# Destination-minus-origin change for each ACS / CDC tract attribute.
# Each entry is (output column, attribute base name, relative?). When
# relative is True the change is divided by the origin value.
acs_changes = [
    ('pct_white_delta', 'pct_white', False),
    ('pct_bach_delta', 'pct_bach', False),
    ('pct_poverty_delta', 'pct_poverty', False),
    ('med_home_val_pct_diff', 'med_home_val', True),
    ('med_num_rooms_delta', 'med_num_rooms', False),
    ('med_income_pct_diff', 'med_income', True),
    ('life_exp_delta', 'life_exp', False),
]
for out_col, base_name, relative in acs_changes:
    origin = movers[base_name + '_from']
    change = movers[base_name + '_to'] - origin
    movers[out_col] = change / origin if relative else change

#### mtc data

In [393]:
# Destination-minus-origin change for the tract-level parcel medians.
# Each entry is (output column, attribute base name, relative?). When
# relative is True the change is divided by the origin value.
mtc_changes = [
    ('jobs_10km_pct_diff', 'med_jobs_10000', True),
    ('pop_jobs_ratio_delta', 'med_pop_jobs_ratio_10000', False),
    ('med_parcel_size_pct_diff', 'med_parcel_size', True),
]
for out_col, base_name, relative in mtc_changes:
    origin = movers[base_name + '_from']
    change = movers[base_name + '_to'] - origin
    movers[out_col] = change / origin if relative else change

### Save data

In [394]:
# Write the merged movers table, including every computed delta column.
processed_path = '../data/movers_2010_processed.csv'
movers.to_csv(processed_path)