In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from setup import *

%load_ext autoreload
%autoreload 2

In [2]:
# Demographic table from the NHGIS extract (the file name suggests
# block-group rows -- TODO confirm).
acs_path = data_dir + 'nhgis0004_ds239_20185_2018_blck_grp.csv'
demos = pd.read_csv(acs_path)

# Keep the 8-character columns shaped like 'AJ??E???'; these are the columns
# summed per tract further down.
data_cols = [
    col for col in demos.columns
    if len(col) == 8 and col.startswith('AJ') and col[4] == 'E'
]

# Optional dtype diagnostics (uncomment to run):
#   demos[data_cols].dtypes.value_counts()                    # any non-numeric dtypes?
#   demos[data_cols].select_dtypes(exclude='number').columns  # which columns are they?

# AJ0EE001 is left out of the summed columns: a later cell combines it with
# AJWME001 as a weighted average instead of adding it up.
data_cols.remove('AJ0EE001')

In [5]:
# Travel-time-to-work bins (columns AJXEE002..AJXEE013), each paired with one
# minute value used as that bin's weight in the average below
# (presumably bin midpoints -- TODO confirm against the codebook).
travel_time_cols = ['AJXEE0' + str(i).zfill(2) for i in range(2, 14)]
tt_medians = [2.5, 7, 12, 17, 22, 27, 32, 37, 42, 52, 74.5, 100]
assert len(travel_time_cols) == len(tt_medians), \
    "need exactly one minute value per travel-time bin"

# Grouping keys and the source-column groups behind each derived measure.
# Each group is listed once so the share and the raw count cannot drift apart.
tract_keys = ['COUNTYA', 'TRACTA']
age_25_34_cols = ['AJWBE011', 'AJWBE012',
                  'AJWBE035', 'AJWBE036']
age_35_50_cols = ['AJWBE013', 'AJWBE014', 'AJWBE015',
                  'AJWBE037', 'AJWBE038', 'AJWBE039']
age_over65_cols = ['AJWBE020', 'AJWBE021', 'AJWBE022', 'AJWBE023', 'AJWBE024', 'AJWBE025',
                   'AJWBE044', 'AJWBE045', 'AJWBE046', 'AJWBE047', 'AJWBE048', 'AJWBE049']
hs_grad_cols = ['AJYPE017', 'AJYPE018', 'AJYPE019', 'AJYPE020', 'AJYPE021', 'AJYPE022',
                'AJYPE023', 'AJYPE024', 'AJYPE025']
col_grad_cols = ['AJYPE022', 'AJYPE023', 'AJYPE024', 'AJYPE025']

# Sum every data column over the rows sharing a (COUNTYA, TRACTA) pair.
demo_census = demos.groupby(tract_keys, as_index=False)[data_cols].sum()

# Raw counts per tract, computed once and reused for both shares and counts.
n_25_34 = demo_census[age_25_34_cols].sum(axis=1)
n_35_50 = demo_census[age_35_50_cols].sum(axis=1)
n_over65 = demo_census[age_over65_cols].sum(axis=1)
n_hs_grad = demo_census[hs_grad_cols].sum(axis=1)
n_col_grad = demo_census[col_grad_cols].sum(axis=1)

# Denominators (the *001 column of each table).
age_total = demo_census['AJWBE001']
race_total = demo_census['AJWNE001']
edu_total = demo_census['AJYPE001']
commute_total = demo_census['AJXCE001']

# Shares. A zero denominator yields NaN/inf here; the export cell fills NaN with 0.
demo_census['pct25_34yrs'] = n_25_34 / age_total
demo_census['pct35_50yrs'] = n_35_50 / age_total
demo_census['pctover65yrs'] = n_over65 / age_total
demo_census['pctwhite_alone'] = demo_census['AJWNE002'] / race_total
demo_census['pct_nonwhite'] = (race_total - demo_census['AJWNE002']) / race_total
demo_census['pctblack_alone'] = demo_census['AJWNE003'] / race_total
demo_census['pct_hs_grad'] = n_hs_grad / edu_total
demo_census['pct_col_grad'] = n_col_grad / edu_total
demo_census['pctPTcommute'] = demo_census['AJXCE010'] / commute_total

# Raw counts.
demo_census['25_34yrs'] = n_25_34
demo_census['35_50yrs'] = n_35_50
demo_census['over65yrs'] = n_over65
# Fixed: this used to store AJWNE002/AJWNE001 (a share, identical to
# 'pctwhite_alone') although every other column in this group is a count.
demo_census['white_alone'] = demo_census['AJWNE002']
demo_census['nonwhite'] = race_total - demo_census['AJWNE002']
demo_census['black_alone'] = demo_census['AJWNE003']
demo_census['hs_grad'] = n_hs_grad
demo_census['col_grad'] = n_col_grad
demo_census['PTcommute'] = demo_census['AJXCE010']

# Average travel time: bin counts weighted by the per-bin minute values,
# divided by the table total AJXEE001.
weighted_minutes = demo_census[travel_time_cols].multiply(tt_medians, axis=1).sum(axis=1)
demo_census['avg_tt_to_work'] = weighted_minutes / demo_census['AJXEE001']
# demo_census['pct60plusmincommute'] = (demo_census[['AJXEE012','AJXEE013']].sum(axis=1))/demo_census['AJXEE001']

# Income per capita per tract: average of AJ0EE001 over the tract's rows,
# weighted by AJWME001. Done as a vectorised groupby-sum rather than
# groupby.apply with a lambda, so the result columns are explicit and the
# computation does not rely on apply's handling of the grouping columns.
income_parts = pd.DataFrame({
    'COUNTYA': demos['COUNTYA'],
    'TRACTA': demos['TRACTA'],
    'pop': demos['AJWME001'],
    'inc_x_pop': demos['AJWME001'] * demos['AJ0EE001'],
})
cell_inc = income_parts.groupby(tract_keys, as_index=False)[['inc_x_pop', 'pop']].sum()
cell_inc['inc_per_capita'] = cell_inc['inc_x_pop'] / cell_inc['pop']
cell_inc = cell_inc[tract_keys + ['inc_per_capita']]

demo_census = pd.merge(demo_census, cell_inc, how='left', on=tract_keys)




In [7]:
# Columns carried into the exported table: the two grouping keys, AJWME001
# (renamed to 'tot_population' below), the share columns, the count columns,
# average travel time to work and income per capita.
export_cols = [
    'COUNTYA', 'TRACTA', 'AJWME001',
    'pct25_34yrs', 'pct35_50yrs', 'pctover65yrs',
    'pctwhite_alone', 'pct_nonwhite', 'pctblack_alone',
    'pct_col_grad', 'pctPTcommute',
    '25_34yrs', '35_50yrs', 'over65yrs',
    'white_alone', 'nonwhite', 'black_alone',
    'col_grad', 'PTcommute',
    'avg_tt_to_work', 'inc_per_capita',
]

# Missing values (e.g. from 0/0 shares) become 0 before the rename.
demo_df = (
    demo_census[export_cols]
    .fillna(0)
    .rename(columns={'AJWME001': 'tot_population'})
)

# Names of the demographic variables, i.e. every exported column except the keys.
# NOTE(review): this rebinds `demos`, which earlier cells use as the raw
# DataFrame; re-running those cells after this one fails until the CSV is
# reloaded. Consider a distinct name if nothing downstream relies on it.
demos = [col for col in demo_df.columns if col not in ('COUNTYA', 'TRACTA')]

In [None]:
# (NOT USED) job
# Unused cell: attaches job counts to `demo_df` via a per-station aggregation.
# Reads the first two columns of the file and names them GEOID10 and jobs.
# NOTE(review): `names=` is passed without `header=`, so pandas treats the
# first line of the file as data; if the CSV has a header row, `header=0` is
# needed -- confirm against the file.
jobs = pd.read_csv(data_dir+'data_raw/il_wac_S000_JT00_2017.csv', usecols = [0,1], names = ['GEOID10','jobs'])

# NOTE(review): `block_int_stop` is not defined in the visible cells;
# presumably it comes from `from setup import *` -- verify.
jgrid = block_int_stop.merge(jobs, how = 'left', on = 'GEOID10')
# Scale each row's job count by its `proportion_area_stop` value.
jgrid['jobs'] = jgrid['proportion_area_stop']*jgrid['jobs']
# Total the scaled job counts per STATION_ID.
jgrid = jgrid.groupby(['STATION_ID'])['jobs'].sum().reset_index()

# NOTE(review): `demo_df` as built in the earlier cell has COUNTYA/TRACTA keys
# and no STATION_ID column, so this merge would raise a KeyError if the cell
# were run as-is.
demo_df = pd.merge(demo_df, jgrid, on='STATION_ID')

In [8]:
# Write the demographics table to CSV under data_dir, leaving out the row index.
demo_df.to_csv(path_or_buf=data_dir + "census_demo_df.csv", index=False)