# Collect data from Census surveys

In [4]:
import pandas as pd
import numpy as np
import requests
from json import loads as jsonloads
%matplotlib inline
import matplotlib.pylab as plt

### Generate a lat/lon grid covering the Phoenix area

In [5]:
def generate_query_grid(lat1 = 33., lon1 = -111., 
                        lat2 = 34., lon2 = -113.,
                        gridspace = 25.0):
    """Build a regular grid of (lat, lon) query points over a bounding box.

    The box spans latitudes lat1..lat2 and longitudes lon2..lon1, with both
    edges included on each axis.

    Parameters
    ----------
    lat1, lat2 : float
        First and last latitude of the grid.
    lon1, lon2 : float
        Longitude bounds; points run from lon2 to lon1.
    gridspace : float
        Number of intervals per axis (rounded to the nearest integer), so
        each axis carries gridspace + 1 points.

    Returns
    -------
    list of (lat, lon) tuples, latitude-major (all longitudes for the first
    latitude, then the next latitude, and so on).

    Raises
    ------
    ValueError
        If gridspace rounds to less than 1.
    """
    steps = int(round(gridspace))
    if steps < 1:
        raise ValueError('gridspace must be at least 1, got {!r}'.format(gridspace))
    # linspace pins both end points exactly; arange with a fractional step
    # can gain or lose a point depending on floating-point rounding.
    lats = np.linspace(lat1, lat2, steps + 1)
    lons = np.linspace(lon2, lon1, steps + 1)
    grid = [(la, lo) for la in lats for lo in lons]
    # Report the number of points actually generated.
    print('total generated by grid = {}'.format(len(grid)))
    return grid

In [7]:
# One-off probe of the FCC block-lookup endpoint for the point
# (latitude 33., longitude -111.); the decoded JSON reply is kept in `result`.
result = requests.get(
    'http://data.fcc.gov/api/block/find'
    '?format=json&latitude=33.&longitude=-111.&showall=false'
).json()


In [12]:
# Display the top-level fields of the sample FCC response held in `result`.
result.items()

[(u'County', {u'FIPS': u'04021', u'name': u'Pinal'}),
 (u'status', u'OK'),
 (u'State', {u'FIPS': u'04', u'code': u'AZ', u'name': u'Arizona'}),
 (u'Block', {u'FIPS': u'040210022001080'}),
 (u'executionTime', u'564')]

### Query the FCC block API to convert lat/lon to a census block FIPS code

In [6]:
def latlon_to_fips(lat, lon, timeout=30):
    """Return the census block FIPS code for a latitude/longitude pair.

    Sends one GET request to the FCC block-lookup API with the coordinates
    rounded to 4 decimal places.

    Parameters
    ----------
    lat, lon : float
        Coordinates of the point to look up.
    timeout : float
        Seconds to wait for the server before giving up, so a single stalled
        request cannot hang a long grid scan.

    Returns
    -------
    The value of ``['Block']['FIPS']`` in the JSON reply (a 15-digit string
    such as '040210022001080' in the sample response).

    Raises
    ------
    requests.exceptions.RequestException
        On connection failure, timeout, or a non-success HTTP status.
    """
    lat = str(np.round(lat, 4))
    lon = str(np.round(lon, 4))
    url = ('http://data.fcc.gov/api/block/find?format=json&latitude=' + lat
           + '&longitude=' + lon + '&showall=false')
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()['Block']['FIPS']

### Loop through grid of lat longs

In [4]:
def extract_fip_from_grid(grid):
    """Look up the FIPS code of every (lat, lon) point in ``grid``.

    Returns a list with one ``[fips, lat, lon]`` entry per grid point, in
    the same order as ``grid``. Each point costs one call to
    ``latlon_to_fips``.
    """
    return [[latlon_to_fips(lat, lon), lat, lon] for lat, lon in grid]

###  Extract tags from FIPS

In [12]:
def break_fips(fips):
    """Split a block FIPS string into its positional components.

    The string is cut at fixed character offsets:
    state [0:2], county [2:5], tract [5:11], block_group [11:12],
    block [12:15].

    Returns the 5-tuple (state, county, tract, block_group, block).
    """
    cuts = (0, 2, 5, 11, 12, 15)
    # Pair each offset with the next one to get the five slice bounds.
    state, county, tract, block_group, block = (
        fips[start:stop] for start, stop in zip(cuts, cuts[1:]))
    return state, county, tract, block_group, block
# example input: '120950170151016'

### Apply above functions to generate lat lon grid and corresponding census code info

In [20]:
# County codes used to keep only grid points in Maricopa and Pinal counties.
maricopa, pinal = '013', '021'

# Build the default lat/lon grid and look up a block FIPS for every point.
grid = generate_query_grid()
fiplatlon = np.array(extract_fip_from_grid(grid), dtype=object)
dftemp = pd.DataFrame(fiplatlon, columns=['fips', 'lat', 'lon'])

# Break each FIPS string into its components, one column per component.
splitfips = np.array(dftemp['fips'].apply(break_fips).tolist(), dtype=str)

# Put the component columns side by side with the original fips/lat/lon.
dfcensus = pd.concat(
    [
        pd.DataFrame(
            splitfips,
            columns=['state', 'county', 'tract', 'block_group', 'block']),
        dftemp,
    ],
    axis=1)

# Keep the two counties of interest and dump the table without the index.
dfcensus = dfcensus[dfcensus['county'].isin([pinal, maricopa])]
dfcensus.to_csv('fips_lat_lon.csv', index=False)

### Load the dumped fips_lat_lon file

In [211]:
# Reload the grid/FIPS table. fips_lat_lon.csv is written with index=False,
# so no column is consumed as the index here (index_col=0 would swallow
# 'state'). Every code column is read as text so leading zeros
# (e.g. state '04', county '013') survive the round trip.
# NOTE(review): assumes this path holds the same file the pipeline wrote.
test = pd.read_csv('/Users/John/Dropbox/Opendoor/Aug2_census/fips_lat_lon.csv',
                   dtype={'state': object, 'county': object,
                          'tract': object, 'block_group': object,
                          'block': object, 'fips': object})

### Information for API query

In [14]:
# ---- acs5 variable codes (named after the measure each one selects) ----
median_household_income = 'B19013_001E'
income_per_capita = 'B19301_001E'
median_age = 'B01002_001E'
total_transportation = 'B08301_001E'
car_transportation = 'B08301_002E'
public_transportation = 'B08301_010E'
total_living = 'B25003_001E'
owner_living = 'B25003_002E'
renter_living = 'B25003_003E'
popwhite = 'B02001_002E'
poptotal = 'B01003_001E'
rent_cost = 'B25064_001E'
bachelors = 'B15003_022E'
# Not requested at the moment: commutelen = 'B08136_001E'

# ---- sf1 variable codes (not requested at the moment) ----
#   totalpop = 'P0030001', whitepop = 'P0030002', blackpop = 'P0030003'

# ---- geography: county codes and state code ----
maricopa = '013'
pinal = '021'
az = '04'

# ---- survey path segments; the trailing '?' starts the query string ----
acs = 'acs5?'
sf = 'sf1?'

# Variable codes in request order. `col` lists the matching column names in
# the same order, so the two lists must be edited together.
acslist = [
    median_household_income,
    income_per_capita,
    median_age,
    total_transportation,
    car_transportation,
    public_transportation,
    total_living,
    owner_living,
    renter_living,
    popwhite,
    poptotal,
    rent_cost,
    bachelors,
]
col = [
    'median_household_income', 'income_per_capita', 'median_age',
    'total_transportation', 'car_transportation', 'public_transportation',
    'total_living', 'owner_living', 'renter_living',
    'popwhite', 'poptotal', 'rent_cost', 'bachelors',
]

# Comma-separated code list for the request's get= parameter.
acsstr = ','.join(acslist)

### Query census api 

In [15]:
# Download the selected ACS variables for every block group, once per
# (county, year) pair. Replies are stored in acs_out under keys of the form
# '<year>_<county>', e.g. '2012_013'.
acs_out={}
counties = [maricopa, pinal]
year = ['2012/','2013/']
base = 'http://api.census.gov/data/'
# NOTE(review): this API key is committed in source; consider loading it from
# an environment variable or an untracked config file instead.
key = 'key=f3b0208cfff49733c10c3ccf09f8b8b378e5da9d'
get = '&get='
location = '&for=block+group:*&in=state:04+county:'
for c in counties:
    for y in year:
        q = base+y+acs+key+get+acsstr+location+c
        # Bound the wait so a stalled request cannot hang the cell.
        r = requests.get(q, timeout=60)
        # Surface HTTP errors here rather than as a JSON decode failure.
        r.raise_for_status()
        acs_out[y[:-1]+'_'+c] = r.json()

### Break dictionary and create df

In [24]:
# Stack the per-(year, county) API replies into one block-group DataFrame.
# The first row of every reply is dropped (presumably the header row returned
# by the API -- TODO confirm) and the remaining rows are tagged with the year
# taken from the first four characters of the acs_out key.
# The loop variable is deliberately not called `key`, so the API-key string
# defined for the query cell is not overwritten.
census_columns = col + ['state', 'county', 'tract', 'block_group']
frames = []
for label, rows in acs_out.items():
    # .copy() makes the slice an independent frame before adding a column.
    frame = pd.DataFrame(rows, dtype=object, columns=census_columns).iloc[1:].copy()
    frame['year'] = int(label[0:4])
    frames.append(frame)
# Concatenate once; stays None when there were no replies to stack.
df_bgrp_census = pd.concat(frames, axis=0) if frames else None

### Write files

In [34]:
# Left-join the lat/lon grid rows onto the block-group census table, matching
# on the four geography code columns; census rows without a grid match are kept.
df_census_data = df_bgrp_census.merge(
    dfcensus,
    how='left',
    on=['state', 'county', 'tract', 'block_group'],
)

In [43]:
# Write the merged census / lat-lon table to disk, leaving out the index.
df_census_data.to_csv(path_or_buf='custom_census_data.csv', index=False)