# Census API Metric Codes

| Race | Code |
|------|------|
| Total|B03002_001E|
| Black|B03002_004E|
| Asian|B03002_006E|
| Native Hawaiian Pacific Islander|B03002_007E|
| Other|B03002_008E|
| Hispanic or Latino|B03002_012E|
| 2 or More Races|B03002_010E|

| Citizenship / Immigration | Code |
|------|------|
| Foreign Born 1|B06007_033E|
| Foreign Born 2|B05002_013E|
| Not a U.S. Citizen|B05001_006E|
| Speak Spanish, speak English less than very well|B06007_037E|
| Speak other, speak English less than very well|B06007_040E|

| Income | Code |
|------|------|
| Total income population|B19001_001E|
| Total income less than 10k|B19001_002E|
| Total income 10-15k|B19001_003E|

| Education | Code |
|------|------|
| Less than HS graduate |B07009_002E|
| High school graduate |B07009_003E|
| Some college or associate's degree |B07009_004E|
| Grad or professional degree |B07009_006E|

In [44]:
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import geopandas as gpd
import requests

from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import (ColumnDataSource,HoverTool, LogColorMapper)
from bokeh.palettes import Viridis6 as palette
from bokeh.tile_providers import STAMEN_TERRAIN
output_notebook()

#If you need to install anything
#import sys
#!conda install --yes --prefix {sys.prefix} numpy

In [45]:
# Load the local input files.
# The FCC broadband CSV is not needed for this section, so its load stays disabled:
#fcc = pd.read_csv("/Users/robertdeng/Google Drive/Data Science/W209/Project/fbd_us_without_satellite_dec2016_v1.csv", sep=",", encoding = "latin-1")
# Census tract shapefile; the path literal is split across two lines for readability only.
census_shp = gpd.read_file(
    "/Users/robertdeng/Google Drive/Data Science/W209/Project/"
    "Tract_2010Census_DP1/Tract_2010Census_DP1.shp"
)

Let's first check the availability of data for each state code and store the codes whose requests fail in a list.

In [46]:
# Two-digit, zero-padded state codes "01" .. "56".
full_state_test = ["%.2d" % i for i in range(1,57)]
# Filled by state_checker with [state_code, http_status] pairs.
bad_apples = []
def state_checker(full_state_test):
    """Probe the ACS 5-year endpoint once per state code and record failures.

    A tract-level request for the total-population variable is issued for
    every code in ``full_state_test``. Whenever the HTTP status is not 200,
    ``[state_code, status_code]`` is appended to the module-level
    ``bad_apples`` list.

    Parameters
    ----------
    full_state_test : iterable of str
        State codes to probe, as they should appear in the request URL.

    Returns
    -------
    list
        The module-level ``bad_apples`` list (the same object that is mutated).
    """
    import os

    # NOTE(review): the key is read from the CENSUS_API_KEY environment variable
    # when it is set. The literal fallback only keeps existing runs working;
    # it is a credential committed to the notebook and should be rotated and removed.
    api_key = os.environ.get("CENSUS_API_KEY", "14ba39dd26088efd8d54c4f01d90023f2d4bfc6d")
    for state_code in full_state_test:
        url = ("https://api.census.gov/data/2015/acs5?get=NAME,B03002_001E"+
               "&for=tract:*&in=state:" + state_code + "&key=" + api_key)
        # A timeout keeps one stalled connection from hanging the whole sweep.
        response_code = requests.get(url, timeout=60).status_code
        if response_code != 200:
            bad_apples.append([state_code, response_code])
    return bad_apples
state_checker(full_state_test)
print("These states return no content. Bad Apples :(\n", bad_apples)

These states return no content. Bad Apples :(
 [['03', 204], ['07', 204], ['14', 204], ['43', 204], ['52', 204]]


State codes 03, 07, 14, 43, and 52 return no content, but every state should still be covered if we go up to 56 (see the state code table linked below).

https://www.census.gov/geo/reference/ansi_statetables.html

In [47]:
def pull_census(state, url_yes_no):
    """Download the tract-level ACS 5-year (2015) variables for one state.

    Parameters
    ----------
    state : str
        State code as it should appear in the request URL (e.g. "06").
    url_yes_no : bool
        When truthy, the full request URL is printed before the request is
        sent. The printed URL includes the API key.

    Returns
    -------
    The decoded JSON payload of the response. Callers in this notebook treat
    element 0 as the header row and the remaining elements as data rows.

    Raises
    ------
    ValueError
        If the HTTP status is not 200 (for example the 204 "no content"
        replies seen for some state codes), or if the body is not valid JSON.
    """
    import os

    # NOTE(review): the key is read from the CENSUS_API_KEY environment variable
    # when it is set. The literal fallback only keeps existing runs working;
    # it is a credential committed to the notebook and should be rotated and removed.
    api_key = os.environ.get("CENSUS_API_KEY", "14ba39dd26088efd8d54c4f01d90023f2d4bfc6d")
    url = ("https://api.census.gov/data/2015/acs5?get=NAME,B03002_001E,B03002_004E,B03002_006E," +
           "B03002_007E,B03002_008E,B03002_010E,B03002_012E," +
           "B06007_033E,B05002_013E,B05001_006E,B06007_037E,B06007_040E,B19001_001E,B19001_002E,B19001_003E," +
           "B07009_002E,B07009_003E,B07009_004E,B07009_006E" +
           "&for=tract:*&in=state:" + state + "&key=" + api_key)
    if url_yes_no:
        print(url)
    # A timeout keeps a stalled connection from blocking the notebook indefinitely.
    response = requests.get(url, timeout=60)
    # Fail with a readable message instead of an opaque JSON decode error
    # when the API answers with an empty or error body.
    if response.status_code != 200:
        raise ValueError("Census API returned HTTP %d for state %s"
                         % (response.status_code, state))
    return response.json()

In [48]:
#Make a master list range and remove the bad apples
# Gather the failing codes once, instead of rebuilding that list for every candidate.
bad_state_codes = {entry[0] for entry in bad_apples}
master_list = [code for code in ("%.2d" % i for i in range(1,57)) if code not in bad_state_codes]

#Then stitch together all the data frames for the remaining dataset
# Element 0 of each payload is the header row; the remaining elements are data rows.
# Frames are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0, and growing a frame inside a loop copies it on every
# iteration. This also no longer depends on "01" being the first code.
state_frames = []
for state_code in master_list:
    newstate = pull_census(state_code, False)
    state_frames.append(pd.DataFrame(newstate[1:], columns = newstate[0]))

# ignore_index gives the stacked frame unique row labels (previously each
# state's rows restarted at 1, producing duplicate labels).
master = pd.concat(state_frames, ignore_index=True)

In [49]:
#Column Creation
# GEOID is the concatenation of the state, county and tract code strings; it is
# the key later matched against the shapefile's GEOID10 column.
master["GEOID"] = master['state'] + master['county'] + master['tract']

# NAME is comma-separated; elements 1 and 2 hold the county and state names.
# Stripping removes the space that follows each comma, so the new columns do
# not start with a blank.
name_parts = master["NAME"].str.split(",")
master["County Name"] = name_parts.str[1].str.strip()
master["State Name"] = name_parts.str[2].str.strip()

In [50]:
# Rename by Census variable code instead of assigning a positional list, so a
# change in the order of requested variables cannot silently attach a label to
# the wrong column. Columns not listed here (state, county, tract, GEOID,
# County Name, State Name) keep their names.
# NOTE(review): B07009_004E is listed in the code table at the top of this
# notebook as "Some college or associate's degree", so 'College_grad' is a
# loose label; it is kept unchanged for compatibility.
master = master.rename(columns={
    'NAME': 'Name',
    # Race
    'B03002_001E': 'Total_Population',
    'B03002_004E': 'Black',
    'B03002_006E': 'Asian',
    'B03002_007E': 'Native_Hawaiian_Pacific_Islander',
    'B03002_008E': 'Other',
    'B03002_010E': 'Two_or_More_Races',
    'B03002_012E': 'Hispanic_or_Latino',
    # Citizenship / immigration
    'B06007_033E': 'Foreign_Born_1',
    'B05002_013E': 'Foreign_Born_2',
    'B05001_006E': 'Not_a_us_Citizen',
    'B06007_037E': 'Speak_spanish_little_English',
    'B06007_040E': 'Speak_other_little_english',
    # Income
    'B19001_001E': 'Total_income_population',
    'B19001_002E': 'Total_income_less_than_10k',
    'B19001_003E': 'Total_income_10-15k',
    # Education
    'B07009_002E': 'Less_than_HS',
    'B07009_003E': 'HS_grad',
    'B07009_004E': 'College_grad',
    'B07009_006E': 'Graduate_or_professional',
})

# Shapefile

In [51]:
#Get Polygon coordinates
def getPolyCoords(row, geom, coord_type):
    """Return the exterior-ring coordinates of a Polygon along one axis.

    Parameters
    ----------
    row : mapping-like
        A record (e.g. one DataFrame row) holding a geometry under key ``geom``.
    geom : str
        Key/column name of the geometry inside ``row``.
    coord_type : str
        'x' for the x coordinates, 'y' for the y coordinates.

    Returns
    -------
    list or None
        The requested coordinates of the polygon's exterior ring as a list.
        ``None`` when the geometry is not a 'Polygon' (for example a
        MultiPolygon) or when ``coord_type`` is neither 'x' nor 'y'.
    """
    geometry = row[geom]
    # geom_type is the supported attribute; the `.type` alias is deprecated in Shapely 2.0.
    if geometry.geom_type != 'Polygon':
        return None

    # coords.xy is a pair: (x values, y values) of the exterior ring.
    xs, ys = geometry.exterior.coords.xy
    if coord_type == 'x':
        return list(xs)
    if coord_type == 'y':
        return list(ys)
    return None

#Create x, y coordinates for polygons
# The figure further down adds a web tile layer, and tile layers are drawn in
# Web Mercator (EPSG:3857). The coordinates are therefore taken from a projected
# copy so the tract outlines line up with the basemap; census_shp's own
# geometry column is left in its original CRS.
census_mercator = census_shp.to_crs(epsg=3857)
# Rows whose geometry is not a single Polygon get None (see getPolyCoords).
census_shp["x"] = census_mercator.apply(getPolyCoords, geom="geometry", coord_type="x", axis=1)
census_shp["y"] = census_mercator.apply(getPolyCoords, geom="geometry", coord_type="y", axis=1)

#x, y ranges
# total_bounds is (minx, miny, maxx, maxy) over every geometry in the frame,
# expressed here in the same projected units as the x/y columns.
tract_bounds = census_mercator.total_bounds
xlim = (tract_bounds[0], tract_bounds[2])
ylim = (tract_bounds[1], tract_bounds[3])

bounds = x_range, y_range = (xlim, ylim)

In [52]:
#NaN Checks
#nan_index = merged.index[np.isnan(merged.state.astype(float))].tolist()
#len(merged.index[np.isnan(merged.state.astype(float))].tolist())

In [53]:
# Attach the ACS attributes to the tract geometries: shapefile GEOID10 is
# matched against the GEOID column built from the API response. A left join
# keeps every shapefile row, and sort=True orders the result by the join key.
merged = census_shp.merge(
    master,
    how = "left",
    left_on = "GEOID10",
    right_on = "GEOID",
    sort = True,
)
#merged.head()

In [54]:
#import pysal as ps
#pop = merged.Total_Population.astype(float)
#pop = pop[~np.isnan(pop)]

In [55]:
#popq = ps.Quantiles(pop,k=9)
#popq, popq.adcm

In [56]:
#popeq = ps.Equal_Interval(pop,k=9)
#popeq, popeq.adcm

In [57]:
#popfj = ps.Fisher_Jenks(pop,k=9)
#popfj, popfj.adcm

In [None]:
# Keep a handle on the frame as it was before narrowing it to one state.
# NOTE(review): re-running this cell rebinds temp_stored_merged to the
# already-filtered frame, so re-run the merge cell first if the full frame is needed.
temp_stored_merged = merged
# Keep only rows whose state code is "06".
merged = merged[merged["state"] == "06"]

#http://geographyplanning.buffalostate.edu/Mix/Python/Visualization-PoorChildrenNY.html

# One list entry per tract; x/y entries are coordinate lists (or None).
merged_ys = merged['y'].tolist()
merged_xs = merged['x'].tolist()
merged_name = merged['NAMELSAD10'].tolist()
# Nothing upstream converts the API values to numbers, so coerce here: the
# colour mapper needs numeric input. Unparseable or missing values become NaN.
merged_pop = pd.to_numeric(merged['Total_Population'], errors='coerce').tolist()

color_mapper = LogColorMapper(palette=palette)

source = ColumnDataSource(
        data=dict(
            x=merged_xs,
            y=merged_ys,
            name=merged_name,
            population = merged_pop
        )
    )

TOOLS = "pan,wheel_zoom,box_zoom,reset,hover,save"

# x_range / y_range come from the shapefile cell and span the whole shapefile,
# not just the filtered state.
f = figure(
    plot_height=700, plot_width=800,
    title="Census 5 Year Test",
    tools=TOOLS,
    x_axis_location=None,
    y_axis_location=None,
    x_range=x_range,
    y_range=y_range)

f.title.text_font_style = "italic"
f.title.text_font_size = '14pt'
f.background_fill_color = 'gray'
f.grid.grid_line_color = None
f.add_tile(STAMEN_TERRAIN)

# One patch per tract, filled according to its population through the log colour mapper.
f.patches("x", "y",source=source, fill_color = {'field': 'population', 'transform': color_mapper},
          fill_alpha=0.7,line_color = 'white',line_width=0.5)

# The "hover" entry in TOOLS created the HoverTool; configure it here.
hover = f.select_one(HoverTool)
hover.point_policy = "follow_mouse"
hover.tooltips = [
    ("Census Tract", "@name"),
    ("Population", "@population"),
]
show(f)