# Census API Metric Codes

| Race | Code |
|------|------|
| Total|B03002_001E|
| Black|B03002_004E|
| Asian|B03002_006E|
| Native Hawaiian Pacific Islander|B03002_007E|
| Other|B03002_008E|
| Hispanic or Latino|B03002_012E|
| 2 or More Races|B03002_010E|

| Citizenship / Immigration | Code |
|------|------|
| Foreign Born 1|B06007_033E|
| Foreign Born 2|B05002_013E|
| Not a u.s. Citizen|B05001_006E|
| Speak spanish, speak English less than very well|B06007_037E|
| Speak other, speak English less than very well|B06007_040E|

| Income | Code |
|------|------|
| Total income population|B19001_001E|
| Total income less than 10k|B19001_002E|
| Total income  10-15k|B19001_003E|

| Education | Code |
|------|------|
| Less than HS graduate |B07009_002E|
| High school graduate |B07009_003E|
| Some college or associate's degree |B07009_004E|
| Grad or professional degree |B07009_006E|

In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import geopandas as gpd
import requests

Let's first check the availability of state data and store the missing url requests in a list

In [2]:
full_state_test = ["%.2d" % i for i in range(1,57)]
bad_apples = []
def state_checker(full_state_test):
    for i in full_state_test:
        url = ("https://api.census.gov/data/2015/acs5?get=NAME,B03002_001E"+
               "&for=tract:*&in=state:" + i + "&key=14ba39dd26088efd8d54c4f01d90023f2d4bfc6d")
        response_code = requests.get(url).status_code
        if response_code != 200:
            bad_apples.append([i, response_code])
state_checker(full_state_test)
print("These states return no content. Bad Apples :(\n", bad_apples)

These states return no content. Bad Apples :(
 [['03', 204], ['07', 204], ['14', 204], ['43', 204], ['52', 204]]


3, 7, 14, 43, 52 are missing, but all the states should be in there if we go up to 56

https://www.census.gov/geo/reference/ansi_statetables.html

In [3]:
def pull_census(state, url_yes_no):
    url = ("https://api.census.gov/data/2015/acs5?get=NAME,B03002_001E,B03002_004E,B03002_006E," +
           "B03002_007E,B03002_008E,B03002_010E,B03002_012E," +
           "B06007_033E,B05002_013E,B05001_006E,B06007_037E,B06007_040E,B19001_001E,B19001_002E,B19001_003E," +
           "B07009_002E,B07009_003E,B07009_004E,B07009_006E" +
           "&for=tract:*&in=state:" + state + "&key=14ba39dd26088efd8d54c4f01d90023f2d4bfc6d")
    if url_yes_no:
        print(url)        
    html = requests.get(url).json()
    return html

In [4]:
#Make a master list range and remove the bad apples
master_list = ["%.2d" % i for i in range(1,57)]
master_list = [i for i in master_list if i not in [bad_apples[i][0] for i in range(len(bad_apples))]]

#Then stitch together all the data frames for the remaining dataset
for i in master_list:
    if i == "01":
        newstate = pull_census(i, False)
        master = pd.DataFrame(newstate, columns = newstate[0])[1:]
    elif i != "01":
        newstate = pull_census(i, False)
        master = master.append(pd.DataFrame(newstate, columns = newstate[0])[1:])

In [5]:
#Column Creation
master["GEOID"] = master['state'] + master['county'] + master['tract']
master["County Name"] = master["NAME"].str.split(",").str[1]
master["State Name"] = master["NAME"].str.split(",").str[2]

In [6]:
master.columns = ['Name', 'Total_Population', 'Black', 'Asian', 'Native_Hawaiian_Pacific_Islander', 'Other', 'Two_or_More_Races', 'Hispanic_or_Latino', 
                  'Foreign_Born_1', 'Foreign_Born_2', 'Not_a_us_Citizen', 'Speak_spanish_little_English', 'Speak_other_little_english',
                  'Total_income_population', 'Total_income_less_than_10k', 'Total_income_10-15k',
                  'Less_than_HS', 'HS_grad', 'College_grad', 'Graduate_or_professional', 
                  'state', 'county', 'tract', 'GEOID', 'County Name', 'State Name']

In [7]:
print("Dataframe Size", master.shape)

Dataframe Size (73056, 26)


In [8]:
master.head()

Unnamed: 0,Name,Total_Population,Black,Asian,Native_Hawaiian_Pacific_Islander,Other,Two_or_More_Races,Hispanic_or_Latino,Foreign_Born_1,Foreign_Born_2,...,Less_than_HS,HS_grad,College_grad,Graduate_or_professional,state,county,tract,GEOID,County Name,State Name
1,"Census Tract 201, Autauga County, Alabama",1948,150,12,0,0,0,17,45,45,...,184,459,258,176,1,1,20100,1001020100,Autauga County,Alabama
2,"Census Tract 202, Autauga County, Alabama",2156,1149,50,0,0,0,17,43,43,...,356,496,342,70,1,1,20200,1001020200,Autauga County,Alabama
3,"Census Tract 203, Autauga County, Alabama",2968,551,41,8,0,0,0,35,35,...,221,747,674,192,1,1,20300,1001020300,Autauga County,Alabama
4,"Census Tract 204, Autauga County, Alabama",4423,162,0,0,48,5,464,133,133,...,339,1044,806,257,1,1,20400,1001020400,Autauga County,Alabama
5,"Census Tract 205, Autauga County, Alabama",10763,2674,412,0,0,49,80,346,346,...,310,1674,1999,1162,1,1,20500,1001020500,Autauga County,Alabama


census_shp = gpd.read_file("/Users/robertdeng/Google Drive/Data Science/W209/Project/Tract_2010Census_DP1/Tract_2010Census_DP1.shp")

In [11]:
census_shp.merge()

Unnamed: 0,GEOID10,NAMELSAD10,ALAND10,AWATER10,INTPTLAT10,INTPTLON10,DP0010001,DP0010002,DP0010003,DP0010004,...,DP0210001,DP0210002,DP0210003,DP0220001,DP0220002,DP0230001,DP0230002,Shape_Leng,Shape_Area,geometry
0,01005950300,Census Tract 9503,352811329.0,499970.0,+31.7908933,-085.5670514,1638,98,87,88,...,673,544,129,1283,355,2.36,2.75,1.164633,0.033648,POLYGON ((-85.52743699999996 31.86650300000008...
1,01005950900,Census Tract 9509,16201446.0,8339342.0,+31.8467221,-085.1462332,4583,456,387,363,...,1784,857,927,2196,2387,2.56,2.57,0.280687,0.002338,POLYGON ((-85.16412199999996 31.83059499999996...
2,01005950800,Census Tract 9508,14630162.0,380622.0,+31.9206930,-085.1760317,2055,110,137,128,...,901,550,351,1257,798,2.29,2.27,0.173100,0.001431,"POLYGON ((-85.14871799999997 31.909335, -85.14..."
3,01005950700,Census Tract 9507,127200997.0,112809.0,+31.9440855,-085.2620842,1727,101,112,106,...,646,372,274,915,663,2.46,2.42,0.622975,0.012143,POLYGON ((-85.14578399999999 31.89149100000009...
4,01005950600,Census Tract 9506,101697268.0,1101261.0,+31.8783526,-085.2729215,2099,137,160,166,...,828,510,318,1295,804,2.54,2.53,0.556037,0.009798,POLYGON ((-85.14572299999998 31.89010600000006...
5,01005950100,Census Tract 9501,485263821.0,29057811.0,+32.0353533,-085.2477678,3321,204,199,212,...,1348,1029,319,2527,794,2.46,2.49,1.270020,0.049107,POLYGON ((-85.31849899999986 32.14725400000009...
6,01005950400,Census Tract 9504,343305165.0,397957.0,+31.6794432,-085.5577196,4303,232,185,122,...,1188,871,317,2078,905,2.39,2.85,1.002607,0.032689,POLYGON ((-85.73037599999992 31.61818899999997...
7,01005950200,Census Tract 9502,517006036.0,983621.0,+31.9222753,-085.4498309,4264,160,177,211,...,1053,733,320,1831,709,2.50,2.22,1.186084,0.049394,POLYGON ((-85.52759099999992 31.86579999999998...
8,01005950500,Census Tract 9505,333702744.0,9991323.0,+31.7634183,-085.2718649,3467,204,198,203,...,1399,1090,309,2638,829,2.42,2.68,1.007324,0.032722,POLYGON ((-85.16432799999995 31.83062000000012...
9,01027959000,Census Tract 9590,179599846.0,1129597.0,+33.3093759,-085.8820883,3224,175,165,208,...,1335,836,499,2020,1054,2.42,2.11,0.742523,0.017497,POLYGON ((-85.96046799999999 33.33433199999996...
