# Demonstration of Census Data Access

Thanks to Earth Lab for the tutorial this notebook is based on:
https://earthdatascience.org/tutorials/get-cenus-data-with-cenpy/

In [1]:
import pandas as pd
import cenpy as cen
import pysal

In [2]:
# Fetch the catalogue of datasets known to cenpy's explorer and flatten it
# into a list of (dataset identifier, descriptive title) pairs.
catalogue = cen.explorer.available(verbose=True)
datasets = list(catalogue.items())

# Display only the first rows of the catalogue as a DataFrame
pd.DataFrame(datasets).head()

Unnamed: 0,0,1
0,ACSSF5Y2010,2006-2010 American Community Survey 5-Year Est...
1,NONEMP2007,2007 Nonemployer Statistics: Non Employer Stat...
2,POPESTagesex,Vintage 2014 Population Estimates: National An...
3,ZBPTotal2011,2011 County Business Patterns - Zip Code Busin...
4,ITMonthlyImportsUSDA,Time Series International Trade: Monthly U.S. ...


In [3]:
# Identifier of the dataset used for the rest of the notebook
# (the 2012 American Community Survey 1-year estimates).
dataset = '2012acs1'
# Show the title and long-form description of that dataset
cen.explorer.explain(dataset)

{'2012 American Community Survey: 1-Year Estimates': "The American Community Survey (ACS) is a nationwide survey designed to provide communities a fresh look at how they are changing. The ACS replaced the decennial census long form in 2010 and thereafter by collecting long form type information throughout the decade rather than only once every 10 years.  Questionnaires are mailed to a sample of addresses to obtain information about households -- that is, about each person and the housing unit itself.  The American Community Survey produces demographic, social, housing and economic estimates in the form of 1-year, 3-year and 5-year estimates based on population thresholds. The strength of the ACS is in estimating population and housing characteristics. It produces estimates for small areas, including census tracts and population subgroups.  Although the ACS produces population, demographic and housing unit estimates,it is the Census Bureau's Population Estimates Program that produces an

In [4]:
# Open a connection to the chosen dataset; every later query goes through `con`.
con = cen.base.Connection(dataset)
# Display the connection's repr to confirm which dataset it points at
con

Connection to 2012 American Community Survey: 1-Year Estimates (ID: http://api.census.gov/data/id/2012acs1)

In [5]:
# Inspect the connection: its class, the container type that holds its
# geographies, and the keys under which geography tables are stored.
for described in (type(con), type(con.geographies), con.geographies.keys()):
    print(described)

<class 'cenpy.remote.APIConnection'>
<class 'dict'>
dict_keys(['fips'])


In [6]:
# Preview the first rows of the table stored under the 'fips' key of the
# connection's geographies dictionary (one row per available geography level).
con.geographies['fips'].head()

Unnamed: 0,geoLevelId,name,optionalWithWCFor,requires,wildcard
0,500,congressional district,state,[state],[state]
1,60,county subdivision,,"[state, county]",
2,795,public use microdata area,,[state],
3,310,metropolitan statistical area/micropolitan sta...,,,
4,160,place,state,[state],[state]


In [7]:
# Geography to request: every county ('*' is the wildcard) ...
g_unit = 'county:*'
# ... restricted to the state whose FIPS code is 8 (the query results further
# down list Colorado counties).
g_filter = dict(state='8')

In [8]:
# Keep a handle on the table of variables this dataset exposes, report how
# many there are, then preview the first rows.
var = con.variables
n_variables = len(var)
print('Number of variables in', dataset, ':', n_variables)
var.head()

Number of variables in 2012acs1 : 68401


Unnamed: 0,concept,group,label,limit,predicateOnly,predicateType,validValues
for,Census API Geography Specification,,Census API FIPS 'for' clause,0,True,fips-for,
in,Census API Geography Specification,,Census API FIPS 'in' clause,0,True,fips-in,
B20005E_045M,B20005E. Sex by Work Experience by Earnings f...,,Margin of Error for!!Male:!!Other:!!With earni...,0,,,[]
B06004HPR_002M,"B06004HPR. Place of Birth (White Alone, Not H...",,Margin of Error for!!Born in Puerto Rico,0,,,[]
B24126_438E,B24126. Detailed Occupation for the Full-Time...,,"Multiple machine tool setters, operators, and ...",0,,,[]


In [9]:
# Select the variables matching the 'B01001A_' pattern, then append the two
# identifying columns so each returned row can be tied to a place.
cols = con.varslike('B01001A_')
cols += ['NAME', 'GEOID']

In [10]:
# Pull the selected columns for the requested geography unit and filter.
# NOTE: this call prints a deprecation warning because of how cenpy calls
# pandas; the warning does not come from code in this notebook.
data = con.query(
    cols,
    geo_unit=g_unit,
    geo_filter=g_filter,
)

For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  df[cols] = df[cols].convert_objects(convert_numeric=convert_numeric)


In [17]:
# Label each row by its place name; NAME also remains available as a column.
data.index = data['NAME']

# Show the first five rows and the last five columns
data.head(5).iloc[:, -5:]

Unnamed: 0_level_0,B01001A_007M,B01001A_008E,B01001A_009M,NAME,GEOID
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Adams County, Colorado",514,12648,624,"Adams County, Colorado",05000US08001
"Arapahoe County, Colorado",432,13231,582,"Arapahoe County, Colorado",05000US08005
"Boulder County, Colorado",632,15297,189,"Boulder County, Colorado",05000US08013
"Denver County, Colorado",389,15602,829,"Denver County, Colorado",05000US08031
"Douglas County, Colorado",367,4953,442,"Douglas County, Colorado",05000US08035


In [12]:
# List the TIGER map services that cenpy can attach to a connection
cen.tiger.available()

[{'name': 'AIANNHA', 'type': 'MapServer'},
 {'name': 'CBSA', 'type': 'MapServer'},
 {'name': 'Hydro_LargeScale', 'type': 'MapServer'},
 {'name': 'Hydro', 'type': 'MapServer'},
 {'name': 'Labels', 'type': 'MapServer'},
 {'name': 'Legislative', 'type': 'MapServer'},
 {'name': 'Places_CouSub_ConCity_SubMCD', 'type': 'MapServer'},
 {'name': 'PUMA_TAD_TAZ_UGA_ZCTA', 'type': 'MapServer'},
 {'name': 'Region_Division', 'type': 'MapServer'},
 {'name': 'School', 'type': 'MapServer'},
 {'name': 'Special_Land_Use_Areas', 'type': 'MapServer'},
 {'name': 'State_County', 'type': 'MapServer'},
 {'name': 'tigerWMS_ACS2013', 'type': 'MapServer'},
 {'name': 'tigerWMS_ACS2014', 'type': 'MapServer'},
 {'name': 'tigerWMS_ACS2015', 'type': 'MapServer'},
 {'name': 'tigerWMS_ACS2016', 'type': 'MapServer'},
 {'name': 'tigerWMS_ACS2017', 'type': 'MapServer'},
 {'name': 'tigerWMS_Census2010', 'type': 'MapServer'},
 {'name': 'tigerWMS_Current', 'type': 'MapServer'},
 {'name': 'tigerWMS_ECON2012', 'type': 'MapServe

In [19]:
# Attach the 'tigerWMS_ACS2013' TIGER map service to the connection so that
# geometries can be queried through con.mapservice.
con.set_mapservice('tigerWMS_ACS2013')

# Show the layers offered by the attached map service, keyed by layer number
con.mapservice.layers

{0: (ESRILayer) 2010 Census Public Use Microdata Areas,
 1: (ESRILayer) 2010 Census Public Use Microdata Areas Labels,
 2: (ESRILayer) 2010 Census ZIP Code Tabulation Areas,
 3: (ESRILayer) 2010 Census ZIP Code Tabulation Areas Labels,
 4: (ESRILayer) Tribal Census Tracts,
 5: (ESRILayer) Tribal Census Tracts Labels,
 6: (ESRILayer) Tribal Block Groups,
 7: (ESRILayer) Tribal Block Groups Labels,
 8: (ESRILayer) Census Tracts,
 9: (ESRILayer) Census Tracts Labels,
 10: (ESRILayer) Census Block Groups,
 11: (ESRILayer) Census Block Groups Labels,
 12: (ESRILayer) Unified School Districts,
 13: (ESRILayer) Unified School Districts Labels,
 14: (ESRILayer) Secondary School Districts,
 15: (ESRILayer) Secondary School Districts Labels,
 16: (ESRILayer) Elementary School Districts,
 17: (ESRILayer) Elementary School Districts Labels,
 18: (ESRILayer) Estates,
 19: (ESRILayer) Estates Labels,
 20: (ESRILayer) County Subdivisions,
 21: (ESRILayer) County Subdivisions Labels,
 22: (ESRILayer) 

In [14]:
# Query one layer of the attached map service for features whose STATE field is 8.
# NOTE(review): layer=84 is a magic number. The layer listing shown earlier is
# truncated before that entry, so confirm that 84 is the county layer (the
# returned rows look like Colorado counties).
geodata = con.mapservice.query(layer=84, where='STATE=8')

  outdf.crs = datadict.pop('spatialReference', {})


In [16]:
# Preview the top-left corner of geodata: first five rows, first five columns
geodata.head(5).iloc[:, :5]

Unnamed: 0,AREALAND,AREAWATER,BASENAME,CENTLAT,CENTLON
0,8206547677,4454510,Saguache,38.0807339,-106.2808607
1,4376528368,25375721,La Plata,37.2863615,-107.8435627
2,1419419130,3530746,Sedgwick,40.8759564,-102.3517903
3,1003660612,2035929,San Juan,37.7640122,-107.6762274
4,4605714032,8166128,Cheyenne,38.828178,-102.6034141


In [21]:
# Join the census table to the geometries by matching its 'county' column
# against the 'COUNTY' column of geodata. The merge uses pandas' default
# inner join, so rows without a match on either side are dropped.
newdata = data.merge(geodata, left_on='county', right_on='COUNTY')

# Show the first five rows and the last five columns of the merged frame
newdata.iloc[:5, -5:]

Unnamed: 0,NAME_y,OBJECTID,OID,STATE,geometry
0,Adams County,1226,27553700234319,8,<pysal.cg.shapes.Polygon object at 0x114f4ea90>
1,Arapahoe County,2980,27553703789414,8,<pysal.cg.shapes.Polygon object at 0x11d9aea58>
2,Boulder County,512,27553701435070,8,<pysal.cg.shapes.Polygon object at 0x1145c7b38>
3,Denver County,529,27553700234321,8,<pysal.cg.shapes.Polygon object at 0x11d3c7940>
4,Douglas County,2762,27553711656416,8,<pysal.cg.shapes.Polygon object at 0x11e0126d8>
