# Census Data Collection

This notebook uses the Census Data API to collect data for all tracts in Boston's Suffolk County from the 2000 and 2010 Censuses.

A note on the API: there is a variable limit of 50 per single query so queries were run separately and the dataframes were later concatenated.

## 2000 Census Data - Boston

In [10]:
import pandas as pd
import requests

In [3]:
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)
pd.set_option('max_colwidth',60)

## File 1

In [316]:
# grab the data for all variable options 
# this df provides table ID and names
response_vars = requests.get('https://api.census.gov/data/2000/sf1/variables')
data_vars = response_vars.json()
df_vars=pd.DataFrame(data_vars)

In [317]:
df_vars.head()
header_vars = df_vars.iloc[0]

In [318]:
df_vars=df_vars[1:]

In [319]:
df_vars.rename(columns=header_vars, inplace=True)

In [320]:
# melt the df so we can concatenate it to the df with the values and can rename the columns with long label names
melted_df_variables = df_vars.melt(id_vars=['name'],value_vars=['label'])

## Population Data

In [321]:
response_sf1_call1 = requests.get('https://api.census.gov/data/2000/sf1?get=P001001,P003003,P003004,P003006,P003008,P003009,P004002,P015001,P016001,P018002,P018005,P018007,P018008,P019002,P019011,P020017,P020002,P034002,P037001,P037002,P037006&for=tract:*&in=state:25%20county:025')
data_sf1_call1 = response_sf1_call1.json()

In [322]:
# put data into df and make 1st row the header
df_sf1_call1 = pd.DataFrame(data_sf1_call1)
header = df_sf1_call1.iloc[0]
df_sf1_call1=df_sf1_call1[1:]

In [323]:
df_sf1_call1.rename(columns=header, inplace=True)

In [324]:
df_sf1_call1.head()

Unnamed: 0,P001001,P003003,P003004,P003006,P003008,P003009,P004002,P015001,P016001,P018002,...,P019011,P020017,P020002,P034002,P037001,P037002,P037006,state,county,tract
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,25,25,0
2,3968,2940,153,402,271,185,349,1563,3930,409,...,1247,291,1272,499,38,0,38,25,25,100
3,3887,3166,123,324,118,143,289,1543,3841,368,...,1294,224,1319,479,46,0,46,25,25,201
4,3925,2738,270,590,189,123,442,1494,3905,372,...,1102,272,1222,492,20,0,20,25,25,202
5,2923,2453,88,204,69,101,162,1159,2747,287,...,959,191,968,378,176,170,6,25,25,301


## Housing Data

In [328]:
response_sf1_call2 = requests.get('https://api.census.gov/data/2000/sf1?get=H001001,H003002,H003003,H006001,H006002,H006003,H006007,H007010,H013001,H013002,H013003,H013004,H013005,H013006,H013007,H013008&for=tract:*&in=state:25%20county:025')
data_sf1_call2 = response_sf1_call2.json()

In [329]:
df_sf1_call2 = pd.DataFrame(data_sf1_call2)
header2 = df_sf1_call2.iloc[0]
df_sf1_call2=df_sf1_call2[1:]

In [330]:
df_sf1_call2.rename(columns=header2, inplace=True)

In [331]:
df_sf1_call2.head() 

Unnamed: 0,H001001,H003002,H003003,H006001,H006002,H006003,H006007,H007010,H013001,H013002,H013003,H013004,H013005,H013006,H013007,H013008,state,county,tract
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,25,25,0
2,1614,1563,51,1563,1271,57,57,97,1563,409,519,298,194,86,40,17,25,25,100
3,1574,1543,31,1543,1343,34,29,69,1543,368,541,338,184,63,31,18,25,25,201
4,1516,1494,22,1494,1144,90,54,123,1494,372,470,273,208,115,39,17,25,25,202
5,1194,1159,35,1159,1009,34,19,50,1159,287,467,210,124,43,17,11,25,25,301


## File 3

In [333]:
# grab the data for all variable options # this df provides table ID and names
response_sf3_vars = requests.get('https://api.census.gov/data/2000/sf3/variables')
data_sf3_vars = response.json()
df_sf3_vars=pd.DataFrame(data_sf3_vars)

In [334]:
header3 = df_sf3_vars.iloc[0]

In [335]:
df_sf3_vars=df_sf3_vars[1:]

In [336]:
df_sf3_vars.rename(columns=header3, inplace=True)

In [337]:
df_sf3_vars.head()

Unnamed: 0,name,label,concept
1,for,Census API FIPS 'for' clause,Census API Geography Specification
2,in,Census API FIPS 'in' clause,Census API Geography Specification
3,P029009,RELATIONSHIP BY AGE HH:REL CHD:Own:6-11,P29. Relationship By Age For The Population Under 18 Yea...
4,P029007,RELATIONSHIP BY AGE HH:REL CHD:Own:3'&'4,P29. Relationship By Age For The Population Under 18 Yea...
5,P029008,RELATIONSHIP BY AGE HH:REL CHD:Own:5,P29. Relationship By Age For The Population Under 18 Yea...


In [None]:
# MELT THE DF

In [None]:
# Other

In [347]:
response_sf3_call1 = requests.get('https://api.census.gov/data/2000/sf3?get=P021002,P021003,P021009,P021014,P026001,P026002,P026003,P026004,P026005,P030002,P030003,P030004,P030005,P030006,P030007,P030008,P030009,P030013,P030014,P030016,P037002,P037003,P037011,P037014,P037015,P037016,P037017,P037018,P037019,P037020,P037028,P037031,P037032,P037033,P037034,P037035&for=tract:*&in=state:25%20county:025')
data_sf3_call1 = response_sf3_call1.json()

In [348]:
df_sf3_call1 = pd.DataFrame(data_sf3_call1)

header4 = df_sf3_call1.iloc[0]
df_sf3_call1=df_sf3_call1[1:]
df_sf3_call1.rename(columns=header4,inplace=True)

In [349]:
df_sf3_call1.head()

Unnamed: 0,P021002,P021003,P021009,P021014,P026001,P026002,P026003,P026004,P026005,P030002,...,P037020,P037028,P037031,P037032,P037033,P037034,P037035,state,county,tract
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,25,25,0
2,2935,1982,114,447,2287,2262,1258,1004,25,1526,...,34,392,49,339,132,28,14,25,25,100
3,2886,1726,88,457,2681,2664,1337,1327,17,1715,...,49,203,92,457,152,46,24,25,25,201
4,2620,1839,56,601,2172,2162,1179,983,10,1566,...,28,340,72,393,150,16,5,25,25,202
5,2142,1277,23,346,1625,1586,803,783,39,1202,...,15,255,57,364,195,29,0,25,25,301


In [353]:
# second dataframe 
response_sf3_call2 = requests.get('https://api.census.gov/data/2000/sf3?get=P043002,P043006,P043007,P043009,P043013,P043014,P076001,P076002,P076003,P076004,P076005,P076006,P076007,P076008,P076009,P076010,P076011,P076012,P076013,P076014,P076015,P076016,P076017,P077001&for=tract:*&in=state:25%20county:025')
data_sf3_call2 = response_sf3_call2.json()

In [354]:
df_sf3_call2 = pd.DataFrame(data_sf3_call2)

header5 = df_sf3_call2.iloc[0]
df_sf3_call2=df_sf3_call2[1:]
df_sf3_call2.rename(columns=header5,inplace=True)

In [355]:
df_sf3_call2.head()

Unnamed: 0,P043002,P043006,P043007,P043009,P043013,P043014,P076001,P076002,P076003,P076004,...,P076012,P076013,P076014,P076015,P076016,P076017,P077001,state,county,tract
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,25,25,0
2,1780,1277,57,1683,1067,45,700,37,39,41,...,133,53,52,0,6,13,46100,25,25,100
3,1756,1433,30,1747,1318,24,680,19,0,26,...,115,161,37,31,32,0,62368,25,25,201
4,1558,1145,25,1700,1076,37,782,50,29,44,...,111,177,62,14,17,0,59306,25,25,202
5,1107,732,27,1380,910,0,507,25,33,9,...,110,74,56,16,0,0,60347,25,25,301


In [356]:
response_sf3_call3 = requests.get('https://api.census.gov/data/2000/sf3?get=H030002,H030003,H030004,H030005,H030009,H055001,H056001,H057001,H063001,H085001,H086001&for=tract:*&in=state:25%20county:025')
data_sf3_call3 = response_sf3_call3.json()
df_sf3_call3 = pd.DataFrame(data_sf3_call3)
header6 = df_sf3_call3.iloc[0]
df_sf3_call3=df_sf3_call3[1:]
df_sf3_call3.rename(columns=header6,inplace=True)

In [357]:
df_sf3_call3.head()

Unnamed: 0,H030002,H030003,H030004,H030005,H030009,H055001,H056001,H057001,H063001,H085001,H086001,state,county,tract
1,0,0,0,0,0,0,0,0,0,0,0,25,25,0
2,135,100,482,525,112,559,898,1181,993,222100,94815000,25,25,100
3,244,46,557,526,25,717,991,1350,1110,265600,123345000,25,25,201
4,258,46,627,263,9,424,820,1084,857,276700,140145000,25,25,202
5,190,47,429,209,99,547,971,1185,1052,214400,107727500,25,25,301


In [None]:
## merge all the 2000 census data 

In [None]:
df_2000 = pd.merge(df_sf1_call1,df_sf1_call2,df_sf3_call1,df_sf3_call2,df_sf3_call3)

# 2010 Census Data- Boston

## File 1

In [295]:
response2010 = requests.get('https://api.census.gov/data/2010/dec/sf1?get=P001001,P003003,P003004,P003006,P003008,P003009,P004002,P015001,P016001,P018002,P018005,P018007,P018008,P019002,P019011,P020017,P020002,P034002,P037001,P037002,P037006&for=tract:*&in=state:25%20county:025')
data2010 = response.json()

In [296]:
df_2010 = pd.DataFrame(data2010)

In [297]:
df_2010

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,H030002,H030003,H030004,H030005,H030009,H055001,H056001,H057001,H063001,H085001,H086001,state,county,tract
1,0,0,0,0,0,0,0,0,0,0,0,25,025,000000
2,135,100,482,525,112,559,898,1181,993,222100,94815000,25,025,000100
3,244,46,557,526,25,717,991,1350,1110,265600,123345000,25,025,000201
4,258,46,627,263,9,424,820,1084,857,276700,140145000,25,025,000202
5,190,47,429,209,99,547,971,1185,1052,214400,107727500,25,025,000301
6,232,19,566,396,11,916,1241,1504,1331,259500,159300000,25,025,000302
7,125,120,525,289,884,289,819,1143,900,202700,119795000,25,025,000401
8,149,72,508,205,7,813,1021,1355,1093,303200,122940000,25,025,000402
9,0,7,63,268,703,635,901,1174,941,132000,98000000,25,025,000501
