In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census

# Set variable to hold year
year = 2018

# Census API Key
from api_key import api_key
c = Census(api_key, year=year)

## US Data
#### Get Data for all US

Get US Population Data

In [2]:
# Run API Call for Population Data
us_pop_data = c.acs5.get(("NAME", "B01003_001E", "B02001_002E", "B02001_003E",
                      "B02001_004E", "B02001_005E", "B02001_006E", "B02001_008E",                                             
                      "B01002_001E","B19013_001E",                       
                      "B19301_001E", "B23025_004E", "B23025_005E"), 
                      {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
us_pop_df = pd.DataFrame(us_pop_data)

In [3]:
# Column Reordering
us_pop_df = us_pop_df.rename(columns={
                                  "zip code tabulation area": "Zipcode",
                                  "B01003_001E": "Population",
                                  "B02001_002E": "Population (White Alone)",
                                  "B02001_003E": "Population (Black Alone)", 
                                  "B02001_004E": "Population (American Indian or Alaskan Native Alone)",
                                  "B02001_005E": "Population (Asian Alone)",
                                  "B02001_006E": "Population (Native Hawaiian and Other Pacific Islander Alone)",
                                  "B02001_008E": "Population (Two or more races)",
                                  "B01002_001E": "Median Age",
                                  "B19013_001E": "Household Income",
                                  "B19301_001E": "Per Capita Income",
                                  "B23025_004E": "Employed",
                                  "B23025_005E": "Unemployed"})

# Add column for year
us_pop_df["Year"] = year

us_pop_df.head()

Unnamed: 0,Median Age,Population,Population (White Alone),Population (Black Alone),Population (American Indian or Alaskan Native Alone),Population (Asian Alone),Population (Native Hawaiian and Other Pacific Islander Alone),Population (Two or more races),Household Income,Per Capita Income,Employed,Unemployed,NAME,Zipcode,Year
0,46.2,8642.0,8226.0,256.0,2.0,21.0,0.0,137.0,42826.0,24329.0,3564.0,342.0,ZCTA5 43964,43964,2018
1,35.2,51116.0,13583.0,32929.0,25.0,1054.0,0.0,1251.0,48647.0,26775.0,25800.0,2533.0,ZCTA5 28216,28216,2018
2,38.8,71605.0,51272.0,6346.0,200.0,10533.0,0.0,2276.0,105885.0,51740.0,37198.0,1595.0,ZCTA5 28277,28277,2018
3,35.6,27286.0,14515.0,8672.0,7.0,1709.0,53.0,1740.0,90282.0,38536.0,15182.0,856.0,ZCTA5 28278,28278,2018
4,35.1,29414.0,14358.0,11486.0,187.0,1185.0,132.0,1721.0,39896.0,27452.0,11384.0,805.0,ZCTA5 28303,28303,2018


Get US Poverty Data

In [4]:
# Run API Call for Poverty Data
us_poverty_data = c.acs5.get(("NAME", "B17001_002E", "B17001_003E", "B17001_017E",
                      "B17001A_002E", "B17001B_002E", "B17001C_002E", "B17001D_002E",
                      "B17001E_002E", "B17001G_002E"), 
                      {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
us_poverty_df = pd.DataFrame(us_poverty_data)

In [5]:
# Column Reordering
us_poverty_df = us_poverty_df.rename(columns={
                                  "zip code tabulation area": "Zipcode",
                                  "B17001_002E": "Total Poverty Count",
                                  "B17001_003E": "Poverty Male",
                                  "B17001_017E": "Poverty Female",
                                  "B17001A_002E": "Poverty White",
                                  "B17001B_002E": "Poverty Black",
                                  "B17001C_002E": "Poverty American Indian",
                                  "B17001D_002E": "Poverty Asian",
                                  "B17001E_002E": "Poverty Hispanic",
                                  "B17001G_002E": "Poverty Two or More Races"})

# Add column for year
us_poverty_df["Year"] = year

us_poverty_df.head()

Unnamed: 0,Poverty White,Poverty Black,Poverty American Indian,Poverty Asian,Poverty Hispanic,Poverty Two or More Races,Total Poverty Count,Poverty Male,Poverty Female,NAME,Zipcode,Year
0,1506.0,120.0,2.0,4.0,0.0,10.0,1642.0,817.0,825.0,ZCTA5 43964,43964,2018
1,960.0,5720.0,9.0,49.0,0.0,92.0,7238.0,2763.0,4475.0,ZCTA5 28216,28216,2018
2,1847.0,96.0,11.0,398.0,0.0,102.0,2660.0,1187.0,1473.0,ZCTA5 28277,28277,2018
3,545.0,511.0,7.0,111.0,0.0,55.0,1257.0,645.0,612.0,ZCTA5 28278,28278,2018
4,1981.0,3173.0,64.0,134.0,0.0,517.0,5981.0,2640.0,3341.0,ZCTA5 28303,28303,2018


Get Housing Data

In [6]:
# Run API Call for Housing Data
us_housing_data = c.acs5.get(("NAME", "B25035_001E", "B25058_001E", 
                              "B25064_001E", "B25077_001E", "B25088_002E"), 
                              {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
us_housing_df = pd.DataFrame(us_housing_data)

In [7]:
# Column Reordering
us_housing_df = us_housing_df.rename(columns={
                                  "zip code tabulation area": "Zipcode",
                                  "B25035_001E": "Median year housing units were built",
                                  "B25058_001E": "Median contract rent",
                                  "B25064_001E": "Median gross rent (contract rent plus utilities)",
                                  "B25077_001E": "Median value (dollars) for Owner-Occupied housing units",
                                  "B25088_002E": "Median Selected Monthly Owner Costs (Dollars) by Mortgage Status"})

# Add column for year
us_housing_df["Year"] = year

us_housing_df.head()

Unnamed: 0,Median year housing units were built,Median contract rent,Median gross rent (contract rent plus utilities),Median value (dollars) for Owner-Occupied housing units,Median Selected Monthly Owner Costs (Dollars) by Mortgage Status,NAME,Zipcode,Year
0,1982.0,284.0,363.0,86200.0,801.0,ZCTA5 00601,601,2018
1,1979.0,316.0,405.0,86300.0,877.0,ZCTA5 00602,602,2018
2,1979.0,322.0,427.0,122400.0,839.0,ZCTA5 00603,603,2018
3,1979.0,216.0,312.0,91600.0,551.0,ZCTA5 00606,606,2018
4,1979.0,323.0,410.0,88600.0,743.0,ZCTA5 00610,610,2018


Get Education Data

In [8]:
# Run API Call for Housing Data
us_education_data = c.acs5.get(("NAME", "B15003_002E", "B15003_017E", "B15003_018E", 
                                "B15003_021E", "B15003_022E", "B15003_023E", 
                                "B15003_024E," "B15003_025E"), 
                                {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
us_education_df = pd.DataFrame(us_education_data)

In [9]:
# Column Reordering
us_education_df = us_education_df.rename(columns={
                                  "zip code tabulation area": "Zipcode",
                                  "B15003_002E": "Education (No Schooling)",
                                  "B15003_017E": "Education (High School)",
                                  "B15003_018E": "Education (GED)",
                                  "B15003_021E": "Education (Associate's)",
                                  "B15003_022E": "Education (Bachelor's)",
                                  "B15003_023E": "Education (Master's)",
                                  "B15003_024E": "Education (Professional)",
                                  "B15003_025E": "Education (Doctorate)"})

# Add column for year
us_education_df["Year"] = year

us_education_df.head()

Unnamed: 0,Education (No Schooling),Education (High School),Education (GED),Education (Associate's),Education (Bachelor's),Education (Master's),Education (Professional),Education (Doctorate),NAME,Zipcode,Year
0,52.0,2528.0,220.0,824.0,513.0,255.0,39.0,30.0,ZCTA5 43964,43964,2018
1,445.0,6391.0,1022.0,2938.0,7271.0,3018.0,306.0,214.0,ZCTA5 28216,28216,2018
2,322.0,4386.0,479.0,2941.0,19822.0,9171.0,1952.0,867.0,ZCTA5 28277,28277,2018
3,95.0,2912.0,360.0,1277.0,5647.0,1833.0,516.0,117.0,ZCTA5 28278,28278,2018
4,298.0,3921.0,680.0,2219.0,3357.0,1845.0,400.0,232.0,ZCTA5 28303,28303,2018


## Austin Data
#### Filter out Data for Austin Zip codes

In [10]:
# Create a list of Austin zip codes
Austin_zip_codes = ["78610", "78613", "78617", "78641", "78652", "78653", "78660", "78664", "78681", 
                    "78701", "78702", "78703", "78704", "78705", "78712", "78717", "78719", "78721", 
                    "78722", "78723", "78724", "78725", "78726", "78727", "78728", "78729", "78730", 
                    "78731", "78732", "78733", "78734", "78735", "78736", "78737", "78738", "78739", 
                    "78741", "78742", "78744", "78745", "78746", "78747", "78748", "78749", "78750", 
                    "78751", "78752", "78753", "78754", "78756", "78757", "78758", "78759"]

Filter out Austin Population Data

In [11]:
# Filter out rows where zipcode is in Austin
Austin_pop = us_pop_df[us_pop_df["Zipcode"].isin(Austin_zip_codes)]

# Set zipcode as index
Austin_pop = Austin_pop.set_index("Zipcode")

Austin_pop.head()

Unnamed: 0_level_0,Median Age,Population,Population (White Alone),Population (Black Alone),Population (American Indian or Alaskan Native Alone),Population (Asian Alone),Population (Native Hawaiian and Other Pacific Islander Alone),Population (Two or more races),Household Income,Per Capita Income,Employed,Unemployed,NAME,Year
Zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
78660,34.1,89830.0,59056.0,12760.0,840.0,8755.0,108.0,4487.0,83499.0,33870.0,48999.0,1928.0,ZCTA5 78660,2018
78703,36.1,20890.0,18214.0,239.0,46.0,1889.0,0.0,260.0,105207.0,85040.0,12716.0,247.0,ZCTA5 78703,2018
78721,34.6,12492.0,7154.0,3282.0,55.0,224.0,0.0,378.0,44076.0,22792.0,6559.0,442.0,ZCTA5 78721,2018
78726,32.0,13867.0,9952.0,591.0,0.0,2360.0,0.0,564.0,77935.0,48434.0,8151.0,210.0,ZCTA5 78726,2018
78728,34.8,21480.0,14122.0,3362.0,95.0,1981.0,0.0,766.0,55099.0,34602.0,12667.0,487.0,ZCTA5 78728,2018


In [12]:
# Save as CSV
Austin_pop.to_csv(f"../Resources/Census.gov/By_Year/Austin_population_{year}.csv", encoding="utf-8")

Filter out Austin Poverty Data

In [13]:
# Filter out rows where zipcode is in Austin
Austin_poverty = us_poverty_df[us_poverty_df["Zipcode"].isin(Austin_zip_codes)]

# Set zipcode as index
Austin_poverty = Austin_poverty.set_index("Zipcode")

Austin_poverty.head()

Unnamed: 0_level_0,Poverty White,Poverty Black,Poverty American Indian,Poverty Asian,Poverty Hispanic,Poverty Two or More Races,Total Poverty Count,Poverty Male,Poverty Female,NAME,Year
Zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
78660,4197.0,987.0,0.0,569.0,0.0,227.0,6234.0,2409.0,3825.0,ZCTA5 78660,2018
78703,777.0,1.0,3.0,374.0,0.0,0.0,1176.0,668.0,508.0,ZCTA5 78703,2018
78721,1770.0,1066.0,7.0,44.0,0.0,0.0,3208.0,1308.0,1900.0,ZCTA5 78721,2018
78726,400.0,20.0,0.0,110.0,0.0,94.0,624.0,171.0,453.0,ZCTA5 78726,2018
78728,1534.0,578.0,0.0,81.0,0.0,123.0,2457.0,1281.0,1176.0,ZCTA5 78728,2018


In [14]:
# Save as CSV
Austin_poverty.to_csv(f"../Resources/Census.gov/By_Year/Austin_poverty_{year}.csv", encoding="utf-8")

Filter out Austin Housing Data

In [15]:
# Filter out rows where zipcode is in Austin
Austin_housing = us_housing_df[us_housing_df["Zipcode"].isin(Austin_zip_codes)]

# Set zipcode as index
Austin_housing = Austin_housing.set_index("Zipcode")

Austin_housing.head()

Unnamed: 0_level_0,Median year housing units were built,Median contract rent,Median gross rent (contract rent plus utilities),Median value (dollars) for Owner-Occupied housing units,Median Selected Monthly Owner Costs (Dollars) by Mortgage Status,NAME,Year
Zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
78610,2006.0,1034.0,1238.0,241500.0,1866.0,ZCTA5 78610,2018
78613,2002.0,1205.0,1355.0,275900.0,2018.0,ZCTA5 78613,2018
78617,2000.0,977.0,1229.0,137400.0,1374.0,ZCTA5 78617,2018
78641,2004.0,1231.0,1500.0,236400.0,1797.0,ZCTA5 78641,2018
78652,2000.0,1242.0,1346.0,280100.0,1823.0,ZCTA5 78652,2018


In [16]:
# Save as CSV
Austin_housing.to_csv(f"../Resources/Census.gov/By_Year/Austin_housing_{year}.csv", encoding="utf-8")

Filter out Austin Education Data

In [17]:
# Filter out rows where zipcode is in Austin
Austin_education = us_education_df[us_education_df["Zipcode"].isin(Austin_zip_codes)]

# Set zipcode as index
Austin_education = Austin_education.set_index("Zipcode")

Austin_education.head()

Unnamed: 0_level_0,Education (No Schooling),Education (High School),Education (GED),Education (Associate's),Education (Bachelor's),Education (Master's),Education (Professional),Education (Doctorate),NAME,Year
Zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
78660,918.0,10475.0,1950.0,6005.0,15392.0,4854.0,750.0,488.0,ZCTA5 78660,2018
78703,61.0,333.0,69.0,318.0,6581.0,4265.0,1834.0,773.0,ZCTA5 78703,2018
78721,511.0,1888.0,430.0,292.0,1877.0,558.0,140.0,47.0,ZCTA5 78721,2018
78726,53.0,708.0,24.0,456.0,4155.0,1616.0,185.0,213.0,ZCTA5 78726,2018
78728,137.0,3540.0,369.0,1102.0,4861.0,1345.0,147.0,194.0,ZCTA5 78728,2018


In [18]:
# Save as CSV
Austin_education.to_csv(f"../Resources/Census.gov/By_Year/Austin_education_{year}.csv", encoding="utf-8")