In [1]:
# Dependencies
import requests
import json
from pprint import pprint 
import pandas as pd
from sqlalchemy import create_engine

#Create API URL
#API Variables used (as identified in the documentation here: https://api.census.gov/data/2017/acs/acs5/variables.html)   
#040 - state (41 - Oregon)
#860 - zip code tabulation area
#B00001_001E - Estimate!!Total - UNWEIGHTED SAMPLE COUNT OF THE POPULATION
#B19001_001E - Estimate!!Total - HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2017 INFLATION-ADJUSTED DOLLARS)

county_pop_url = "https://api.census.gov/data/2017/acs/acs5?get=NAME,GEO_ID,B00001_001E&for=county:*&in=state:41"
county_hhi_url = "https://api.census.gov/data/2017/acs/acs5?get=NAME,GEO_ID,B19013_001E&for=county:*&in=state:41"

In [2]:
# Get county population information in JSON format from American Community Survey 5 Year Estimates (2017) census API
pop_response = requests.get(f"{county_pop_url}").json()

# Save 2017 county population data to dataframe
pop_by_county = pd.DataFrame(pop_response, columns = ['County_Name','County_ID', 'County_Population', 'State_Num','County_Num']).set_index('County_ID')

# Drop former header (now row 1)
pop_by_county = pop_by_county.drop('GEO_ID')

#Grab only columns that are needed
pop_by_county = pop_by_county[['County_Name', 'County_Population']]

# Remove state from County_Name column
pop_by_county['County_Name']=pop_by_county['County_Name'].str.replace("County, Oregon", "")

# Preview population dataframe
pop_by_county.head()

Unnamed: 0_level_0,County_Name,County_Population
County_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
0500000US41047,Marion,23376
0500000US41029,Jackson,13963
0500000US41019,Douglas,8335
0500000US41023,Grant,1322
0500000US41031,Jefferson,2401


In [3]:
# Get county population information in JSON format from American Community Survey 5 Year Estimates (2017) census API
hhi_response = requests.get(f"{county_hhi_url}").json()

# Save 2017 county median household income data to dataframe
hhi_by_county = pd.DataFrame(hhi_response, columns = ['County_Name','County_ID', 'County_Median_HHI', 'State_Num','County_Num']).set_index('County_ID')

# Drop former header (now row 1)
hhi_by_county = hhi_by_county.drop('GEO_ID')

#Grab only columns that are needed
hhi_by_county = hhi_by_county[['County_Name', 'County_Median_HHI']]

# Preview population dataframe
hhi_by_county.head()

# Remove state from County_Name column
hhi_by_county['County_Name']=hhi_by_county['County_Name'].str.replace("County, Oregon", "")

# Preview Median HHI by County Dataframe
hhi_by_county.head()

Unnamed: 0_level_0,County_Name,County_Median_HHI
County_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
0500000US41047,Marion,53828
0500000US41029,Jackson,48688
0500000US41019,Douglas,44023
0500000US41023,Grant,44826
0500000US41031,Jefferson,48464


In [4]:
county_data = pd.merge(pop_by_county, hhi_by_county, on='County_ID')

county_data.head()

Unnamed: 0_level_0,County_Name_x,County_Population,County_Name_y,County_Median_HHI
County_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0500000US41047,Marion,23376,Marion,53828
0500000US41029,Jackson,13963,Jackson,48688
0500000US41019,Douglas,8335,Douglas,44023
0500000US41023,Grant,1322,Grant,44826
0500000US41031,Jefferson,2401,Jefferson,48464


In [None]:
# Extract Libarry CSV to a dataframe
lib_file = "Resources/Oregon_Library_Directory.csv"
lib_df = pd.read_csv(lib_file)
lib_df.shape

In [None]:
# Creating a new data frame with specific columns needed
new_lib_df = lib_df[['Full Library Name', 'County', 'Type of Library']].copy()
new_lib_df.head()

# Creating a filtered dataframe from specific columns
lib_transformed= new_lib_df.rename(columns={'Full Library Name': 'library_name',
                                                    'County': 'county',
                                                    'Type of Library': 'type'})

# Drop "County" from the county name
lib_transformed['county'] = lib_transformed['county'].str.replace(" County", "")

# Pull through data only for Public and Volunteer libraries
libraries = lib_transformed[~lib_transformed['type'].isin(['Academic', 'Special', 'Tribal'])]

libraries.head(20)

In [None]:
county_lib = libraries.join(libraries, other, 