# Dependency setup

## Google Colab setup

In [2]:
# Install the required libraries for running on colab
import subprocess
import sys
from IPython.display import clear_output

# Run pip through the kernel's own interpreter so the package is installed
# into the environment this notebook is executing in. A `!pip` shell escape
# never raises on failure, so the exit status is checked explicitly and the
# success message is only printed when pip actually succeeded.
# NOTE(review): prophet is unpinned; consider pinning a version for reproducibility.
try:
  subprocess.run(
    [sys.executable, "-m", "pip", "install", "prophet"],
    check=True,
    capture_output=True,
    text=True,
  )
except subprocess.CalledProcessError as err:
  print("Error installing libraries to colab instance")
  # Show pip's own error output so the failure can be diagnosed
  print(err.stderr)
else:
  clear_output()
  print("Libraries successfully installed to colab instance")


Libraries successfully installed to colab instance


## Package setup

In [7]:
# Import dependencies and libraries
import pandas as pd
from census import Census
from us import states
from dotenv import load_dotenv
import requests
import sys
import csv
import os
import json

# Let python-dotenv populate the process environment, then read the
# U.S. Census API key from it (the value is None when the variable is unset)
load_dotenv()
api_key = os.environ.get("CENSUS_API_KEY")


# Census Code

In [4]:
# Build the Census API client with the key loaded above
c = Census(api_key)

# Show the client's type followed by the client object itself
display(type(c), c)


census.core.Census

<census.core.Census at 0x1316b003490>

## Census API Aliases

In [8]:
# Read the census alias definitions from census_aliases.json
# (the path is relative to the current working directory)
# TODO: Add google colab-specific code to load the census aliases
with open('census_aliases.json', 'r') as f:
    census_aliases = json.load(f)

# Uncomment to inspect the loaded aliases
# display(census_aliases)

def printAliases():
  "Print a numbered listing of every entry in the module-level census_aliases dict"
  divider = "-" * 30
  print("Census aliases:")
  print(divider)
  # Entries are numbered from 1 in dict order; only the values are needed
  for position, alias in enumerate(census_aliases.values(), start=1):
    # Read all four fields up front so a missing key fails before anything is printed for this entry
    description = alias["description"]
    column_name = alias["variable"]
    title = alias["text"]
    unit = alias["unit"]
    print(f"{position:03d} | {title.capitalize()}: {description}")
    print(f"    | Census column name: {column_name}")
    print(f"    | Unit: {unit}")
    print(divider)

# Saving the reference of the standard output
original_stdout = sys.stdout

# Save the census aliases to a file by temporarily pointing stdout at it
with open('census_aliases.txt', 'w') as f:
    sys.stdout = f
    try:
        printAliases()
    finally:
        # Always restore stdout, even if printAliases() raises; otherwise
        # later cells would keep printing into a closed file
        sys.stdout = original_stdout


## County FIPS Codes

In [None]:
# Define functions to get FIPS codes for US counties
# https://gist.github.com/cjwinchester/a8ff5dee9c07d161bdf4
def getCounties():
    """Return a dict of FIPS codes (keys) of U.S. counties (values).

    Downloads the Census Bureau's national county reference file and parses it
    as comma-separated text. Each key is the second and third columns of a row
    concatenated; each value is the fourth column with " County" removed.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or a timeout.
    """
    r = requests.get(
        "http://www2.census.gov/geo/docs/reference/codes/files/national_county.txt",
        timeout=30,  # do not let a stalled connection hang the notebook
    )
    # Fail loudly instead of parsing an HTML error page as CSV
    r.raise_for_status()

    d = {}
    reader = csv.reader(r.text.splitlines(), delimiter=',')
    for line in reader:
        # Skip blank or truncated rows that lack the county-name column
        if len(line) < 4:
            continue
        d[line[1] + line[2]] = line[3].replace(" County", "")
    return d

def getCountyAdj():
    """Return a list of single-key dicts mapping a county FIPS code to its neighbors.

    Each dict has one county FIPS code (key) and a list of the FIPS codes of
    the adjacent counties, not including that county itself (value).

    The adjacency file is tab-separated. A row whose second column is filled
    starts a new county; rows where it is empty continue the current county.
    The neighbor's code is read from the fourth column.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or a timeout.
    """
    adj = requests.get(
        "http://www2.census.gov/geo/docs/reference/county_adjacency.txt",
        timeout=30,  # do not let a stalled connection hang the notebook
    )
    # Fail loudly instead of parsing an HTML error page as data
    adj.raise_for_status()

    # csv.reader requires str lines in Python 3, so parse the decoded text
    # directly rather than re-encoding it to bytes
    reader = csv.reader(adj.text.splitlines(), delimiter='\t')
    ls = []
    d = {}
    countyfips = ""
    for row in reader:
        # Tolerate blank or truncated rows by treating missing columns as empty
        county = row[1] if len(row) > 1 else ""
        neighbor = row[3] if len(row) > 3 else ""

        if county:
            # A new county starts: store the finished one before resetting
            if d:
                ls.append(d)
            countyfips = county
            d = {countyfips: []}
        elif not d:
            # Continuation row seen before any county header; nothing to attach it to
            continue

        # Record the neighbor on this row, excluding the county itself
        if neighbor and neighbor != countyfips:
            d[countyfips].append(neighbor)

    # The last county has no following header row to trigger the append above
    if d:
        ls.append(d)
    return ls


In [None]:
# Get the state-level FIPS code for Colorado from the `us` package.
# Later cells use it as a key prefix filter and as the state argument of the census query.
colorado_fips = states.CO.fips
print(f"Colorado FIPS code: {colorado_fips}")


In [None]:
# Store county FIPS codes in memory.
# Despite its name, county_zips maps FIPS codes (not ZIP codes) to county names,
# as returned by getCounties().
county_zips = getCounties()
print("All County FIPS codes:")
# Displays the complete mapping for every county returned by getCounties()
display(county_zips)


In [None]:
# Keep only Colorado counties: each key begins with the state FIPS code,
# so a prefix match on colorado_fips selects them.
# The values are FIPS codes (not ZIP codes), so the label says so.
colorado_county_zips = {key: value for key, value in county_zips.items() if key.startswith(colorado_fips)}
print("Colorado County FIPS codes:")
display(colorado_county_zips)


In [None]:
# Prompt for a Colorado county name and look up its FIPS codes
county_name = input("Enter county name to look up the FIPS code: ").strip()

# Search Colorado counties only. County names repeat across states, so a
# lookup over the national table could return another state's county, whose
# 3-digit code would then be paired with Colorado's state code in the census
# query and silently select the wrong county. Matching ignores letter case.
matching_codes = [
  code
  for code, name in county_zips.items()
  if code.startswith(colorado_fips) and name.casefold() == county_name.casefold()
]
if not matching_codes:
  raise ValueError(f"No Colorado county named '{county_name}' was found")

# county_zip holds the combined state + county code; dropping the
# 2-character state prefix leaves the county portion used by the census query
county_zip = matching_codes[0]
county_fip = county_zip[2:]
print(f"The state + county FIPS code for {county_name} is {county_zip}")
print(f"The county FIPS code for {county_name} is {county_fip}")


## Census Data

In [None]:
# Query the ACS 5-year data (year=2021) for the selected county in Colorado
census_data = c.acs5.state_county(
  (
      "NAME",
      "B19013_001E",
      "B01003_001E",
      "B01002_001E",
      "B19301_001E",
      "B17001_002E"
  ),
  colorado_fips,
  county_fip,
  year=2021
)

display(census_data)

# Stop with a clear message when nothing came back, instead of failing later
# with a KeyError on the missing columns
if not census_data:
  raise ValueError(
    f"The Census API returned no rows for state {colorado_fips}, county {county_fip}"
  )

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column renaming: replace the census variable codes with readable labels
census_pd = census_pd.rename(
  columns = {
    "B01003_001E": "Population",
    "B01002_001E": "Median Age",
    "B19013_001E": "Household Income",
    "B19301_001E": "Per Capita Income",
    "B17001_002E": "Poverty Count",
    "NAME": "Name",
  }
)

# Add a Poverty Rate column (Poverty Count / Population), as a percentage.
# pd.to_numeric with errors="coerce" turns missing or non-numeric values into
# NaN rather than raising, so one bad field does not abort the cell.
census_pd["Poverty Rate"] = (
  100
  * pd.to_numeric(census_pd["Poverty Count"], errors="coerce")
  / pd.to_numeric(census_pd["Population"], errors="coerce")
)

# Configure the final DataFrame: keep only the labeled columns, in display order
census_pd = census_pd[
  [
    "Name",
    "Population",
    "Median Age",
    "Household Income",
    "Per Capita Income",
    "Poverty Count",
    "Poverty Rate"
  ]
]

# Display DataFrame length and sample data
print(f"Number of rows in the DataFrame: {len(census_pd)}")
census_pd.head()
