# Dependency setup

## Google Colab setup

In [None]:
# Install the required libraries for running on colab
import subprocess
import sys
from IPython.display import clear_output

# Run pip through the kernel's own interpreter so the package lands in the
# environment this notebook is actually using, and so the exit status can be
# inspected (a `!pip ...` shell escape never raises on failure).
install_result = subprocess.run(
    [sys.executable, "-m", "pip", "install", "prophet"],
    capture_output=True,
    text=True,
)
clear_output()
if install_result.returncode == 0:
    print("Libraries successfully installed to colab instance")
else:
    # Report the failure honestly and show pip's error output for debugging.
    print("Error installing libraries to colab instance")
    print(install_result.stderr)


## Package setup

In [2]:
# Import dependencies and libraries
import pandas as pd
from census import Census
from us import states
from dotenv import load_dotenv
import requests
import sys
import csv
import os
import json

# Load environment variables (python-dotenv), then read the U.S. Census API key.
# `api_key` is None when CENSUS_API_KEY is not set.
load_dotenv()
api_key = os.environ.get("CENSUS_API_KEY")


# Census Code

In [6]:
# Build the Census client from the API key, then show its type and its repr
c = Census(api_key)
display(type(c), c)


census.core.Census

<census.core.Census at 0x18d3372f7f0>

## Census API Aliases

In [5]:
# Load census aliases
# TODO: Add google colab-specific code to load the census aliases
# Decode explicitly as UTF-8 (the standard JSON encoding) instead of relying on
# the platform's locale encoding, which differs between operating systems.
with open('./data/census_aliases.json', encoding='utf-8') as f:
    census_aliases = json.load(f)

# Display the census aliases
# display(census_aliases)

def printAliases(aliases=None, file=None):
    """Print a numbered, human-readable listing of the census aliases.

    Args:
        aliases: Mapping of alias key -> dict with the keys "description",
            "variable", "text" and "unit". Defaults to the notebook-level
            `census_aliases` when omitted, which keeps `printAliases()` working
            exactly as before.
        file: Optional text stream to write to. When None, `print` writes to
            the current `sys.stdout`, so redirecting stdout still works.

    Raises:
        KeyError: if an alias entry lacks one of the expected keys.
    """
    if aliases is None:
        aliases = census_aliases
    separator = "-"*30
    print("Census aliases:", file=file)
    print(separator, file=file)
    # enumerate(start=1) replaces the hand-maintained counter
    for alias_count, value in enumerate(aliases.values(), start=1):
        description = value["description"]
        column_name = value["variable"]
        title = value["text"]
        unit = value["unit"]
        print(f"{alias_count:03d} | {title.capitalize()}: {description}", file=file)
        print(f"    | Census column name: {column_name}", file=file)
        print(f"    | Unit: {unit}", file=file)
        print(separator, file=file)

# Saving the reference of the standard output
original_stdout = sys.stdout

# Save the census aliases to a file
with open('census_aliases.txt', 'w') as f:
    sys.stdout = f
    try:
        printAliases()
    finally:
        # Always restore the standard output, even if printAliases() raises;
        # otherwise every later cell would keep printing into a closed file.
        sys.stdout = original_stdout


## County FIPS Codes

In [8]:
# Define functions to get FIPS codes for US counties
# https://gist.github.com/cjwinchester/a8ff5dee9c07d161bdf4
def getCounties():
    """Return a dict mapping county FIPS codes to county names.

    Downloads the Census Bureau's national county reference file and parses it
    as comma-separated text. Each key is the concatenation of the second and
    third fields of a row (e.g. '01001'); each value is the fourth field with
    the " County" suffix removed (e.g. 'Autauga').

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    r = requests.get(
        "https://www2.census.gov/geo/docs/reference/codes/files/national_county.txt",
        timeout=30,  # never hang the notebook on an unresponsive server
    )
    # Fail loudly instead of parsing an HTML error page as if it were CSV
    r.raise_for_status()
    d = {}
    for line in csv.reader(r.text.splitlines(), delimiter=','):
        # Skip blank or malformed rows that lack the name field
        if len(line) < 4:
            continue
        d[line[1] + line[2]] = line[3].replace(" County","")
    return d

def getCountyAdj():
    """Return a list of single-key dicts describing county adjacency.

    Each dict has one county FIPS code as its key and, as its value, the list
    of FIPS codes of the adjacent counties (the county itself is excluded).

    The tab-separated source file is read row by row: a row whose second field
    is non-empty starts a new county and carries its first neighbour in the
    fourth field; rows with an empty second field carry further neighbours of
    the current county in the fourth field.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    adj = requests.get(
        "https://www2.census.gov/geo/docs/reference/county_adjacency.txt",
        timeout=30,  # never hang the notebook on an unresponsive server
    )
    adj.raise_for_status()
    # csv.reader needs str lines on Python 3, so parse the decoded text directly
    # rather than re-encoding it to bytes.
    reader = csv.reader(adj.text.splitlines(), delimiter='\t')
    ls = []
    d = {}
    countyfips = ""
    for row in reader:
        # Guard against short/blank rows instead of swallowing IndexError
        county = row[1] if len(row) > 1 else ""
        neighbor = row[3] if len(row) > 3 else ""
        if county:
            # A new county starts: flush the previous one first
            if d:
                ls.append(d)
            countyfips = county
            d = {countyfips: []}
        # Record the neighbour on this row (same-line or continuation row),
        # ignoring rows seen before any county header and self-references
        if d and neighbor and neighbor != countyfips:
            d[countyfips].append(neighbor)
    # The last county is never followed by another header, so flush it here
    if d:
        ls.append(d)
    return ls


In [14]:
# Get the FIPS codes for Colorado
# `states` comes from the `us` package; this value is reused later in the
# notebook as the key prefix for filtering counties and as the state argument
# of the census query.
colorado_fips = states.CO.fips
print(f"Colorado FIPS code: {colorado_fips}")


Colorado FIPS code: 08


In [13]:
# Store county FIPS codes in memory
# NOTE: getCounties() performs an HTTP request, so this cell needs network access.
county_fips = getCounties()
print("All County FIPS codes:")
# NOTE(review): this displays the entire mapping returned by getCounties();
# consider previewing only a subset to keep the notebook output short.
display(county_fips)


All County FIPS codes:


{'01001': 'Autauga',
 '01003': 'Baldwin',
 '01005': 'Barbour',
 '01007': 'Bibb',
 '01009': 'Blount',
 '01011': 'Bullock',
 '01013': 'Butler',
 '01015': 'Calhoun',
 '01017': 'Chambers',
 '01019': 'Cherokee',
 '01021': 'Chilton',
 '01023': 'Choctaw',
 '01025': 'Clarke',
 '01027': 'Clay',
 '01029': 'Cleburne',
 '01031': 'Coffee',
 '01033': 'Colbert',
 '01035': 'Conecuh',
 '01037': 'Coosa',
 '01039': 'Covington',
 '01041': 'Crenshaw',
 '01043': 'Cullman',
 '01045': 'Dale',
 '01047': 'Dallas',
 '01049': 'DeKalb',
 '01051': 'Elmore',
 '01053': 'Escambia',
 '01055': 'Etowah',
 '01057': 'Fayette',
 '01059': 'Franklin',
 '01061': 'Geneva',
 '01063': 'Greene',
 '01065': 'Hale',
 '01067': 'Henry',
 '01069': 'Houston',
 '01071': 'Jackson',
 '01073': 'Jefferson',
 '01075': 'Lamar',
 '01077': 'Lauderdale',
 '01079': 'Lawrence',
 '01081': 'Lee',
 '01083': 'Limestone',
 '01085': 'Lowndes',
 '01087': 'Macon',
 '01089': 'Madison',
 '01091': 'Marengo',
 '01093': 'Marion',
 '01095': 'Marshall',
 '01097': 

In [20]:
# Keep only the entries whose FIPS key starts with the Colorado state code,
# then show the resulting mapping followed by its size
colorado_county_fips = {
    fips_code: county
    for fips_code, county in county_fips.items()
    if fips_code.startswith(colorado_fips)
}
print("Colorado County FIPS codes:")
display(colorado_county_fips, len(colorado_county_fips))


Colorado County FIPS codes:


{'08001': 'Adams',
 '08003': 'Alamosa',
 '08005': 'Arapahoe',
 '08007': 'Archuleta',
 '08009': 'Baca',
 '08011': 'Bent',
 '08013': 'Boulder',
 '08014': 'Broomfield',
 '08015': 'Chaffee',
 '08017': 'Cheyenne',
 '08019': 'Clear Creek',
 '08021': 'Conejos',
 '08023': 'Costilla',
 '08025': 'Crowley',
 '08027': 'Custer',
 '08029': 'Delta',
 '08031': 'Denver',
 '08033': 'Dolores',
 '08035': 'Douglas',
 '08037': 'Eagle',
 '08039': 'Elbert',
 '08041': 'El Paso',
 '08043': 'Fremont',
 '08045': 'Garfield',
 '08047': 'Gilpin',
 '08049': 'Grand',
 '08051': 'Gunnison',
 '08053': 'Hinsdale',
 '08055': 'Huerfano',
 '08057': 'Jackson',
 '08059': 'Jefferson',
 '08061': 'Kiowa',
 '08063': 'Kit Carson',
 '08065': 'Lake',
 '08067': 'La Plata',
 '08069': 'Larimer',
 '08071': 'Las Animas',
 '08073': 'Lincoln',
 '08075': 'Logan',
 '08077': 'Mesa',
 '08079': 'Mineral',
 '08081': 'Moffat',
 '08083': 'Montezuma',
 '08085': 'Montrose',
 '08087': 'Morgan',
 '08089': 'Otero',
 '08091': 'Ouray',
 '08093': 'Park',
 

64

In [17]:
# Prompt for a Colorado county name and retrieve its county FIPS code
county_name = input("Enter county name to look up the FIPS code: ").strip()

# Search only the Colorado counties: county names repeat across states (e.g.
# 'Jefferson' exists in both Alabama and Colorado), so a nationwide lookup by
# name can return another state's county code, which is then wrongly combined
# with the Colorado state code in the census query.
matching_fips = [
    fips_code
    for fips_code, name in colorado_county_fips.items()
    if name.casefold() == county_name.casefold()
]
if not matching_fips:
    raise ValueError(f"No Colorado county named {county_name!r} was found")

state_county_fip = matching_fips[0]
# Drop the two-character state prefix to keep only the county part
county_fip = state_county_fip[2:]
print(f"The FIPS code for {county_name} is {county_fip}")


The FIPS code for Denver is 031


## Census Data

In [19]:
# Census variable code -> report column label, in the order the fields are requested
census_columns = {
    "NAME": "Name",
    "B19013_001E": "Household Income",
    "B01003_001E": "Population",
    "B01002_001E": "Median Age",
    "B19301_001E": "Per Capita Income",
    "B17001_002E": "Poverty Count",
    "B17012_003E": "Families in Poverty (12mo)",
}

# Query `c.acs5.state_county` for the selected county (state code + county code)
census_data = c.acs5.state_county(
  tuple(census_columns),
  colorado_fips,
  county_fip,
  year=2021
)

display(census_data)

# Column order of the final report
report_columns = [
    "Name",
    "Population",
    "Median Age",
    "Household Income",
    "Per Capita Income",
    "Poverty Count",
    "Poverty Rate",
    "Families in Poverty (12mo)",
]

# Build the report frame in one chain: convert, relabel, derive the
# Poverty Rate (Poverty Count / Population, as a percentage), then reorder
census_pd = (
    pd.DataFrame(census_data)
    .rename(columns=census_columns)
    .assign(
        **{
            "Poverty Rate": lambda frame: (
                100 * frame["Poverty Count"].astype(int) / frame["Population"].astype(int)
            )
        }
    )
    [report_columns]
)

# Display DataFrame length and sample data
print(f"Number of rows in the DataFrame: {len(census_pd)}")
census_pd.head()


[{'NAME': 'Denver County, Colorado',
  'B19013_001E': 78177.0,
  'B01003_001E': 706799.0,
  'B01002_001E': 34.8,
  'B19301_001E': 50642.0,
  'B17001_002E': 80620.0,
  'B17012_003E': 4958.0,
  'state': '08',
  'county': '031'}]

Number of rows in the DataFrame: 1


Unnamed: 0,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Families in Poverty (12mo)
0,"Denver County, Colorado",706799.0,34.8,78177.0,50642.0,80620.0,11.406355,4958.0


# Data Cleaning