In [1]:
# importing packages required for this notebook to run successfully
%config InlineBackend.figure_formats = ["retina"]

import os
from collections import Counter
from math import isnan

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from census import Census

In [2]:
# Geography this notebook is scoped to.
# Only `state_fips` is consumed by the cells visible below.

# State FIPS code for New York
state_fips = "36"

# Kings County (also known as Brooklyn). Neither value is used in the code yet,
# but they can be added to the API queries to select data for a specific county only.
county_fips = "047"
county_name = "Kings County"

In [5]:
# Initialise the Census API client for the 2020 vintage.
# The API key is read from the CENSUS_API_KEY environment variable so that it never
# has to be pasted into (and saved with) the notebook. When the variable is not set,
# the client falls back to the empty key this notebook used originally.
census = Census(os.environ.get("CENSUS_API_KEY", ""), year=2020)

In [7]:
# Race columns selected from the Decennial Census P1 table.
# Maps each Census variable code to the short column name used in the rest of the notebook.
p1_population_columns = dict(
    P1_003N="white",        # White alone
    P1_004N="black",        # Black or African American alone
    P1_005N="amin",         # American Indian and Alaska Native alone
    P1_006N="asian",        # Asian alone
    P1_007N="nhpi",         # Native Hawaiian and Other Pacific Islander alone
    P1_008N="other",        # Some Other Race alone
    P1_009N="two_or_more",  # Two or more races
)

In [8]:
# Location-identifier fields returned by the API, mapped to the
# column names used in the rest of the notebook.
geo_columns = dict(
    NAME="name",
    state="state_fips",
    county="county_fips",
    tract="tract_code",
    block="block_code",
)

In [14]:
# List every county in the configured state (state_fips, "36" = New York here).
# `census.pl` is the client's Decennial Census redistricting (P.L. 94-171) dataset.
# "NAME" is the only variable requested; the state and county FIPS codes are returned
# alongside it because they are part of the requested geography.
county_query = {"for": "county:*", "in": f"state:{state_fips}"}
counties = census.pl.get(("NAME",), geo=county_query)

# Keep just the 3-digit county FIPS code from each returned record
county_fips_list = [record["county"] for record in counties]

# Show how many counties came back, plus the first three records as a sanity check
len(county_fips_list), counties[:3]

(62,
 [{'NAME': 'Albany County, New York', 'state': '36', 'county': '001'},
  {'NAME': 'Allegany County, New York', 'state': '36', 'county': '003'},
  {'NAME': 'Bronx County, New York', 'state': '36', 'county': '005'}])

In [15]:
# Fields requested for every block: the block name plus each race count column
# declared in p1_population_columns (unpacking the dict yields its variable codes).
block_fields = ("NAME", *p1_population_columns)

# Issue one API request per 3-digit county FIPS code and flatten the per-county
# results into a single master list of block-level records.
all_blocks = [
    block_row
    for cty in county_fips_list
    for block_row in census.pl.get(
        block_fields,
        geo={"for": "block:*", "in": f"state:{state_fips} county:{cty}"},
    )
]

# Build the DataFrame once from the complete list of records
block_df = pd.DataFrame(all_blocks)

In [17]:
# Combined API-field -> friendly-name lookup: the geography identifiers first,
# then the race columns (same key order as merging the two dictionaries).
rename_map = dict(geo_columns)
rename_map.update(p1_population_columns)

# rename() returns a new frame, so block_df keeps the raw API column names
race_df = block_df.rename(columns=rename_map)

In [19]:
# Friendly names of all race columns, in declaration order; used below to compute totals and percentages
categories = [*p1_population_columns.values()]

['white', 'black', 'amin', 'asian', 'nhpi', 'other', 'two_or_more']

In [20]:
# Total population per block = row-wise sum of the race count columns
race_df["total"] = race_df.loc[:, categories].sum(axis="columns")

In [22]:
# Percentage share of each race category per block.
# Blocks with a total of 0 produce 0/0 = NaN, which is replaced by 0.
pct_columns = {
    f"{col}_pct": race_df[col].mul(100).div(race_df["total"]).fillna(0)
    for col in categories
}

# assign() returns a new frame with the *_pct columns appended, so race_df itself
# stays untouched and can be reused for other manipulations.
race_with_pcts_df = race_df.assign(**pct_columns)

In [23]:
# View the final dataframe: race counts, total, and the *_pct share columns for every block.
# The notebook renders a truncated preview (first and last rows) because the frame is large.
race_with_pcts_df

Unnamed: 0,name,white,black,amin,asian,nhpi,other,two_or_more,state_fips,county_fips,tract_code,block_code,total,white_pct,black_pct,amin_pct,asian_pct,nhpi_pct,other_pct,two_or_more_pct
0,"Block 1036, Block Group 1, Census Tract 1, Alb...",1.0,31.0,1.0,0.0,0.0,4.0,2.0,36,001,000100,1036,39.0,2.564103,79.487179,2.564103,0.000000,0.0,10.256410,5.128205
1,"Block 1038, Block Group 1, Census Tract 1, Alb...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,36,001,000100,1038,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
2,"Block 1021, Block Group 1, Census Tract 1, Alb...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,36,001,000100,1021,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
3,"Block 1022, Block Group 1, Census Tract 1, Alb...",4.0,1.0,0.0,1.0,0.0,3.0,2.0,36,001,000100,1022,11.0,36.363636,9.090909,0.000000,9.090909,0.0,27.272727,18.181818
4,"Block 1024, Block Group 1, Census Tract 1, Alb...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,36,001,000100,1024,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
288814,"Block 2062, Block Group 2, Census Tract 1505.0...",11.0,0.0,0.0,0.0,0.0,0.0,2.0,36,123,150502,2062,13.0,84.615385,0.000000,0.000000,0.000000,0.0,0.000000,15.384615
288815,"Block 2066, Block Group 2, Census Tract 1505.0...",8.0,0.0,0.0,0.0,0.0,0.0,0.0,36,123,150502,2066,8.0,100.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
288816,"Block 2016, Block Group 2, Census Tract 1505.0...",36.0,1.0,0.0,0.0,0.0,0.0,0.0,36,123,150502,2016,37.0,97.297297,2.702703,0.000000,0.000000,0.0,0.000000,0.000000
288817,"Block 2067, Block Group 2, Census Tract 1505.0...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,36,123,150502,2067,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000


In [25]:
# Write the final dataframe to CSV in the current working directory.
# index=True keeps the row index as the first column of the file.
# NOTE(review): the FIPS / tract / block codes carry leading zeros (e.g. "001", "000100");
# presumably they should be read back as text so the zeros are preserved -- confirm with consumers.
race_with_pcts_df.to_csv(path_or_buf="excel_census_race_data.csv", index=True)