In [9]:
import time
import pandas as pd
from census import Census
import altair as alt

# Census API access
api_key = "639f2aedf7c17b164527591258cda00b25249b4b"
c = Census(key=api_key)

# Conditions

What are the conditions of the census tracts within Richmond City, North Richmond, and Rollingwood, California, in 2022 and 2017?

## Race
What is the racial and ethnic percentages of each census tract? Are there any census tracts with MOEs that are too high to include?

In [3]:
def process_acs_df(in_df):
    '''
    Inputs:
    - in_df (pd.DataFrame): a DataFrame containing race/ethnicity data from Table B03002

    Outputs:
    A modified version of in_df with certain races grouped and proportions and margins
    of error calculated
    '''
    
    # Start by making a copy of in_df, so we don't destroy the original data.
    # df is also a nice short name we can use throughout this function
    df = in_df.copy()

    ### CLEAN UNUSUAL MOES
    df = df.replace(-555555555.0, 0)

    ### AGGREGATE ESTIMATES
    # Define a list of columns to combine into "nh_other"
    nh_other_cols = ['nh_native', 'nh_pi', 'nh_1other', 'nh_multi']

    # Calculate nh_other
    df['nh_other'] = df[nh_other_cols].sum(axis='columns')

    # Use a list comprehension to append "_moe" to all strings in our list
    nh_other_moes = [f'{col}_moe' for col in nh_other_cols]

    # Use this new list to calculate nh_other_moe    
    df['nh_other_moe'] = (df[nh_other_moes]**2).sum(axis='columns')**0.5

    ### CALCULATE PROPORTIONS
    # Iterate through the racial/ethnic groups we're using
    for group in ['nh_white', 'nh_black', 'nh_asian', 'nh_other', 'hispanic']:
        # Calculate the proportion for this group
        df[f'pct_{group}'] = df[group] / df['total']
    
        # Calculate the MOE for this proportion
        df[f'pct_{group}_moe'] = (df[f'{group}_moe']**2 - df[f'pct_{group}']**2 * df['total_moe']**2)**0.5 / df['total']

    return df  # Don't forget to *return* the modified DataFrame

In [20]:
# Define the dict of variables to pull and rename
race_variables = {
    'NAME': 'NAME',
    'GEO_ID': 'GEO_ID',
    'B03002_001E': 'total',
    'B03002_001M': 'total_moe',
    'B03002_003E': 'nh_white',
    'B03002_003M': 'nh_white_moe',
    'B03002_004E': 'nh_black',
    'B03002_004M': 'nh_black_moe',
    'B03002_005E': 'nh_native',
    'B03002_005M': 'nh_native_moe',
    'B03002_006E': 'nh_asian',
    'B03002_006M': 'nh_asian_moe',
    'B03002_007E': 'nh_pi',
    'B03002_007M': 'nh_pi_moe',
    'B03002_008E': 'nh_1other',
    'B03002_008M': 'nh_1other_moe',
    'B03002_009E': 'nh_multi',
    'B03002_009M': 'nh_multi_moe',
    'B03002_012E': 'hispanic',
    'B03002_012M': 'hispanic_moe',
}

# Get ACS 2022 5 year Table B03002 in Richmond City, North Richmond, Rollingwood, CA
# Get ACS 2022 5 year Table B03002 in Contra Costa County
# county:013
c_tracts = 'tract:380001,380002,378000,365002,365003,379000,377000,376000,374000,371000,367200'

df_tracts = pd.DataFrame(
    c.acs5.get(
        list(race_variables.keys()),
        {'for': c_tracts, 'in': 'state:06 county:013'},
        year=2022
    )
)
df_county = pd.DataFrame(
    c.acs5.get(
        list(race_variables.keys()),
        {'for': 'county:013', 'in': 'state:06'},
        year=2022
    )
)

# Rename the DataFrame columns again using the dict
df_tracts = df_tracts.rename(columns=race_variables)
df_county = df_county.rename(columns=race_variables)

# Combo races, generate percentages and percentage MOEs
df_tracts_processed = process_acs_df(df_tracts)
df_county_processed = process_acs_df(df_county)

#export settings, for a combined csv
'''
df_out = pd.concat([df_tracts_processed, df_county_processed])
df_out = df_out[[
    'NAME', 'GEO_ID', 'tract',
    'pct_nh_white', 'pct_nh_white_moe',
    'pct_nh_black', 'pct_nh_black_moe',
    'pct_nh_asian', 'pct_nh_asian_moe',
    'pct_nh_other', 'pct_nh_other_moe',
    'pct_hispanic', 'pct_hispanic_moe',
]]
#export to csv
df_out.to_csv('race_pct.csv', index=False)
'''

#format for inline chart
# TODO: melt into tidy data
rt_out = pd.melt(df_tracts_processed,
                 ["tract"],
                 var_name="race",
                 value_name="percentage")
rt_out = rt_out.sort_values(by="tract")
'''
rt_out = df_tracts_processed[[
    'NAME', 'GEO_ID', 'tract',
    'pct_nh_white', 'pct_nh_white_moe',
    'pct_nh_black', 'pct_nh_black_moe',
    'pct_nh_asian', 'pct_nh_asian_moe',
    'pct_nh_other', 'pct_nh_other_moe',
    'pct_hispanic', 'pct_hispanic_moe'
]]
'''

rt_out

Unnamed: 0,tract,race,percentage
0,365002,NAME,Census Tract 3650.02; Contra Costa County; Cal...
55,365002,nh_white_moe,168.0
66,365002,nh_black,779.0
77,365002,nh_black_moe,291.0
88,365002,nh_native,0.0
...,...,...,...
296,380002,pct_nh_black,0.248991
307,380002,pct_nh_black_moe,0.124309
318,380002,pct_nh_asian,0.170317
164,380002,nh_1other,0.0


### Race per tract

Currently without MOEs (todo)

In [18]:
# Make a grouped bar chart of census tracts
alt.Chart(rt_out).mark_bar().encode(
    x='tract:N',
    y='pct_nh_white:Q',
    #color='',
    column='tract:N'
).properties(
    width=300,
    height=300
)

## Tenure
What percent of residents rent vs own?

In [5]:
def process_acs_tenure(in_df):
    '''
    Inputs:
    - in_df: a DataFrame containing owner/renter data from Table B25003

    Outputs:
    A modified version of in_df with proportions and margins
    of error calculated
    '''
    df = in_df.copy()

    ### CLEAN UNUSUAL MOES
    df = df.replace(-555555555.0, 0)

    ### CALCULATE PROPORTIONS
    # Iterate through the tenure
    for group in ['owner_occupied', 'renter_occupied']:
        # Calculate the proportion for this group
        df[f'pct_{group}'] = df[group] / df['total']
    
        # Calculate the MOE for this proportion
        df[f'pct_{group}_moe'] = (df[f'{group}_moe']**2 - df[f'pct_{group}']**2 * df['total_moe']**2)**0.5 / df['total']
    
    return df

In [6]:
# Define the dict of variables to pull and rename
tenure_variables = {
    'NAME': 'NAME',
    'GEO_ID': 'GEO_ID',
    'B25003_001E': 'total',
    'B25003_001M': 'total_moe',
    'B25003_002E': 'owner_occupied',
    'B25003_002M': 'owner_occupied_moe',
    'B25003_003E': 'renter_occupied',
    'B25003_003M': 'renter_occupied_moe',
}

# Get ACS 2022 5 year Table B25003 in Richmond City, North Richmond, Rollingwood, CA
# Get ACS 2022 5 year Table B25003 in Contra Costa County
t_tracts = pd.DataFrame(
    c.acs5.get(
        list(tenure_variables.keys()),
        {'for': c_tracts, 'in': 'state:06 county:013'},
        year=2022
    )
)
t_county = pd.DataFrame(
    c.acs5.get(
        list(tenure_variables.keys()),
        {'for': 'county:013', 'in': 'state:06'},
        year=2022
    )
)

#rename columns
t_tracts = t_tracts.rename(columns=tenure_variables)
t_county = t_county.rename(columns=tenure_variables)

#calculate percentages
t_tracts_processed = process_acs_tenure(t_tracts)
t_county_processed = process_acs_tenure(t_county)

#export settings
t_out = pd.concat([t_tracts_processed, t_county_processed])
#export to csv
#t_out.to_csv('tenure_pct.csv', index=False)