In [33]:
import os
import time

import altair as alt
import numpy as np
import pandas as pd
from census import Census

# Census API access
# Prefer a key supplied through the CENSUS_API_KEY environment variable so the
# credential does not have to live in the notebook. The literal is kept only as
# a fallback so existing runs keep working.
# NOTE(review): this key is stored in plain text -- rotate it and drop the fallback.
api_key = os.environ.get("CENSUS_API_KEY", "639f2aedf7c17b164527591258cda00b25249b4b")
c = Census(key=api_key)

# Analysis

How have transportation habits changed, pre- and post-Covid, for those living in poverty? How has transportation mode changed for those in poverty across Contra Costa County as a whole, compared with Richmond, Pittsburg/Bay Point, and Concord?

In [18]:
def process_acs_transportation(in_df, poverty):
    '''
    Aggregate Table B08122 commute-mode estimates and derive proportions.

    Inputs:
    - in_df: a DataFrame containing transportation mode data from Table B08122.
      It must hold `total` and `total_moe`, plus an estimate column
      `<mode><poverty>` and a margin column `<mode><poverty>_moe` for each of
      vehicle_alone, vehicle_pool, public, walk, vehicle_other and wfh.
    - poverty: a string ending of a variable, either _bp for below poverty or
      _ap for above (pass '' when the columns carry no suffix)

    Outputs:
    A modified copy of in_df (in_df itself is left untouched) with:
    - `vehicle<poverty>` and `other<poverty>` aggregates and their MOEs
    - `pct_<group>`, `pct_<group>_moe` and `pct_<group>_moe_ratio` for the
      vehicle, public, wfh and other groups
    - a `<name>_se` column for every column whose name ends in `_moe`
    '''
    df = in_df.copy()

    ### CLEAN UNUSUAL MOES
    # The sentinel -555555555 is replaced with 0 wherever it appears in the frame
    df = df.replace(-555555555.0, 0)

    ### AGGREGATE ESTIMATES
    # New Columns
    vehicle_cols = ['vehicle_alone'+poverty, 'vehicle_pool'+poverty]
    other_cols = ['walk'+poverty, 'vehicle_other'+poverty]

    df['vehicle'+poverty] = df[vehicle_cols].sum(axis='columns')
    df['other'+poverty] = df[other_cols].sum(axis='columns')

    # The MOE of a sum is the root of the summed squared component MOEs
    vehicle_moes = [f'{col}_moe' for col in vehicle_cols]
    df['vehicle'+poverty+'_moe'] = (df[vehicle_moes]**2).sum(axis='columns')**0.5

    other_moes = [f'{col}_moe' for col in other_cols]
    df['other'+poverty+'_moe'] = (df[other_moes]**2).sum(axis='columns')**0.5

    ### CALCULATE PROPORTIONS
    # Iterate through the groups we're using
    for group in ['vehicle'+poverty, 'public'+poverty, 'wfh'+poverty, 'other'+poverty]:
        pct_col = f'pct_{group}'
        moe_col = f'pct_{group}_moe'
        ratio_col = f'pct_{group}_moe_ratio'

        # Calculate the proportion for this group
        df[pct_col] = df[group] / df['total']

        # Calculate the MOE for this proportion.
        # The proportion formula subtracts p^2 * MOE_total^2 under the root; when
        # that difference is negative the square root is undefined (it used to
        # come out as NaN). Census guidance for derived proportions is to use
        # the ratio formula (sum instead of difference) in that case.
        numerator_term = df[f'{group}_moe']**2
        denominator_term = df[pct_col]**2 * df['total_moe']**2
        radicand = numerator_term - denominator_term
        radicand = radicand.where(radicand >= 0, numerator_term + denominator_term)
        df[moe_col] = radicand**0.5 / df['total']

        #NaN-out any too-low absolute n
        too_small = df['total'] < 25
        df.loc[too_small, [pct_col, moe_col]] = float('NaN')

        #NaN-out any estimate whose MOE is too large relative to the estimate
        df[ratio_col] = df[moe_col] / df[pct_col]
        unreliable = df[ratio_col] > .4
        df.loc[unreliable, [pct_col, moe_col]] = float('NaN')

    ### CALCULATE STANDARD ERRORS
    # Snapshot the column names first: the loop adds columns to df as it runs
    for col in list(df.columns):
        # endswith (rather than `'_moe' in col`) so that names merely containing
        # '_moe', such as the '_moe_ratio' columns, are skipped
        if col.endswith('_moe'):
            # col[:-4] strips the trailing '_moe'; 1.645 converts an MOE to a standard error
            df[col[:-4] + '_se'] = df[col] / 1.645

    return df

In [5]:
def z_statistic(df, col, var_1, var_2):
    '''
    Two-sample z-value for one statistic compared between two rows of df.

    Inputs:
    - df (pd.DataFrame): the table of summary statistics and standard errors.
      Columns must contain col and col + '_se'. Index must contain var_1
      and var_2 (could be place, mode, year, etc)
    - col (string): the column name to be compared across variables
    - var_1, var_2: the index labels of the two rows to compare.

    Output:
    The absolute two-sample z-value (float) of the difference between the
    values of col for var_1 and var_2.
    '''
    se_col = col + '_se'

    # Numerator: the gap between the two estimates
    estimate_gap = df.loc[var_1, col] - df.loc[var_2, col]

    # Denominator: the standard error of that gap (root of summed squared SEs)
    gap_se = (df.loc[var_1, se_col]**2 + df.loc[var_2, se_col]**2)**0.5

    return abs(estimate_gap / gap_se)

In [75]:
#BELOW 100% OF THE POVERTY LINE
# Census variable code -> column name for the below-poverty lines of Table B08122.
# Every table line contributes an estimate (suffix E) and a margin of error (suffix M).
transp_variables_bp = {
    'NAME': 'NAME',
    'GEO_ID': 'GEO_ID',
    **{
        f'B08122_{line}{kind}': label + suffix
        for line, label in [
            ('002', 'total'),
            ('006', 'vehicle_alone_bp'),
            ('010', 'vehicle_pool_bp'),
            ('014', 'public_bp'),
            ('018', 'walk_bp'),
            ('022', 'vehicle_other_bp'),
            ('026', 'wfh_bp'),
        ]
        for kind, suffix in [('E', ''), ('M', '_moe')]
    },
}

# Columns kept after processing: identifiers, then value / MOE / SE for each statistic
columns_out_bp = ["NAME", "GEO_ID"] + [
    stem + suffix
    for stem in ("total", "pct_vehicle_bp", "pct_public_bp", "pct_wfh_bp", "pct_other_bp")
    for suffix in ("", "_moe", "_se")
]

In [51]:
#ABOVE 150% POVERTY LINE
# Census variable code -> column name for the above-150%-of-poverty lines of Table B08122.
# Every table line contributes an estimate (suffix E) and a margin of error (suffix M).
transp_variables_ap = {
    'NAME': 'NAME',
    'GEO_ID': 'GEO_ID',
    **{
        f'B08122_{line}{kind}': label + suffix
        for line, label in [
            ('004', 'total'),
            ('008', 'vehicle_alone_ap'),
            ('012', 'vehicle_pool_ap'),
            ('016', 'public_ap'),
            ('020', 'walk_ap'),
            ('024', 'vehicle_other_ap'),
            ('028', 'wfh_ap'),
        ]
        for kind, suffix in [('E', ''), ('M', '_moe')]
    },
}

# Columns kept after processing: identifiers, then value / MOE / SE for each statistic
columns_out_ap = ["NAME", "GEO_ID"] + [
    stem + suffix
    for stem in ("total", "pct_vehicle_ap", "pct_public_ap", "pct_wfh_ap", "pct_other_ap")
    for suffix in ("", "_moe", "_se")
]

In [46]:
#Total Levels, no poverty info included
# Census variable code -> column name for the all-workers lines of Table B08122.
# Every table line contributes an estimate (suffix E) and a margin of error (suffix M).
transp_variables_in = {
    'NAME': 'NAME',
    'GEO_ID': 'GEO_ID',
    **{
        f'B08122_{line}{kind}': label + suffix
        for line, label in [
            ('001', 'total'),
            ('005', 'vehicle_alone'),
            ('009', 'vehicle_pool'),
            ('013', 'public'),
            ('017', 'walk'),
            ('021', 'vehicle_other'),
            ('025', 'wfh'),
        ]
        for kind, suffix in [('E', ''), ('M', '_moe')]
    },
}

# Columns kept after processing: identifiers, then value / MOE / SE for each statistic
transp_columns = ["NAME", "GEO_ID"] + [
    stem + suffix
    for stem in ("total", "pct_vehicle", "pct_public", "pct_wfh", "pct_other")
    for suffix in ("", "_moe", "_se")
]
# Same statistics without the two identifier columns (a separate list object),
# for combined places where NAME is assigned by hand
transp_combo_columns = transp_columns[2:]

## Contra Costa County

In [13]:
#Get Table B08122 for Contra Costa County
#Group transportation modes
# Fetch the all-workers variables for county 013 in state 06, one request per
# year, and rename the census codes to readable column names
df_county_2022 = pd.DataFrame(
    c.acs5.get(list(transp_variables_in), {'for': 'county:013', 'in': 'state:06'}, year=2022)
).rename(columns=transp_variables_in)
df_county_2017 = pd.DataFrame(
    c.acs5.get(list(transp_variables_in), {'for': 'county:013', 'in': 'state:06'}, year=2017)
).rename(columns=transp_variables_in)

# Derive proportions / MOEs / SEs, keep the output columns, and tag each frame with its year
df_county_processed_2022 = process_acs_transportation(in_df=df_county_2022, poverty='')
df_out_2022 = df_county_processed_2022[transp_columns]
df_out_2022.insert(loc=0, column="year", value=2022)

df_county_processed_2017 = process_acs_transportation(in_df=df_county_2017, poverty='')
df_out_2017 = df_county_processed_2017[transp_columns]
df_out_2017.insert(loc=0, column="year", value=2017)

#now combine both years (2017 row first)
df_out_ap = pd.concat(objs=[df_out_2017, df_out_2022])

#export to csv
#df_out_ap.to_csv('transp_poverty/transp_pct_ccc.csv', index=False)

In [50]:
# Compare the county's total between 2017 and 2022.
# z_statistic looks rows up by index label, so the combined county frame is
# indexed by year first. (`df_out` is never defined at notebook scope, so the
# previous call only worked with leftover kernel state.)
z_statistic(df_out_ap.set_index("year"), "total", 2017, 2022)

4.27328473788722

## Places

N Richmond: 52162

Richmond City: 60620

El Cerrito: 21796

Pittsburg: 57456

Bay Point: 04415

Lafayette: 39122

In [15]:
def get_transpo_df(year_in, place_type, place_num, poverty):
    '''
    Fetch Table B08122 for one geography in state 06 and return processed proportions.

    Inputs:
    - year_in: the year you want from the census (passed to the ACS 5-year endpoint)
    - place_type: type of location for the census API call (place, county, tract)
    - place_num: code for the census API call, as a string
    - poverty: '_bp' for below poverty, '_ap' for 150% above poverty; any other
      value (e.g. '') selects the all-workers variables

    Outputs:
    - A dataframe with the processed data, limited to the output columns of the
      selected poverty group, with a leading "year" column
    '''

    # This has to be one if/elif/else chain: with two separate `if`s the
    # trailing `else` also ran for '_bp' and replaced the below-poverty
    # variables with the all-workers ones.
    if poverty == '_bp':
        var_keys = transp_variables_bp
        columns_out = columns_out_bp
    elif poverty == '_ap':
        var_keys = transp_variables_ap
        columns_out = columns_out_ap
    else:
        var_keys = transp_variables_in
        columns_out = transp_columns

    df = pd.DataFrame(
        c.acs5.get(
            list(var_keys.keys()),
            {'for': place_type+':'+place_num, 'in': 'state:06'},
            year=year_in
        )
    )
    # Swap census variable codes for readable column names
    df = df.rename(columns=var_keys)

    df_processed = process_acs_transportation(df, poverty)
    df_out = df_processed[columns_out]
    df_out.insert(0, "year", year_in)

    return df_out

In [27]:
def get_transpo_df_precombo(year_in, place_type, place_num, poverty):
    '''
    Fetch Table B08122 for one geography in state 06 WITHOUT processing it.

    Places that need to be combined take two steps before the combined
    figures can be produced; this is the first step (fetch and rename only).

    Inputs:
    - year_in: the year you want from the census (passed to the ACS 5-year endpoint)
    - place_type: type of location for the census API call (place, county, tract)
    - place_num: code for the census API call, as a string
    - poverty: '_bp' for below poverty, '_ap' for 150% above poverty; any other
      value (e.g. '') selects the all-workers variables

    Outputs:
    - A dataframe of the raw API response with census variable codes renamed
      to readable column names (no proportions, MOEs or SEs are calculated here)
    '''

    # This has to be one if/elif/else chain: with two separate `if`s the
    # trailing `else` also ran for '_bp' and replaced the below-poverty
    # variables with the all-workers ones.
    if poverty == '_bp':
        var_keys = transp_variables_bp
    elif poverty == '_ap':
        var_keys = transp_variables_ap
    else:
        var_keys = transp_variables_in

    df = pd.DataFrame(
        c.acs5.get(
            list(var_keys.keys()),
            {'for': place_type+':'+place_num, 'in': 'state:06'},
            year=year_in
        )
    )
    df = df.rename(columns=var_keys)

    return df

In [41]:
def combine_places(place_1, place_2, poverty):

    '''
    Combine two census places that have already been processed for pcts but haven't been changed into chart data format yet

    Inputs:
    - place_1, place_2: DataFrames holding `total`, `total_moe` and, for each
      of the vehicle, public, wfh and other groups, a `<group><poverty>`
      estimate column and a `<group><poverty>_moe` column. Rows are matched on
      index label, so both frames need the same index.
    - poverty: the column-name suffix used in both frames ('_bp', '_ap' or '')

    Outputs:
    A new DataFrame with the summed estimates, their combined MOEs, the
    recalculated proportions (`pct_<group>`, `pct_<group>_moe`,
    `pct_<group>_moe_ratio`) and a `<name>_se` column for every `_moe` column.
    The inputs are not modified.
    '''
    df = pd.DataFrame()

    #Total the totals
    df["total"] = place_1["total"] + place_2["total"]
    # The MOE of a sum is the root of the summed squared component MOEs
    df["total_moe"] = np.sqrt(place_1['total_moe']**2 + place_2['total_moe']**2)

    ### CALCULATE PROPORTIONS
    # Iterate through the transportation mode groups we're using
    for group in ['vehicle'+poverty, 'public'+poverty, 'wfh'+poverty, 'other'+poverty]:
        pct_col = f'pct_{group}'
        moe_col = f'pct_{group}_moe'
        ratio_col = f'pct_{group}_moe_ratio'

        df[group] = place_1[group] + place_2[group]
        df[f'{group}_moe'] = np.sqrt(place_1[f'{group}_moe']**2 + place_2[f'{group}_moe']**2)

        # Calculate the proportion for this group
        df[pct_col] = df[group] / df['total']

        # Calculate the MOE for this proportion.
        # The proportion formula subtracts p^2 * MOE_total^2 under the root; when
        # that difference is negative the square root is undefined (it used to
        # come out as NaN). Census guidance for derived proportions is to use
        # the ratio formula (sum instead of difference) in that case.
        numerator_term = df[f'{group}_moe']**2
        denominator_term = df[pct_col]**2 * df['total_moe']**2
        radicand = numerator_term - denominator_term
        radicand = radicand.where(radicand >= 0, numerator_term + denominator_term)
        df[moe_col] = radicand**0.5 / df['total']

        #NaN-out any too-low absolute n
        too_small = df['total'] < 25
        df.loc[too_small, [pct_col, moe_col]] = float('NaN')

        #NaN-out any estimate whose MOE is too large relative to the estimate
        df[ratio_col] = df[moe_col] / df[pct_col]
        unreliable = df[ratio_col] > .4
        df.loc[unreliable, [pct_col, moe_col]] = float('NaN')

    ### CALCULATE STANDARD ERRORS
    # Snapshot the column names first: the loop adds columns to df as it runs
    for col in list(df.columns):
        # endswith (rather than `'_moe' in col`) so that names merely containing
        # '_moe', such as the '_moe_ratio' columns, are skipped
        if col.endswith('_moe'):
            # col[:-4] strips the trailing '_moe'; 1.645 converts an MOE to a standard error
            df[col[:-4] + '_se'] = df[col] / 1.645

    return df

In [48]:
# Fetch and process Richmond City (60620) and North Richmond (52162) separately
# for each year; the two places are combined in a later step.

#2017
# Richmond City
df_r_2017 = get_transpo_df_precombo(year_in=2017, place_type='place', place_num='60620', poverty='')
df_r_2017_processed = process_acs_transportation(in_df=df_r_2017, poverty='')
# North Richmond
df_nr_2017 = get_transpo_df_precombo(year_in=2017, place_type='place', place_num='52162', poverty='')
df_nr_2017_processed = process_acs_transportation(in_df=df_nr_2017, poverty='')

#2022
# Richmond City
df_r_2022 = get_transpo_df_precombo(year_in=2022, place_type='place', place_num='60620', poverty='')
df_r_2022_processed = process_acs_transportation(in_df=df_r_2022, poverty='')
# North Richmond
df_nr_2022 = get_transpo_df_precombo(year_in=2022, place_type='place', place_num='52162', poverty='')
df_nr_2022_processed = process_acs_transportation(in_df=df_nr_2022, poverty='')

In [54]:
#combo Richmond and North Richmond
# 2017
df_richmond_c_2017 = combine_places(place_1=df_r_2017_processed, place_2=df_nr_2017_processed, poverty='')
df_r_out_2017 = df_richmond_c_2017[transp_combo_columns]
# Label the combined row: "year" becomes the first column, "NAME" the second
df_r_out_2017.insert(loc=0, column="year", value=2017)
df_r_out_2017.insert(loc=1, column="NAME", value="Richmond")

# 2022
df_richmond_c_2022 = combine_places(place_1=df_r_2022_processed, place_2=df_nr_2022_processed, poverty='')
df_r_out_2022 = df_richmond_c_2022[transp_combo_columns]
df_r_out_2022.insert(loc=0, column="year", value=2022)
df_r_out_2022.insert(loc=1, column="NAME", value="Richmond")

# Stack both years (2017 first) and export
df_out_richmond = pd.concat(objs=[df_r_out_2017, df_r_out_2022])
df_out_richmond.to_csv('transp_poverty/transp_pct_richmond.csv', index=False)

In [58]:
# Pittsburg and Bay Point
# Fetch and process Pittsburg (57456) and Bay Point (04415) separately for each
# year; the two places are combined in a later step.

#2017
# Pittsburg
df_p_2017 = get_transpo_df_precombo(year_in=2017, place_type='place', place_num='57456', poverty='')
df_p_2017_processed = process_acs_transportation(in_df=df_p_2017, poverty='')
# Bay Point
df_bp_2017 = get_transpo_df_precombo(year_in=2017, place_type='place', place_num='04415', poverty='')
df_bp_2017_processed = process_acs_transportation(in_df=df_bp_2017, poverty='')

#2022
# Pittsburg
df_p_2022 = get_transpo_df_precombo(year_in=2022, place_type='place', place_num='57456', poverty='')
df_p_2022_processed = process_acs_transportation(in_df=df_p_2022, poverty='')
# Bay Point
df_bp_2022 = get_transpo_df_precombo(year_in=2022, place_type='place', place_num='04415', poverty='')
df_bp_2022_processed = process_acs_transportation(in_df=df_bp_2022, poverty='')

In [59]:
#combo Pittsburg and Bay Point
# 2017
df_pbp_2017 = combine_places(place_1=df_p_2017_processed, place_2=df_bp_2017_processed, poverty='')
df_pbp_out_2017 = df_pbp_2017[transp_combo_columns]
# Label the combined row: "year" becomes the first column, "NAME" the second
df_pbp_out_2017.insert(loc=0, column="year", value=2017)
df_pbp_out_2017.insert(loc=1, column="NAME", value="Pittsburg/Bay Point")

# 2022
df_pbp_2022 = combine_places(place_1=df_p_2022_processed, place_2=df_bp_2022_processed, poverty='')
df_pbp_out_2022 = df_pbp_2022[transp_combo_columns]
df_pbp_out_2022.insert(loc=0, column="year", value=2022)
df_pbp_out_2022.insert(loc=1, column="NAME", value="Pittsburg/Bay Point")

# Stack both years (2017 first) and export
df_out_pbp = pd.concat(objs=[df_pbp_out_2017, df_pbp_out_2022])
df_out_pbp.to_csv('transp_poverty/transp_pct_pbp.csv', index=False)

In [57]:
# El Cerrito, All
# Single places need no combining: fetch + process each year, stack, export
df_ec_2017 = get_transpo_df(year_in=2017, place_type='place', place_num='21796', poverty='')
df_ec_2022 = get_transpo_df(year_in=2022, place_type='place', place_num='21796', poverty='')
df_out_ec = pd.concat(objs=[df_ec_2017, df_ec_2022])
#export
df_out_ec.to_csv('transp_poverty/transp_pct_elcerrito.csv', index=False)

# Lafayette, All
df_la_2017 = get_transpo_df(year_in=2017, place_type='place', place_num='39122', poverty='')
df_la_2022 = get_transpo_df(year_in=2022, place_type='place', place_num='39122', poverty='')
df_out_la = pd.concat(objs=[df_la_2017, df_la_2022])
#export
df_out_la.to_csv('transp_poverty/transp_pct_lafayette.csv', index=False)

## Transportation with Poverty

In [105]:
#Get Table B08122 for Richmond City, split by poverty status
# Place codes used in this notebook:
# N Richmond: 52162
# Richmond City: 60620
# El Cerrito: 21796
# Pittsburg: 57456
# Bay Point: 04415
# Lafayette: 39122

# Richmond, Below Poverty
df_r_2017_bp = get_transpo_df(year_in=2017, place_type='place', place_num='60620', poverty='_bp')
df_r_2022_bp = get_transpo_df(year_in=2022, place_type='place', place_num='60620', poverty='_bp')
df_out_r_bp = pd.concat(objs=[df_r_2017_bp, df_r_2022_bp])
#export
#df_out_r_bp.to_csv('transp_poverty/transp_pct_richmond_bp.csv', index=False)

# Richmond, 150% Above Poverty
df_r_2017_ap = get_transpo_df(year_in=2017, place_type='place', place_num='60620', poverty='_ap')
df_r_2022_ap = get_transpo_df(year_in=2022, place_type='place', place_num='60620', poverty='_ap')
df_out_r_ap = pd.concat(objs=[df_r_2017_ap, df_r_2022_ap])
#export
#df_out_r_ap.to_csv('transp_poverty/transp_pct_richmond_ap.csv', index=False)

In [106]:
# El Cerrito, Below Poverty
# Fetch + process each year for place 21796, then stack 2017 above 2022
df_ec_2017_bp = get_transpo_df(year_in=2017, place_type='place', place_num='21796', poverty='_bp')
df_ec_2022_bp = get_transpo_df(year_in=2022, place_type='place', place_num='21796', poverty='_bp')
df_out_ec_bp = pd.concat(objs=[df_ec_2017_bp, df_ec_2022_bp])
#export
#df_out_ec_bp.to_csv('transp_pct_elcerrito_bp.csv', index=False)

# El Cerrito, 150% Above Poverty
df_ec_2017_ap = get_transpo_df(year_in=2017, place_type='place', place_num='21796', poverty='_ap')
df_ec_2022_ap = get_transpo_df(year_in=2022, place_type='place', place_num='21796', poverty='_ap')
df_out_ec_ap = pd.concat(objs=[df_ec_2017_ap, df_ec_2022_ap])
#export
#df_out_ec_ap.to_csv('transp_pct_elcerrito_ap.csv', index=False)