In [1]:
import time
import pandas as pd
from census import Census
import altair as alt

# Census API access
api_key = "639f2aedf7c17b164527591258cda00b25249b4b"
c = Census(key=api_key)

# Analysis

How have transportation habits changed, pre- and post- Covid, by those living in poverty? How has transportation mode changed for those in poverty for the entire Contra Costa County, vs Richmond, Pittsburg/Bay Point, and Concord?

In [28]:
def process_acs_transportation(in_df):
    '''
    Inputs:
    - in_df: a DataFrame containing transportation mode data from Table B08122

    Outputs:
    A modified version of in_df with proportions and margins of error calculated
    '''
    df = in_df.copy()

    ### CLEAN UNUSUAL MOES
    df = df.replace(-555555555.0, 0)

    ### AGGREGATE ESTIMATES
    # New Columns
    vehicle_cols = ['vehicle_alone_bp', 'vehicle_pool_bp']
    other_cols = ['walk_bp', 'vehicle_other_bp']
    
    df['vehicle_bp'] = df[vehicle_cols].sum(axis='columns')
    df['other_bp'] = df[other_cols].sum(axis='columns')

    # Use a list comprehension to append "_moe" to all strings in our list
    vehicle_moes = [f'{col}_moe' for col in vehicle_cols]
    # Use this new list to calculate moes    
    df['vehicle_bp_moe'] = (df[vehicle_moes]**2).sum(axis='columns')**0.5

    # Use a list comprehension to append "_moe" to all strings in our list
    other_moes = [f'{col}_moe' for col in other_cols]
    # Use this new list to calculate moes    
    df['other_bp_moe'] = (df[other_moes]**2).sum(axis='columns')**0.5

    ### CALCULATE PROPORTIONS
    # Iterate through the racial/ethnic groups we're using
    for group in ['vehicle_bp', 'public_bp', 'wfh_bp', 'other_bp']:
        # Calculate the proportion for this group
        df[f'pct_{group}'] = df[group] / df['total']
    
        # Calculate the MOE for this proportion
        df[f'pct_{group}_moe'] = (df[f'{group}_moe']**2 - df[f'pct_{group}']**2 * df['total_moe']**2)**0.5 / df['total']

    ### CALCULATE STANDARD ERRORS
    for col in df.columns:
    # Check whether each column name ends with '_moe', using a built-in string method
    # `if '_moe' in col:` is another possibility, but what if we had a column named `pct_moebius` or something?
        if col.endswith('_moe'):
            # Replace '_moe' with '_se' but only at the end of the name, again using string subsetting
            # col[:-4] selects all but the last four characters in col
            # col.replace('_moe', '_se') is another option but again susceptible to edge cases with oddly named columns
            df[col[:-4] + '_se'] = df[col] / 1.645
    
    return df

In [22]:
def z_statistic(df, col, var_1, var_2):
    '''

    Inputs:
    - df (pd.DataFrame): the table of summary statistics and standard errors.
      Columns must contain col and col + '_se'. Index must contain var_1
      and var_2 (could be place, mode, year, etc)
    - col (string): the column name to be compared across variables
    - var_1, var_2 (string): the variables whose values to compare.

    Output:
    The two-sample z-value (float) of the difference between the values of col
    for var_1 and var_2.
    '''
    # Assign the relevant cells from df to variable names matching the formula
    x1 = df.loc[var_1, col]
    x2 = df.loc[var_2, col]
    se1 = df.loc[var_1, col + '_se']
    se2 = df.loc[var_2, col + '_se']

    # Return the z-value
    return abs((x1 - x2) / (se1**2 + se2**2)**0.5)

In [30]:
#Columns to get and change from the census
transp_variables = {
    'NAME': 'NAME',
    'GEO_ID': 'GEO_ID',
    'B08122_002E': 'total',
    'B08122_002M': 'total_moe',
    'B08122_006E': 'vehicle_alone_bp',
    'B08122_006M': 'vehicle_alone_bp_moe',
    'B08122_010E': 'vehicle_pool_bp',
    'B08122_010M': 'vehicle_pool_bp_moe',
    'B08122_014E': 'public_bp',
    'B08122_014M': 'public_bp_moe',
    'B08122_018E': 'walk_bp',
    'B08122_018M': 'walk_bp_moe',
    'B08122_022E': 'vehicle_other_bp',
    'B08122_022M': 'vehicle_other_bp_moe',
    'B08122_026E': 'wfh_bp',
    'B08122_026M': 'wfh_bp_moe'
}

#Get Table B08122 for Contra Costa County, only low income
#Group transportation modes
df_county = pd.DataFrame(
    c.acs5.get(
        list(transp_variables.keys()),
        {'for': 'county:013', 'in': 'state:06'},
        year=2022
    )
)
df_county = df_county.rename(columns=transp_variables)

columns_out = ["NAME", "GEO_ID", 
               "total", "total_moe", "total_se"
               "pct_vehicle_bp", "pct_vehicle_bp_moe", "pct_vehicle_bp_se",
               "pct_public_bp", "pct_public_bp_moe", "pct_public_bp_se",
               "pct_wfh_bp", "pct_wfh_bp_moe", "pct_wfh_bp_se",
               "pct_other_bp", "pct_other_bp_moe", "pct_other_bp_se"
              ]

df_county_processed = process_acs_transportation(df_county)
df_out = df_county_processed[columns_out]
df_out

#export to csv
#df_county_processed.to_csv('transp_pct.csv', index=False)

KeyError: "['total_sepct_vehicle_bp'] not in index"