In [1]:
import os
import time

import altair as alt
import numpy as np
import pandas as pd
from census import Census

# Census API access
# SECURITY: supply the key through the CENSUS_API_KEY environment variable.
# The literal below is kept only as a fallback so existing runs keep working;
# it is exposed to anyone who can read this notebook and should be rotated and
# removed from the source.
api_key = os.environ.get("CENSUS_API_KEY") or "639f2aedf7c17b164527591258cda00b25249b4b"
c = Census(key=api_key)

In [2]:
def z_statistic(x1, x2, moe1, moe2):
    '''
    Two-sample z-value for the difference between two estimates.

    Inputs:
    - x1, x2: the two values being compared
    - moe1, moe2: the margins of error (MOEs) of x1 and x2

    Output:
    The absolute value of the two-sample z-value (float) of the difference
    between the two values
    '''
    # Each MOE is converted to a standard error by dividing by 1.645
    # (presumably the critical value matching the MOEs' confidence level --
    # confirm against the data source).
    std_err_a, std_err_b = moe1 / 1.645, moe2 / 1.645

    # Standard error of the difference: root of the summed squared SEs
    diff_std_err = (std_err_a**2 + std_err_b**2)**0.5

    difference = x1 - x2
    return abs(difference / diff_std_err)

In [3]:
# Vehicles Available: Table B08141
# Workers 16 Years and Older in Households
#
# Maps each API variable code to the column name used in this notebook.
# Codes ending in "E" are estimates; codes ending in "M" are the matching
# margins of error and get the "_moe" suffix.
# NOTE(review): B08141_005 is named 'three_vehicle' here; it may represent
# "3 or more vehicles" in the table shell -- confirm against the Census docs.
veh_variables = dict([
    ('B08141_001E', 'total'),         ('B08141_001M', 'total_moe'),
    ('B08141_002E', 'no_vehicle'),    ('B08141_002M', 'no_vehicle_moe'),
    ('B08141_003E', 'one_vehicle'),   ('B08141_003M', 'one_vehicle_moe'),
    ('B08141_004E', 'two_vehicle'),   ('B08141_004M', 'two_vehicle_moe'),
    ('B08141_005E', 'three_vehicle'), ('B08141_005M', 'three_vehicle_moe'),
])

# Columns (in order) kept in the output tables: the total, then each
# proportion immediately followed by its margin of error.
veh_columns_out = [
    'total',
    'total_moe',
    'pct_no_vehicle',
    'pct_no_vehicle_moe',
    'pct_one_vehicle',
    'pct_one_vehicle_moe',
    'pct_two_or_more',
    'pct_two_or_more_moe',
]

In [4]:
def combine_vehicles(in_df, min_total=25, max_moe_ratio=.4):
    '''
    Collapse the vehicle counts into three groups and compute each group's
    share of the total, with margins of error (MOEs).

    Inputs:
    - in_df: DataFrame with columns 'total', 'no_vehicle', 'one_vehicle',
      'two_vehicle', 'three_vehicle' and a matching '<name>_moe' column for
      each. It is copied, not modified.
    - min_total: rows whose 'total' is below this get NaN proportions and
      proportion MOEs (default 25).
    - max_moe_ratio: proportions whose MOE / estimate ratio exceeds this are
      set to NaN together with their MOE (default 0.4).

    Outputs: simplified vehicle breakdown with MOEs. The returned copy gains
    'two_or_more', 'two_or_more_moe' and, for each of no_vehicle /
    one_vehicle / two_or_more, the columns 'pct_<group>', 'pct_<group>_moe'
    and 'pct_<group>_moe_ratio'.
    '''
    df = in_df.copy()

    ### AGGREGATE ESTIMATES
    # Define a list of columns to combine
    two_more_cols = ['two_vehicle', 'three_vehicle']
    df['two_or_more'] = df[two_more_cols].sum(axis='columns')
    # MOE of a sum: square root of the sum of the squared component MOEs
    two_more_moes = [f'{col}_moe' for col in two_more_cols]
    df['two_or_more_moe'] = (df[two_more_moes]**2).sum(axis='columns')**0.5

    # 'total' never changes inside the loop, so build this mask once
    too_few = df['total'] < min_total

    for group in ['no_vehicle', 'one_vehicle', 'two_or_more']:
        pct_col = f'pct_{group}'
        moe_col = f'pct_{group}_moe'
        ratio_col = f'pct_{group}_moe_ratio'

        # Calculate the proportion for this group
        df[pct_col] = df[group] / df['total']

        # Calculate the MOE for this proportion.
        # The proportion formula subtracts p^2 * MOE_total^2 under the square
        # root. When that difference is negative the root is undefined (it
        # used to become a silent NaN); Census guidance is to fall back to
        # the ratio formula, which adds the term instead.
        numerator_term = df[f'{group}_moe']**2
        denominator_term = df[pct_col]**2 * df['total_moe']**2
        radicand = numerator_term - denominator_term
        radicand = radicand.where(radicand >= 0, numerator_term + denominator_term)
        df[moe_col] = radicand**0.5 / df['total']

        # NaN-out any too-low absolute n
        df.loc[too_few, [pct_col, moe_col]] = float('NaN')

        # NaN-out any estimate whose MOE is too large relative to the estimate
        df[ratio_col] = df[moe_col] / df[pct_col]
        unreliable = df[ratio_col] > max_moe_ratio
        df.loc[unreliable, [pct_col, moe_col]] = float('NaN')

    return df

In [5]:
def combine_places(place_1, place_2, min_total=25, max_moe_ratio=.4):
    '''
    Add two places together row-by-row and recompute the vehicle proportions
    and their margins of error (MOEs) for the combined area.

    Inputs:
    - place_1, place_2: DataFrames with columns 'total', 'no_vehicle',
      'one_vehicle', 'two_or_more' and a matching '<name>_moe' column for
      each. Rows are paired by index label. Neither frame is modified.
    - min_total: rows whose combined 'total' is below this get NaN
      proportions and proportion MOEs (default 25).
    - max_moe_ratio: proportions whose MOE / estimate ratio exceeds this are
      set to NaN together with their MOE (default 0.4).

    Output: a new DataFrame with 'total', 'total_moe' and, for each group,
    '<group>', '<group>_moe', 'pct_<group>', 'pct_<group>_moe' and
    'pct_<group>_moe_ratio'.
    '''
    df = pd.DataFrame()

    # Total the totals; the MOE of a sum is the root of the summed squared MOEs
    df["total"] = place_1["total"] + place_2["total"]
    df["total_moe"] = np.sqrt(place_1['total_moe']**2 + place_2['total_moe']**2)

    # 'total' never changes inside the loop, so build this mask once
    too_few = df['total'] < min_total

    ### CALCULATE PROPORTIONS
    for group in ['no_vehicle', 'one_vehicle', 'two_or_more']:
        pct_col = f'pct_{group}'
        moe_col = f'pct_{group}_moe'
        ratio_col = f'pct_{group}_moe_ratio'

        # Combined count and its MOE
        df[group] = place_1[group] + place_2[group]
        df[f'{group}_moe'] = np.sqrt(place_1[f'{group}_moe']**2 + place_2[f'{group}_moe']**2)

        # Calculate the proportion for this group
        df[pct_col] = df[group] / df['total']

        # Calculate the MOE for this proportion.
        # The proportion formula subtracts p^2 * MOE_total^2 under the square
        # root. When that difference is negative the root is undefined (it
        # used to become a silent NaN); Census guidance is to fall back to
        # the ratio formula, which adds the term instead.
        numerator_term = df[f'{group}_moe']**2
        denominator_term = df[pct_col]**2 * df['total_moe']**2
        radicand = numerator_term - denominator_term
        radicand = radicand.where(radicand >= 0, numerator_term + denominator_term)
        df[moe_col] = radicand**0.5 / df['total']

        # NaN-out any too-low absolute n
        df.loc[too_few, [pct_col, moe_col]] = float('NaN')

        # NaN-out any estimate whose MOE is too large relative to the estimate
        df[ratio_col] = df[moe_col] / df[pct_col]
        unreliable = df[ratio_col] > max_moe_ratio
        df.loc[unreliable, [pct_col, moe_col]] = float('NaN')

    return df

In [6]:
def get_place_precombo(year_in, place_type, place_num):
    '''
    Call the census API for a place before it is combined with another place.

    Inputs:
    - year_in: passed as `year` to the ACS 5-year request
    - place_type, place_num: strings joined as "<place_type>:<place_num>" to
      form the 'for' clause; the request is limited to 'state:06'

    Output:
    The API response as a DataFrame, with columns renamed through
    veh_variables and then processed by combine_vehicles.
    '''
    geography = {'for': ':'.join((place_type, place_num)), 'in': 'state:06'}
    requested_codes = list(veh_variables.keys())

    api_rows = c.acs5.get(requested_codes, geography, year=year_in)
    renamed = pd.DataFrame(api_rows).rename(columns=veh_variables)

    return combine_vehicles(renamed)

In [7]:
def get_county_df(year_in):
    '''
    Fetch the vehicle table for county:013 in state:06 for one ACS 5-year
    vintage and reduce it to the output columns.

    Inputs:
    - year_in: passed as `year` to the ACS 5-year request and stored in the
      result's leading "year" column

    Output:
    DataFrame with "year" first, followed by the columns listed in
    veh_columns_out.
    '''
    # County
    county_query = {'for': 'county:013', 'in': 'state:06'}
    api_rows = c.acs5.get(list(veh_variables.keys()), county_query, year=year_in)

    combined = combine_vehicles(pd.DataFrame(api_rows).rename(columns=veh_variables))

    # Keep only the reporting columns, then put the year in front
    df_out = combined[veh_columns_out]
    df_out.insert(0, "year", year_in)
    return df_out

## County

In [8]:
# ACS Table B08141 for Contra Costa County, one frame per 5-year ACS vintage
# (2017 first, then 2022).
df_county_2017, df_county_2022 = [
    get_county_df(acs_year) for acs_year in (2017, 2022)
]

In [9]:
# Stack the two county vintages, 2017 row first (original index labels kept)
df_out_county = pd.concat((df_county_2017, df_county_2022))

In [10]:
# Display the stacked 2017 / 2022 county table
df_out_county

Unnamed: 0,year,total,total_moe,pct_no_vehicle,pct_no_vehicle_moe,pct_one_vehicle,pct_one_vehicle_moe,pct_two_or_more,pct_two_or_more_moe
0,2017,518482.0,3126.0,0.020475,0.001824,0.163026,0.004636,0.816499,0.008309
0,2022,554934.0,2773.0,0.021294,0.002515,0.163919,0.005564,0.814787,0.010215


## Cities

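Get ACS Table B08141 for four city areas: Richmond (combined with North Richmond), Pittsburg (combined with Bay Point), El Cerrito, and Lafayette.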

In [11]:
# Place codes used below: 60620 = Richmond City, 52162 = North Richmond.
# Requests are issued in the same order as before: Richmond then
# North Richmond, 2017 then 2022.

# 2017
df_r_2017, df_nr_2017 = [
    get_place_precombo(2017, 'place', place_code)
    for place_code in ('60620', '52162')
]

# 2022
df_r_2022, df_nr_2022 = [
    get_place_precombo(2022, 'place', place_code)
    for place_code in ('60620', '52162')
]

In [15]:
# Combine Richmond with North Richmond into one "Richmond" row per ACS year.
# Putting "year" at position 0 and "NAME" at position 1 yields the column
# order: year, NAME, then the veh_columns_out columns.

# 2017
df_richmond_2017 = combine_places(df_r_2017, df_nr_2017)[veh_columns_out]
df_richmond_2017.insert(0, "year", 2017)
df_richmond_2017.insert(1, "NAME", "Richmond")

# 2022
df_richmond_2022 = combine_places(df_r_2022, df_nr_2022)[veh_columns_out]
df_richmond_2022.insert(0, "year", 2022)
df_richmond_2022.insert(1, "NAME", "Richmond")

# Stack both vintages, 2017 first
df_out_richmond = pd.concat((df_richmond_2017, df_richmond_2022))

In [18]:
# Pittsburg and Bay Point
# Place codes used below: 57456 = Pittsburg, 04415 = Bay Point.
# Requests are issued in the same order as before: Pittsburg then Bay Point,
# 2017 then 2022.

# 2017
df_p_2017, df_bp_2017 = [
    get_place_precombo(2017, 'place', place_code)
    for place_code in ('57456', '04415')
]

# 2022
df_p_2022, df_bp_2022 = [
    get_place_precombo(2022, 'place', place_code)
    for place_code in ('57456', '04415')
]

In [21]:
# Combine Pittsburg with Bay Point into one "Pittsburg/Bay Point" row per
# ACS year. Putting "year" at position 0 and "NAME" at position 1 yields the
# column order: year, NAME, then the veh_columns_out columns.

# 2017
df_pbp_2017 = combine_places(df_p_2017, df_bp_2017)[veh_columns_out]
df_pbp_2017.insert(0, "year", 2017)
df_pbp_2017.insert(1, "NAME", "Pittsburg/Bay Point")

# 2022
df_pbp_2022 = combine_places(df_p_2022, df_bp_2022)[veh_columns_out]
df_pbp_2022.insert(0, "year", 2022)
df_pbp_2022.insert(1, "NAME", "Pittsburg/Bay Point")

# Stack both vintages, 2017 first
df_out_pbp = pd.concat((df_pbp_2017, df_pbp_2022))

In [25]:
# El Cerrito and Lafayette are reported on their own (no second place is
# merged in), so each fetched frame is reduced to veh_columns_out directly and
# then labelled with "year" (position 0) and "NAME" (position 1).

# El Cerrito (place 21796)
df_ec_2017 = get_place_precombo(2017, 'place', '21796')[veh_columns_out]
df_ec_2017.insert(0, "year", 2017)
df_ec_2017.insert(1, "NAME", "El Cerrito")

df_ec_2022 = get_place_precombo(2022, 'place', '21796')[veh_columns_out]
df_ec_2022.insert(0, "year", 2022)
df_ec_2022.insert(1, "NAME", "El Cerrito")

df_out_ec = pd.concat((df_ec_2017, df_ec_2022))

# Lafayette (place 39122)
df_la_2017 = get_place_precombo(2017, 'place', '39122')[veh_columns_out]
df_la_2017.insert(0, "year", 2017)
df_la_2017.insert(1, "NAME", "Lafayette")

df_la_2022 = get_place_precombo(2022, 'place', '39122')[veh_columns_out]
df_la_2022.insert(0, "year", 2022)
df_la_2022.insert(1, "NAME", "Lafayette")

df_out_la = pd.concat((df_la_2017, df_la_2022))

In [31]:
# Stack the four city tables (Lafayette, El Cerrito, Pittsburg/Bay Point,
# Richmond, in that order) and write them out without the index column.
df_out = pd.concat((df_out_la, df_out_ec, df_out_pbp, df_out_richmond))
df_out.to_csv('vehicle/vehicles.csv', index=False)

In [30]:
# Display the stacked table for all four city areas
df_out

Unnamed: 0,year,NAME,total,total_moe,pct_no_vehicle,pct_no_vehicle_moe,pct_one_vehicle,pct_one_vehicle_moe,pct_two_or_more,pct_two_or_more_moe
0,2017,Lafayette,11732.0,533.0,,,0.123253,0.022147,0.852625,0.048247
0,2022,Lafayette,11694.0,569.0,,,0.127587,0.031374,0.864118,0.0839
0,2017,El Cerrito,12253.0,449.0,0.020811,0.007552,0.307435,0.027046,0.671754,0.034221
0,2022,El Cerrito,13332.0,514.0,0.039004,0.013644,0.263201,0.02911,0.697795,0.045352
0,2017,Pittsburg/Bay Point,40547.0,1230.09146,0.020199,0.006116,0.14625,0.014124,0.833551,0.033984
0,2022,Pittsburg/Bay Point,46627.0,1405.072596,0.020053,0.006193,0.139619,0.026306,0.840329,0.037195
0,2017,Richmond,51850.0,1199.887495,0.042642,0.010669,0.226982,0.015898,0.730376,0.027976
0,2022,Richmond,57520.0,1491.450971,0.02766,0.009716,0.22646,0.021058,0.74588,0.035704
