In [78]:
import os
import time

import altair as alt
import numpy as np
import pandas as pd
from census import Census

# Census API access
# Prefer a key supplied through the CENSUS_API_KEY environment variable so the
# credential does not have to live in the notebook.
# NOTE(review): the literal below is a committed credential, kept only as a
# backward-compatible fallback -- rotate it and remove the fallback.
api_key = os.environ.get("CENSUS_API_KEY", "639f2aedf7c17b164527591258cda00b25249b4b")
c = Census(key=api_key)

## Race
What are the racial and ethnic percentages of the neighborhood group versus the city? Are there any census tracts with MOEs that are too high to include?

In [32]:
# Table B03002 line numbers paired with the short name each one is renamed to.
# Every line is requested twice: once with suffix "E" (renamed to the short
# name) and once with suffix "M" (renamed to the short name plus "_moe").
_b03002_lines = [
    ('001', 'total'),
    ('003', 'nh_white'),
    ('004', 'nh_black'),
    ('005', 'nh_native'),
    ('006', 'nh_asian'),
    ('007', 'nh_pi'),
    ('008', 'nh_1other'),
    ('009', 'nh_multi'),
    ('012', 'hispanic'),
]

# Maps each API variable code to its column name, in request order
race_variables = {
    f'B03002_{line}{kind}': short_name + tail
    for line, short_name in _b03002_lines
    for kind, tail in (('E', ''), ('M', '_moe'))
}

# Columns kept in the final output: a proportion and its MOE for each group
race_columns_out = [
    column
    for group in ('nh_white', 'nh_black', 'nh_asian', 'nh_other', 'hispanic')
    for column in (f'pct_{group}', f'pct_{group}_moe')
]

In [144]:
def z_statistic(x1, x2, moe1, moe2, moe_z=1.645):
    '''
    Two-sample z-statistic for the difference between two estimates.

    Inputs:
    - x1, x2: the two values being compared
    - moe1, moe2: the margins of error (MOEs) of x1 and x2
    - moe_z: the critical value the MOEs were built with; each MOE is divided
      by it to recover a standard error. Defaults to 1.645, the two-sided 90%
      critical value (the confidence level ACS MOEs are published at).

    Output:
    The absolute two-sample z-value (float) of the difference between the two
    values.

    Raises:
    ZeroDivisionError when both MOEs are zero (for plain Python numbers).
    '''
    # Convert each MOE back into a standard error
    se1 = moe1 / moe_z
    se2 = moe2 / moe_z

    # Standard error of the difference between two independent estimates
    se_diff = (se1**2 + se2**2)**0.5

    # Only the magnitude matters, so the argument order is irrelevant
    return abs((x1 - x2) / se_diff)

In [91]:
def combine_races(in_df, min_total=25, max_moe_ratio=.4):
    '''
    Group the smaller race categories into "nh_other" and compute each group's
    share of the total, with a margin of error (MOE) for every share.

    Inputs:
    - in_df (pd.DataFrame): rows holding the renamed Table B03002 columns
      (the values of race_variables: estimates plus their "_moe" columns)
    - min_total (number): rows whose "total" is below this get NaN for every
      proportion and proportion MOE. Defaults to 25.
    - max_moe_ratio (float): a proportion (and its MOE) is set to NaN when
      MOE / proportion exceeds this. Defaults to 0.4.

    Outputs:
    A copy of in_df (the input is not modified) with these columns added:
    nh_other, nh_other_moe, and for each of nh_white, nh_black, nh_asian,
    nh_other and hispanic: pct_{group}, pct_{group}_moe, pct_{group}_moe_ratio.
    '''
    df = in_df.copy()

    ### AGGREGATE ESTIMATES
    # Columns folded into "nh_other"
    nh_other_cols = ['nh_native', 'nh_pi', 'nh_1other', 'nh_multi']
    df['nh_other'] = df[nh_other_cols].sum(axis='columns')

    # MOE of a sum: square root of the summed squared MOEs
    nh_other_moes = [f'{col}_moe' for col in nh_other_cols]
    df['nh_other_moe'] = (df[nh_other_moes]**2).sum(axis='columns')**0.5

    # Rows with too few people to report; "total" is not changed below
    too_small = df['total'] < min_total

    ### CALCULATE PROPORTIONS
    for group in ['nh_white', 'nh_black', 'nh_asian', 'nh_other', 'hispanic']:
        pct_col = f'pct_{group}'
        pct_moe_col = f'pct_{group}_moe'
        ratio_col = f'pct_{group}_moe_ratio'

        # Proportion of the total made up by this group
        df[pct_col] = df[group] / df['total']

        # MOE of a proportion: sqrt(moe_num^2 - p^2 * moe_den^2) / den
        numerator_term = df[f'{group}_moe']**2
        denominator_term = df[pct_col]**2 * df['total_moe']**2
        radicand = numerator_term - denominator_term

        # When the value under the root is negative the proportion formula is
        # undefined (it used to come out as NaN); Census guidance is to use
        # the ratio formula instead, which adds the two terms. NaN radicands
        # are left as NaN.
        radicand = radicand.where(~(radicand < 0), numerator_term + denominator_term)
        df[pct_moe_col] = radicand**0.5 / df['total']

        # NaN-out any too-low absolute n
        df.loc[too_small, [pct_col, pct_moe_col]] = float('NaN')

        # NaN-out any estimate whose MOE is too large relative to the estimate.
        # The ratio is computed after the step above, so it is NaN for
        # too-small rows.
        df[ratio_col] = df[pct_moe_col] / df[pct_col]
        too_noisy = df[ratio_col] > max_moe_ratio
        df.loc[too_noisy, [pct_col, pct_moe_col]] = float('NaN')

    return df

In [92]:
def combine_tracts(in_df):
    '''
    Collapse several tracts into one aggregate row, then compute the grouped
    race proportions and margins of error (MOEs) for that aggregate.

    Inputs:
    - in_df (pd.DataFrame): one row per tract, holding the renamed Table B03002
      columns (the values of race_variables). It is not modified.

    Outputs:
    A single-row DataFrame (index 0) with each estimate summed across the
    input rows, each MOE combined as the square root of the summed squares,
    and the extra columns added by combine_races.
    '''
    ### CLEAN UNUSUAL MOES
    # replace() returns a new frame, so the caller's data is left untouched.
    # NOTE(review): -555555555 is presumably the ACS annotation value for an
    # MOE that is not applicable; it is counted as 0 here -- confirm.
    cleaned = in_df.replace(-555555555.0, 0)

    # Build the aggregate as a plain dict so the resulting frame gets numeric
    # dtypes rather than the object dtype produced by filling an empty frame
    # one cell at a time.
    combined = {}
    for col in race_variables.values():
        if "moe" not in col:
            # sum the totals
            combined[col] = cleaned[col].sum()
        else:
            # sum of squares the moes
            combined[col] = np.sqrt((cleaned[col]**2).sum())

    return combine_races(pd.DataFrame([combined]))

In [53]:
def get_race_precombo(year_in, place_num):
    '''
    Fetch the Table B03002 variables from the 5-year ACS for a set of
    geographies within state 06, county 013.

    Inputs:
    - year_in: passed to the API call as the `year`
    - place_num (str): the API "for" clause, e.g. 'tract:375000, 376000'

    Outputs:
    A DataFrame of the API records with the columns renamed per race_variables
    and the "state", "county" and "tract" columns dropped.
    '''
    geography = {'for': place_num, 'in': 'state:06 county:013'}
    records = c.acs5.get(list(race_variables.keys()), geography, year=year_in)

    return (
        pd.DataFrame(records)
        .rename(columns=race_variables)
        .drop(columns=["state", "county", "tract"])
    )

In [99]:
# Pull ACS Table B03002 from the 2014 and 2019 5-year ACS for the selected
# tracts around the BART station in Richmond City.
# county: 013
# city: 60620
c_tracts = 'tract:375000, 376000, 377000, 374000, 381000'

# One request per survey year, 2014 first and then 2019
df_tracts_2014, df_tracts_2019 = (
    get_race_precombo(acs_year, c_tracts) for acs_year in (2014, 2019)
)

In [110]:
# Aggregate the tracts for each survey year
df_tracts_combined_2014 = combine_tracts(df_tracts_2014)
df_tracts_combined_2019 = combine_tracts(df_tracts_2019)

# Keep only the reported percentage columns, led by the survey year
df_tracts_out_2014 = df_tracts_combined_2014.assign(year=2014)[["year", *race_columns_out]]
df_tracts_out_2019 = df_tracts_combined_2019.assign(year=2019)[["year", *race_columns_out]]

In [104]:
def get_city_df(year_in):
    '''
    Fetch the Table B03002 variables from the 5-year ACS for place 60620 in
    state 06 and compute the grouped race proportions and their MOEs.

    Inputs:
    - year_in: passed to the API call as the `year`

    Outputs:
    The DataFrame returned by combine_races for the city's records.
    '''
    # City
    records = c.acs5.get(
        list(race_variables.keys()),
        {'for': 'place:60620', 'in': 'state:06'},
        year=year_in
    )
    df = pd.DataFrame(records).rename(columns=race_variables)

    ### CLEAN UNUSUAL MOES
    # Apply the same cleaning combine_tracts does, so the -555555555 marker is
    # never squared into the MOE arithmetic.
    # NOTE(review): presumably the ACS annotation value for an MOE that is not
    # applicable; it is counted as 0 here -- confirm.
    df = df.replace(-555555555.0, 0)

    return combine_races(df)

In [137]:
# City-wide figures for both survey years, 2014 first and then 2019
df_city_2014_out, df_city_2019_out = (
    get_city_df(acs_year) for acs_year in (2014, 2019)
)

In [150]:
# Two-sample z-value for a pair of hard-coded estimates and their MOEs.
# NOTE(review): the cells shown do not say which columns these four numbers
# were copied from -- confirm their source.
z_2014 = z_statistic(x1=.2758, x2=.2706, moe1=0.0119, moe2=0.01097)
z_2014

0.5285174104864416

In [120]:
# Reduce each city frame to the reported percentage columns, led by the survey year
df_city_2014_out = df_city_2014_out.assign(year=2014)[["year", *race_columns_out]]
df_city_2019_out = df_city_2019_out.assign(year=2019)[["year", *race_columns_out]]

In [121]:
# Stack the 2014 and 2019 summaries into one frame per geography (tract group
# and city) so each can be exported as a single combined CSV.
df_tracts_out = pd.concat([df_tracts_out_2014, df_tracts_out_2019])
df_city_out = pd.concat([df_city_2014_out, df_city_2019_out])
# Export to CSV (currently disabled).
# NOTE(review): `df_out` is not defined in the cells shown here -- point this
# at df_tracts_out / df_city_out before re-enabling.
# df_out.to_csv('race_pct.csv', index=False)

In [112]:
# Display the tract-group percentages and MOEs for both survey years
df_tracts_out

Unnamed: 0,year,pct_nh_white,pct_nh_white_moe,pct_nh_black,pct_nh_black_moe,pct_nh_asian,pct_nh_asian_moe,pct_nh_other,pct_nh_other_moe,pct_hispanic,pct_hispanic_moe
0,2014,0.077397,0.012089,0.215779,0.023045,0.054345,0.012125,0.028126,0.010244,0.624353,0.037259
0,2019,0.079629,0.011785,0.189656,0.022237,0.104076,0.035202,0.032921,0.009707,0.593718,0.033877


In [122]:
# Display the city percentages and MOEs for both survey years
df_city_out

Unnamed: 0,year,pct_nh_white,pct_nh_white_moe,pct_nh_black,pct_nh_black_moe,pct_nh_asian,pct_nh_asian_moe,pct_nh_other,pct_nh_other_moe,pct_hispanic,pct_hispanic_moe
0,2014,0.175328,0.010068,0.22708,0.013064,0.139693,0.010143,0.051996,0.008764,0.405902,0.01572
0,2019,0.178088,0.008945,0.195342,0.011193,0.152124,0.012986,0.04978,0.007041,0.424666,0.014721
