In [1]:
import time
import pandas as pd
from census import Census
import altair as alt
import numpy as np

# Census API access
api_key = "639f2aedf7c17b164527591258cda00b25249b4b"
c = Census(key=api_key)

In [2]:
# Occupancy: Table B25002
occupy_variables = {
    'B25002_001E': 'total',
    'B25002_001M': 'total_moe',
    'B25002_002E': 'occupied',
    'B25002_002M': 'occupied_moe',
    'B25002_003E': 'vacant',
    'B25002_003M': 'vacant_moe'
}
occupy_columns_out = [
    'total', 'total_moe',
    'pct_occupied', 'pct_occupied_moe',
    'pct_vacant', 'pct_vacant_moe'
]

In [3]:
def combine_occupancy(in_df):
    '''
    Outputs: simplified breakdown with MOEs
    '''
    df = in_df.copy()
    
    for group in ['occupied', 'vacant']:
        # Calculate the proportion for this group
        df[f'pct_{group}'] = df[group] / df['total']
    
        # Calculate the MOE for this proportion
        df[f'pct_{group}_moe'] = (df[f'{group}_moe']**2 - df[f'pct_{group}']**2 * df['total_moe']**2)**0.5 / df['total']

        #NaN-out any too-low absolute n
        df.loc[df['total'] < 25, f'pct_{group}'] = float('NaN')
        df.loc[df['total'] < 25, f'pct_{group}_moe'] = float('NaN')
        
        #NaN-out any too-low moe
        df[f'pct_{group}_moe_ratio'] = df[f'pct_{group}_moe']/df[f'pct_{group}']
        df.loc[df[f'pct_{group}_moe_ratio'] > .4, f'pct_{group}'] = float('NaN')
        df.loc[df[f'pct_{group}_moe_ratio'] > .4, f'pct_{group}_moe'] = float('NaN')
    
    return df

In [4]:
def combine_tracts(in_df):
    '''

    Outputs:
    A modified version of in_df with census tracts combined
    '''
    
    # Start by making a copy of in_df, so we don't destroy the original data.
    # df is also a nice short name we can use throughout this function
    df_copy = in_df.copy()
    df = pd.DataFrame(columns=occupy_variables.values())

    ### CLEAN UNUSUAL MOES
    df_copy = df_copy.replace(-555555555.0, 0)

    for c in occupy_variables.values():
        if "moe" not in c:
            # sum the totals
            df.at[0, c] = df_copy[c].sum()
        else:
            # sum of squares the moes
            df.at[0, c] = np.sqrt((df_copy[c]**2).sum())

    df_out = combine_occupancy(df)
    
    return df_out

In [5]:
def get_occupy_precombo(year_in, place_num):
    # for getting mode for census tracts before they are combined
    df = pd.DataFrame(
        c.acs5.get(
            list(occupy_variables.keys()),
            {'for': place_num, 'in': 'state:06 county:013'},
            year=year_in
        )
    )
    df = df.rename(columns=occupy_variables)
    df = df.drop(columns=["state", "county", "tract"])
    
    return df

In [6]:
def get_city_df(year_in):
    # City
    df = pd.DataFrame(
        c.acs5.get(
            list(occupy_variables.keys()),
            {'for': 'place:60620', 'in': 'state:06'},
            year=year_in
        )
    )
    df = df.rename(columns=occupy_variables)
    df_out = combine_occupancy(df)

    return df_out

In [7]:
# for 5yr ACS 2014 and 2019
# Get ACS Table B08141 for select tracts around the BART station
# county:013
# city: 60620
c_tracts = 'tract:375000, 376000, 377000, 374000, 381000'

df_tracts_2014 = get_occupy_precombo(2014, c_tracts)
df_tracts_2019 = get_occupy_precombo(2019, c_tracts)

In [20]:
df_tracts_2019

Unnamed: 0,total,total_moe,occupied,occupied_moe,vacant,vacant_moe
0,1291.0,41.0,1231.0,59.0,60.0,51.0
1,2026.0,82.0,1644.0,110.0,382.0,78.0
2,2466.0,87.0,2265.0,141.0,201.0,122.0
3,1841.0,74.0,1771.0,90.0,70.0,63.0
4,2182.0,64.0,2001.0,105.0,181.0,94.0


In [8]:
df_comb_2014 = combine_tracts(df_tracts_2014)
df_tracts_out_2014 = df_comb_2014[occupy_columns_out]
df_tracts_out_2014.insert(0, "year", 2014)
df_comb_2019 = combine_tracts(df_tracts_2019)
df_tracts_out_2019 = df_comb_2019[occupy_columns_out]
df_tracts_out_2019.insert(0, "year", 2019)

In [9]:
# for 5yr ACS 2017 and 2022
df_tracts_2017 = get_occupy_precombo(2017, c_tracts)
df_tracts_2022 = get_occupy_precombo(2022, c_tracts)

In [10]:
df_comb_2017 = combine_tracts(df_tracts_2017)
df_tracts_out_2017 = df_comb_2017[occupy_columns_out]
df_tracts_out_2017.insert(0, "year", 2017)
df_comb_2022 = combine_tracts(df_tracts_2022)
df_tracts_out_2022 = df_comb_2022[occupy_columns_out]
df_tracts_out_2022.insert(0, "year", 2022)

In [11]:
df_tracts_out_2017

Unnamed: 0,year,total,total_moe,pct_occupied,pct_occupied_moe,pct_vacant,pct_vacant_moe
0,2017,9692.0,135.321839,0.899608,0.022033,0.100392,0.022091


In [12]:
# for 5yr ACS 2014 and 2019
# Get ACS Table B08141 in Richmond City
df_city_2014 = get_city_df(2014)
df_city_2019 = get_city_df(2019)

In [13]:
df_city_out_2014 = df_city_2014[occupy_columns_out]
df_city_out_2014.insert(0, "year", 2014)
df_city_out_2019 = df_city_2019[occupy_columns_out]
df_city_out_2019.insert(0, "year", 2019)

In [14]:
# for 5yr ACS 2017 and 2022
# Get ACS Table B08141 in Richmond City
df_city_2017 = get_city_df(2017)
df_city_2022 = get_city_df(2022)

In [15]:
df_city_out_2017 = df_city_2017[occupy_columns_out]
df_city_out_2017.insert(0, "year", 2017)
df_city_out_2022 = df_city_2022[occupy_columns_out]
df_city_out_2022.insert(0, "year", 2022)

In [16]:
df_city_out_2014

Unnamed: 0,year,total,total_moe,pct_occupied,pct_occupied_moe,pct_vacant,pct_vacant_moe
0,2014,39772.0,736.0,0.915544,0.007751,0.084456,0.009706


In [17]:
#export settings, for a combined csv
df_tracts_out = pd.concat([df_tracts_out_2017, df_tracts_out_2022])
df_city_out = pd.concat([df_city_out_2017, df_city_out_2022])

In [18]:
df_tracts_out

Unnamed: 0,year,total,total_moe,pct_occupied,pct_occupied_moe,pct_vacant,pct_vacant_moe
0,2017,9692.0,135.321839,0.899608,0.022033,0.100392,0.022091
0,2022,10266.0,484.949482,0.945256,0.021722,0.054744,0.016365


In [19]:
df_city_out

Unnamed: 0,year,total,total_moe,pct_occupied,pct_occupied_moe,pct_vacant,pct_vacant_moe
0,2017,39534.0,751.0,0.92872,,0.07128,0.011556
0,2022,41566.0,931.0,0.953135,0.008492,0.046865,0.007651
