In [1]:
import pandas as pd
import json
import requests
import math
import numpy as np
import urllib
import os

In [2]:
import utilcalcs as calc
import cen_geo_agg as geo

In [3]:
# The Census API key is read from the environment so it is never stored in the
# notebook; the value is None when the Census_API variable is not set.
CensusAPI = os.getenv('Census_API')

In [4]:
#My search parameters (substituted into the request URLs built by get_data)
source = 'acs/acs5'  # dataset segment of the API path
year = '2018'        # year segment of the API path
# County codes requested one at a time; the state code (36) is fixed in the URLs.
counties = ['005', '047', '061', '081', '085']
# Comma-separated `get=` list -- add other tables here for other demo variables
cols = 'B01001_001E,B01001_001M,group(B03002)'

In [5]:
def get_data(geo):
    """Download one API response per county in ``counties`` and stack them.

    Relies on the module-level ``year``, ``source``, ``cols``, ``counties`` and
    ``CensusAPI`` values for the request URL.

    Parameters
    ----------
    geo : str
        ``'cbg'`` requests every block group inside each county; any other
        value requests a single county-level query per county.
        Note: inside this function the parameter name hides the ``geo``
        module alias imported at the top of the notebook.

    Returns
    -------
    pandas.DataFrame
        The per-county responses concatenated (columns sorted by
        ``pd.concat(..., sort=True)``). The first row of each response is
        used as the header; values are left exactly as parsed from the JSON.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status, instead of failing later
        while trying to parse an error page as JSON.
    """
    if geo == 'cbg':
        geography = 'for=block%20group:*&in=state:36%20county:{county}'
    else:
        geography = 'for=county:{county}&in=state:36'

    # The '&' that joins the `get=` list to the geography clause was missing
    # from the county-level URL, producing a malformed query; both variants
    # now share one URL template so they cannot drift apart again.
    base_url = f'https://api.census.gov/data/{year}/{source}?get={cols}'

    frames = []
    for c in counties:
        url = f'{base_url}&{geography.format(county=c)}&key={CensusAPI}'
        response = requests.get(url)
        response.raise_for_status()
        # Parse once: row 0 holds the column names, the rest are data rows.
        rows = json.loads(response.content)
        frames.append(pd.DataFrame(rows[1:], columns=rows[0]))
    return pd.concat(frames, sort=True)

def clean_data(df, var):
    """Keep the requested columns, cast the numeric ones and zero out flag codes.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns listed in ``var``.
    var : list of str
        Columns to keep. The first entry is not cast (the notebook passes
        ``'GEO_ID'`` there); every later entry is converted to float.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the ``var`` columns; ``df`` is not modified.
    """
    # Nine-digit codes, in both signs, that get overwritten with 0 in every
    # selected column.
    # NOTE(review): presumably the Census special/placeholder values -- confirm.
    magnitudes = [999999999, 555555555, 333333333, 222222222, 666666666, 888888888]
    flag_codes = magnitudes + [-m for m in magnitudes]

    float_casts = {name: float for name in var[1:]}
    selected = df[var].astype(float_casts)
    return selected.replace(flag_codes, 0)

In [6]:
## Total Population -- [estimate column, margin-of-error column]
Pop = ['B01001_001E', 'B01001_001M']

## Race - Mutually Exclusive (i.e. "Alone") -- each as [estimate, moe]
White = ['B03002_003E', 'B03002_003M']
Black = ['B03002_004E', 'B03002_004M']
Hispanic = ['B03002_012E', 'B03002_012M']
Asian = ['B03002_006E', 'B03002_006M']
# "Other" pools four B03002 lines; estimates and moes are kept in separate,
# identically ordered lists for the calcs.
Other_E = [f'B03002_{line}E' for line in ('005', '007', '008', '009')]
Other_M = [f'B03002_{line}M' for line in ('005', '007', '008', '009')]

Race = [*White, *Black, *Hispanic, *Asian, *Other_E, *Other_M]

#other socioeconomic variables here
##
##
##

# Columns handed to clean_data: the first entry is kept as-is, the rest are cast to float.
var_data = ['GEO_ID', *Pop, *Race] # + other variables

In [7]:
# Download block-group rows for every configured county, then keep and cast
# only the columns listed in var_data.
dff = clean_data(get_data('cbg'), var_data)

In [8]:
# Preview the cleaned block-group data.
dff.head(n=20)

Unnamed: 0,GEO_ID,B01001_001E,B01001_001M,B03002_003E,B03002_003M,B03002_004E,B03002_004M,B03002_012E,B03002_012M,B03002_006E,B03002_006M,B03002_005E,B03002_007E,B03002_008E,B03002_009E,B03002_005M,B03002_007M,B03002_008M,B03002_009M
0,1500000US360050245023,1886.0,358.0,11.0,23.0,227.0,134.0,1648.0,401.0,0.0,12.0,0.0,0.0,0.0,0.0,12.0,12.0,12.0,12.0
1,1500000US360050247002,1318.0,204.0,12.0,13.0,630.0,180.0,662.0,198.0,6.0,9.0,0.0,0.0,0.0,8.0,12.0,12.0,12.0,12.0
2,1500000US360050253004,1222.0,298.0,14.0,22.0,45.0,51.0,1155.0,299.0,0.0,12.0,0.0,0.0,0.0,8.0,12.0,12.0,12.0,13.0
3,1500000US360050263005,1941.0,610.0,464.0,291.0,398.0,254.0,891.0,504.0,0.0,12.0,126.0,0.0,27.0,35.0,115.0,12.0,35.0,41.0
4,1500000US360050265004,1191.0,623.0,0.0,12.0,211.0,295.0,892.0,529.0,88.0,147.0,0.0,0.0,0.0,0.0,12.0,12.0,12.0,12.0
5,1500000US360050266025,755.0,318.0,398.0,276.0,33.0,33.0,274.0,196.0,50.0,42.0,0.0,0.0,0.0,0.0,12.0,12.0,12.0,12.0
6,1500000US360050267011,1668.0,448.0,0.0,12.0,96.0,66.0,1056.0,419.0,463.0,236.0,0.0,0.0,0.0,53.0,12.0,12.0,12.0,83.0
7,1500000US360050213023,986.0,542.0,4.0,6.0,364.0,481.0,618.0,442.0,0.0,12.0,0.0,0.0,0.0,0.0,12.0,12.0,12.0,12.0
8,1500000US360050213022,863.0,526.0,0.0,12.0,426.0,460.0,424.0,220.0,13.0,21.0,0.0,0.0,0.0,0.0,12.0,12.0,12.0,12.0
9,1500000US360050213021,1709.0,836.0,0.0,12.0,705.0,657.0,976.0,613.0,28.0,58.0,0.0,0.0,0.0,0.0,12.0,12.0,12.0,12.0


In [9]:
# Build the block-group table on its own copy so the columns added below do
# not appear on the cleaned frame `dff`.
df_cbg = dff.copy(deep=True)

### Make CBG level table

In [10]:
# Groups whose estimate and moe come straight from a single column pair:
# output prefix -> (estimate column, moe column).
for _prefix, (_est_col, _moe_col) in {
    'Pop': ('B01001_001E', 'B01001_001M'),       #Population
    'White': ('B03002_003E', 'B03002_003M'),     #Race
    'Black': ('B03002_004E', 'B03002_004M'),
    'Hispanic': ('B03002_012E', 'B03002_012M'),
    'Asian': ('B03002_006E', 'B03002_006M'),
}.items():
    df_cbg[f'{_prefix}_E'] = df_cbg[_est_col]
    df_cbg[f'{_prefix}_M'] = df_cbg[_moe_col]
    # *_C is calc.get_cv(estimate, moe), evaluated row by row.
    df_cbg[f'{_prefix}_C'] = df_cbg.apply(
        lambda row: calc.get_cv(row[f'{_prefix}_E'], row[f'{_prefix}_M']), axis=1)

# "Other": the estimate is the row sum of the Other_E columns; the moe is
# calc.get_moe applied to the row's Other_M values.
df_cbg['Other_E'] = df_cbg[Other_E].sum(axis=1)
df_cbg['Other_M'] = df_cbg.apply(lambda row: calc.get_moe(row[Other_M]), axis=1)
df_cbg['Other_C'] = df_cbg.apply(
    lambda row: calc.get_cv(row['Other_E'], row['Other_M']), axis=1)

#Other variables

In [11]:
# Drop the raw ACS columns (everything in var_data after GEO_ID), keeping the
# derived *_E / *_M / *_C columns.
df_cbg = df_cbg.drop(columns=var_data[1:])
df_cbg.head(n=5)

Unnamed: 0,GEO_ID,Pop_E,Pop_M,Pop_C,White_E,White_M,White_C,Black_E,Black_M,Black_C,Hispanic_E,Hispanic_M,Hispanic_C,Asian_E,Asian_M,Asian_C,Other_E,Other_M,Other_C
0,1500000US360050245023,1886.0,358.0,11.539193,11.0,23.0,127.106936,227.0,134.0,35.885007,1648.0,401.0,14.791808,0.0,12.0,0.0,0.0,24.0,0.0
1,1500000US360050247002,1318.0,204.0,9.409117,12.0,13.0,65.85613,630.0,180.0,17.36865,662.0,198.0,18.181985,6.0,9.0,91.18541,8.0,24.0,182.370821
2,1500000US360050253004,1222.0,298.0,14.824469,14.0,22.0,95.527573,45.0,51.0,68.895643,1155.0,299.0,15.737049,0.0,12.0,0.0,8.0,24.515301,186.286484
3,1500000US360050263005,1941.0,610.0,19.10462,464.0,291.0,38.124934,398.0,254.0,38.795803,891.0,504.0,34.386417,0.0,12.0,0.0,188.0,127.573508,41.251215
4,1500000US360050265004,1191.0,623.0,31.798774,0.0,12.0,0.0,211.0,295.0,84.991141,892.0,529.0,36.051631,88.0,147.0,101.547389,0.0,24.0,0.0


### Make table for geo aggregations

In [12]:
# Start the aggregation table from a copy of the block-group table.
dff = df_cbg.copy(deep=True)

In [13]:
# Strip the first 9 characters of GEO_ID (e.g. '1500000US' in
# '1500000US360050245023') to get the key joined against the crosswalk's
# orig_cbg column.
dff['orig_cbg'] = dff['GEO_ID'].str.slice(start=9)

In [14]:
# Load the geography crosswalk and turn orig_cbg into strings so it can be
# matched against the string key built on dff.
geo_xwalk = pd.read_excel('../data/nyc_geo_xwalk.xlsx')
geo_xwalk['orig_cbg'] = geo_xwalk['orig_cbg'].map(str)

In [15]:
# Preview the crosswalk columns.
geo_xwalk.head(n=5)

Unnamed: 0,Pop_10E,orig_st,orig_co,orig_cbg,orig_ctract,orig_nta,orig_puma,orig_subbor,orig_stco
0,0,36,5,360050001000,36005000100,BX98,3603710,36005CS,36005
1,11091,36,5,360050001001,36005000100,BX98,3603710,36005CS,36005
2,0,36,5,360050002000,36005000200,BX09,3603709,36005CS,36005
3,1120,36,5,360050002001,36005000200,BX09,3603709,36005CS,36005
4,1974,36,5,360050002002,36005000200,BX09,3603709,36005CS,36005


In [16]:
# Attach the crosswalk's geography codes to each block group (inner join on
# orig_cbg), then drop the identifier columns listed below.
dff = (
    geo_xwalk
    .merge(dff, how='inner', on='orig_cbg')
    .drop(columns=['GEO_ID', 'Pop_10E', 'orig_st', 'orig_co', 'orig_stco'])
)

### NTA Table

In [23]:
# Keep orig_nta as the only geography column, aggregate with
# geo.calculate_sumgeo, and index the result by NTA code.
df_nta = dff.drop(columns=['orig_cbg', 'orig_ctract', 'orig_puma', 'orig_subbor'])
df_nta = geo.calculate_sumgeo(df_nta, 'orig_nta').set_index('orig_nta')


In [24]:
# Preview the NTA-level table.
df_nta.head(n=5)

Unnamed: 0_level_0,Asian_E,Asian_M,Asian_C,Pop_E,Pop_M,Pop_C,Black_E,Black_M,Black_C,Other_E,Other_M,Other_C,Hispanic_E,Hispanic_M,Hispanic_C,White_E,White_M,White_C
orig_nta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
BX98,125.0,52.392748,25.479756,7080.0,290.24817,2.492128,3984.0,270.266535,4.12389,104.0,60.712437,35.487747,2329.0,248.290153,6.480733,538.0,160.449369,18.129667
BX09,692.0,249.545587,21.921885,54925.0,2911.874654,3.222825,18751.0,1923.851086,6.237077,809.0,357.158228,26.837758,33187.0,2416.239227,4.425945,1486.0,512.266532,20.956139
BX39,237.0,148.801882,38.16754,54163.0,2623.718163,2.944751,15147.0,1539.595726,6.178943,511.0,272.560085,32.424662,37263.0,2305.397146,3.760989,1005.0,339.255066,20.520804
BX99,3.0,36.345564,736.485586,831.0,324.450304,23.734564,324.0,170.334964,31.958979,120.0,171.735261,86.998613,296.0,142.765542,29.320123,88.0,117.736995,81.332547
BX55,3222.0,808.643927,15.256886,37130.0,2067.697512,3.385292,7982.0,977.520844,7.444721,1078.0,434.427209,24.498097,23717.0,1800.647939,4.615334,1131.0,379.189926,20.381131


### PUMA Table

In [None]:
# Keep orig_puma as the only geography column, aggregate with
# geo.calculate_sumgeo, and index the result by PUMA code.
df_puma = dff.drop(columns=['orig_cbg', 'orig_ctract', 'orig_nta', 'orig_subbor'])
# Aggregate the PUMA frame built on the line above. The previous code passed
# df_nta here, which is already aggregated by NTA and had its orig_puma column
# dropped, so grouping it by 'orig_puma' could not work.
df_puma = geo.calculate_sumgeo(df_puma, 'orig_puma').set_index('orig_puma')

In [None]:
# Preview the PUMA-level table.
df_puma.head(n=5)