In [1]:
import pandas as pd
import json
import requests
import math
import numpy as np
import urllib
import os

In [2]:
import utilcalcs as calc
import cen_geo_agg as geo

In [3]:
# Census API key, read from the environment so it is never written into the
# notebook. Evaluates to None when the `Census_API` variable is not set.
CensusAPI = os.getenv('Census_API')

In [4]:
# Search parameters shared by every Census API request built in get_data().
year = '2018'          # URL path segment that follows /data/
source = 'acs/acs5'    # dataset path segment that follows the year

# County codes, queried one at a time inside state 36.
# NOTE(review): these look like the five NYC counties -- confirm.
counties = [
    '005',
    '047',
    '061',
    '081',
    '085',
]

# Value of the `get=` query parameter.
# Add other tables here for other demo variables.
cols = 'B01001_001E,B01001_001M,group(B03002)'

In [5]:
def get_data(geo):
    """Download the configured ACS columns for every county in ``counties``.

    One request is issued per county, using the module-level ``year``,
    ``source``, ``cols`` and ``CensusAPI`` settings.

    Parameters
    ----------
    geo : str
        ``'cbg'`` requests every block group inside each county; any other
        value requests the county-level row itself.
        (Inside this function the name shadows the ``geo`` module imported
        at the top of the notebook; it is kept for backward compatibility.)

    Returns
    -------
    pandas.DataFrame
        The rows of all responses stacked into one frame with a fresh
        0..n-1 index. Column names come from the header row of each
        response and are sorted by ``pd.concat(..., sort=True)``. Values are
        left exactly as parsed from the JSON payload.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    requests.Timeout
        If a request does not complete within the timeout.
    ValueError
        If a response body is not valid JSON.
    """
    # Only the geography clause differs between the two request flavours.
    if geo == 'cbg':
        geography = 'for=block%20group:*&in=state:36%20county:{county}'
    else:
        geography = 'for=county:{county}&in=state:36'

    # Do not send the literal string "None" as a key when the environment
    # variable is missing.
    # NOTE(review): keyless requests are assumed to be accepted by the API
    # (with a lower quota) -- confirm.
    key_suffix = f'&key={CensusAPI}' if CensusAPI else ''

    frames = []
    for c in counties:
        url = (
            f'https://api.census.gov/data/{year}/{source}'
            f'?get={cols}&{geography.format(county=c)}{key_suffix}'
        )
        resp = requests.get(url, timeout=120)
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        resp.raise_for_status()
        # First row of the payload is the header; parse the body only once.
        header, *rows = resp.json()
        frames.append(pd.DataFrame(rows, columns=header))

    # ignore_index avoids repeating the labels 0..k once per county.
    return pd.concat(frames, sort=True, ignore_index=True)

def clean_data(df,var):
    """Return a numeric, sentinel-free copy of the columns listed in ``var``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains at least the columns named in ``var``.
    var : list of str
        Columns to keep. The first entry is left untouched; every following
        entry is cast to float.

    Returns
    -------
    pandas.DataFrame
        A new frame (``df`` is not modified) holding only ``var``, with each
        sentinel code below replaced by 0.
    """
    # NOTE(review): presumably the Census "jam" codes that flag missing or
    # suppressed values -- confirm against the ACS documentation.
    magnitudes = [999999999, 555555555, 333333333, 222222222, 666666666, 888888888]
    sentinels = magnitudes + [-m for m in magnitudes]

    subset = df[var].copy()
    subset = subset.astype({name: float for name in var[1:]})
    return subset.replace(sentinels, 0)

In [6]:
## Total Population -- [estimate column, margin-of-error column]
Pop = ['B01001_001E', 'B01001_001M']

## Race - Mutually Exclusive (i.e. "Alone")
## Each single-category list is [estimate column, margin-of-error column].
White    = ['B03002_003E', 'B03002_003M']
Black    = ['B03002_004E', 'B03002_004M']
Hispanic = ['B03002_012E', 'B03002_012M']
Asian    = ['B03002_006E', 'B03002_006M']

## "Other" combines several B03002 lines, so estimates and MOEs are kept in
## separate lists for the downstream sum / combined-MOE calculations.
Other_E = [
    'B03002_005E',
    'B03002_007E',
    'B03002_008E',
    'B03002_009E',
]
Other_M = [
    'B03002_005M',
    'B03002_007M',
    'B03002_008M',
    'B03002_009M',
]

Race = [*White, *Black, *Hispanic, *Asian, *Other_E, *Other_M]

# Other socioeconomic variables go here.

# Columns kept by clean_data(): the id column first, then every numeric column.
var_data = ['GEO_ID', *Pop, *Race]  # + other variables

### Make CBG-level table for aggregations

In [7]:
# Fetch the block-group rows for every county, then keep only the columns in
# var_data with numeric values cleaned.
df_cbg = clean_data(get_data('cbg'), var_data)

In [9]:
# Population and single-category race variables:
# (output prefix, estimate column, margin-of-error column)
for label, est_col, moe_col in [
    ('Pop', 'B01001_001E', 'B01001_001M'),
    ('White', 'B03002_003E', 'B03002_003M'),
    ('Black', 'B03002_004E', 'B03002_004M'),
    ('Hispanic', 'B03002_012E', 'B03002_012M'),
    ('Asian', 'B03002_006E', 'B03002_006M'),
]:
    est_name, moe_name, cv_name = f'{label}_E', f'{label}_M', f'{label}_C'
    df_cbg[est_name] = df_cbg[est_col]
    df_cbg[moe_name] = df_cbg[moe_col]
    # Row-wise call into the project helper.
    # NOTE(review): get_cv presumably returns a coefficient of variation -- confirm.
    df_cbg[cv_name] = df_cbg.apply(
        lambda row: calc.get_cv(row[est_name], row[moe_name]), axis=1
    )

# "Other" race: the estimate is the sum of its component estimates, and the
# margin of error is derived from the component MOEs by calc.get_moe.
df_cbg['Other_E'] = df_cbg[Other_E].sum(axis=1)
df_cbg['Other_M'] = df_cbg.apply(lambda row: calc.get_moe(row[Other_M]), axis=1)
df_cbg['Other_C'] = df_cbg.apply(
    lambda row: calc.get_cv(row['Other_E'], row['Other_M']), axis=1
)

# Other variables

In [11]:
# Build the block-group key from GEO_ID by skipping its first 9 characters.
# NOTE(review): presumably the summary-level prefix ahead of the FIPS code -- confirm.
df_cbg['orig_cbg'] = df_cbg['GEO_ID'].str.slice(9)
# The raw ACS columns are no longer needed once the derived ones exist.
df_cbg = df_cbg.drop(columns=var_data)

### Make table for geo aggregations

In [13]:
# Independent working copy so the aggregation steps leave df_cbg untouched.
dff = df_cbg.copy(deep=True)

In [14]:
# Crosswalk workbook; it is joined to the block-group table on `orig_cbg`.
geo_xwalk = pd.read_excel('../data/nyc_geo_xwalk.xlsx')
# Make the join key a string so it compares equal to the string key in dff.
geo_xwalk['orig_cbg'] = geo_xwalk['orig_cbg'].map(str)

In [15]:
# Attach the crosswalk geographies to each block group (inner join: rows
# without a match on either side are dropped), then discard the crosswalk
# columns that are not used further down.
dff = (
    geo_xwalk
    .merge(dff, how='inner', on='orig_cbg')
    .drop(columns=['Pop_10E', 'orig_st', 'orig_co', 'orig_stco'])
)

### Final CBG Table

In [16]:
# Key the final block-group table by its block-group id.
df_cbg = df_cbg.set_index('orig_cbg')

In [None]:
# Write the final block-group table to an Excel workbook in the working
# directory. The ".xlsx" extension is required: pandas picks the writer
# engine from it and raises "No engine for filetype" when it is missing.
df_cbg.to_excel("cbg table.xlsx")

### NTA Table

In [19]:
# NTA input: drop the other geography ids so only `orig_nta` and the
# measures remain, on an independent copy of dff.
df_nta = dff.drop(columns=['orig_cbg', 'orig_ctract', 'orig_puma', 'orig_subbor']).copy()
df_nta.head()

Unnamed: 0,orig_nta,Pop_E,Pop_M,Pop_C,White_E,White_M,White_C,Black_E,Black_M,Black_C,Hispanic_E,Hispanic_M,Hispanic_C,Asian_E,Asian_M,Asian_C,Other_E,Other_M,Other_C
0,BX98,0.0,12.0,0.0,0.0,12.0,0.0,0.0,12.0,0.0,0.0,12.0,0.0,0.0,12.0,0.0,0.0,24.0,0.0
1,BX98,7080.0,290.0,2.489997,538.0,160.0,18.078892,3984.0,270.0,4.119823,2329.0,248.0,6.473159,125.0,51.0,24.802432,104.0,55.767374,32.597249
2,BX09,0.0,12.0,0.0,0.0,12.0,0.0,0.0,12.0,0.0,0.0,12.0,0.0,0.0,12.0,0.0,0.0,24.0,0.0
3,BX09,1919.0,595.0,18.848469,13.0,19.0,88.847323,468.0,393.0,51.048243,1438.0,557.0,23.546719,0.0,12.0,0.0,0.0,24.0,0.0
4,BX09,1703.0,497.0,17.740908,52.0,69.0,80.664017,238.0,140.0,35.758984,1327.0,455.0,20.843688,0.0,12.0,0.0,86.0,135.60236,95.852378


In [20]:
# Roll the rows up by NTA with the project helper (its implementation is not
# part of this notebook), then key the result by NTA code.
df_nta = geo.calculate_sumgeo(df_nta, 'orig_nta').set_index('orig_nta')

In [21]:
# Preview the first rows of the NTA table.
df_nta.head(n=5)

Unnamed: 0_level_0,Pop_E,Pop_M,Pop_C,Other_E,Other_M,Other_C,Black_E,Black_M,Black_C,Hispanic_E,Hispanic_M,Hispanic_C,Asian_E,Asian_M,Asian_C,White_E,White_M,White_C
orig_nta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
BX98,7080.0,290.24817,2.492128,104.0,60.712437,35.487747,3984.0,270.266535,4.12389,2329.0,248.290153,6.480733,125.0,52.392748,25.479756,538.0,160.449369,18.129667
BX09,54925.0,2911.874654,3.222825,809.0,357.158228,26.837758,18751.0,1923.851086,6.237077,33187.0,2416.239227,4.425945,692.0,249.545587,21.921885,1486.0,512.266532,20.956139
BX39,54163.0,2623.718163,2.944751,511.0,272.560085,32.424662,15147.0,1539.595726,6.178943,37263.0,2305.397146,3.760989,237.0,148.801882,38.16754,1005.0,339.255066,20.520804
BX99,831.0,324.450304,23.734564,120.0,171.735261,86.998613,324.0,170.334964,31.958979,296.0,142.765542,29.320123,3.0,36.345564,736.485586,88.0,117.736995,81.332547
BX55,37130.0,2067.697512,3.385292,1078.0,434.427209,24.498097,7982.0,977.520844,7.444721,23717.0,1800.647939,4.615334,3222.0,808.643927,15.256886,1131.0,379.189926,20.381131


### PUMA Table

In [None]:
# PUMA input: drop the other geography ids so `orig_puma` is the only one
# left, on an independent copy of dff.
df_puma = dff.drop(columns=['orig_cbg', 'orig_ctract', 'orig_nta', 'orig_subbor']).copy()
# Roll the rows up by PUMA with the project helper and key the result by PUMA code.
df_puma = geo.calculate_sumgeo(df_puma, 'orig_puma').set_index('orig_puma')

In [None]:
# Preview the first rows of the PUMA table.
df_puma.head(n=5)

### County Table

In [None]:
# Fetch the county-level row for every county, then keep only the columns in
# var_data with numeric values cleaned.
df_county = clean_data(get_data('county'), var_data)

In [None]:
# Population and single-category race variables:
# (output prefix, estimate column, margin-of-error column)
for label, est_col, moe_col in [
    ('Pop', 'B01001_001E', 'B01001_001M'),
    ('White', 'B03002_003E', 'B03002_003M'),
    ('Black', 'B03002_004E', 'B03002_004M'),
    ('Hispanic', 'B03002_012E', 'B03002_012M'),
    ('Asian', 'B03002_006E', 'B03002_006M'),
]:
    est_name, moe_name, cv_name = f'{label}_E', f'{label}_M', f'{label}_C'
    df_county[est_name] = df_county[est_col]
    df_county[moe_name] = df_county[moe_col]
    # Row-wise call into the project helper.
    # NOTE(review): get_cv presumably returns a coefficient of variation -- confirm.
    df_county[cv_name] = df_county.apply(
        lambda row: calc.get_cv(row[est_name], row[moe_name]), axis=1
    )

# "Other" race: the estimate is the sum of its component estimates, and the
# margin of error is derived from the component MOEs by calc.get_moe.
df_county['Other_E'] = df_county[Other_E].sum(axis=1)
df_county['Other_M'] = df_county.apply(lambda row: calc.get_moe(row[Other_M]), axis=1)
df_county['Other_C'] = df_county.apply(
    lambda row: calc.get_cv(row['Other_E'], row['Other_M']), axis=1
)

# Other variables

In [None]:
# Build the state+county key from GEO_ID by skipping its first 9 characters.
# NOTE(review): presumably the summary-level prefix ahead of the FIPS code -- confirm.
df_county['orig_stco'] = df_county['GEO_ID'].str.slice(9)
# Discard the raw ACS columns and key the table by the new id.
df_county = df_county.drop(columns=var_data).set_index('orig_stco')

In [None]:
# Preview the first rows of the county table.
df_county.head(n=5)