In [1]:
import pandas as pd
import requests

In [2]:
inflation_rate = .0676

lastest_year = '2017/acs'

oldest_year = '2012'

In [3]:
years = ['2012', '2017']
top_geogs = ['us:1', 'state:34']

In [4]:
def create_top_dfs():
    my_data = {}
    for n in years:
        for d in top_geogs:
            url = 'https://api.census.gov/data/'+ n +'/acs/acs5?get=B19013_001E,NAME&for=' + d +  '&key=37e80b2543b3d018cfbeb06c5dca27cf33e789e7'
            r = requests.get(url)
            x = r.json()
            df = pd.DataFrame(x[1:],columns=x[0])
            raw_result = pd.to_numeric(df['B19013_001E'][0])
            if n == '2012':
                my_data[n + d] = raw_result * .067 + raw_result
            else:
                my_data[n + d] = raw_result
    return my_data

In [5]:
top_df_dict = create_top_dfs()

In [6]:
us_previous = top_df_dict['2012us:1']
us_latest = top_df_dict['2017us:1']
nj_previous = top_df_dict['2012state:34']
nj_latest = top_df_dict['2017state:34']

In [7]:
def create_local_dfs(year, geography):
    df_list = []
    url = 'https://api.census.gov/data/'+ year +'/acs5?get=NAME,B19013_001E,B19013_001M&for=' + geography +  '&in=state:34&key=37e80b2543b3d018cfbeb06c5dca27cf33e789e7'
    r = requests.get(url)
    x = r.json()
    df = pd.DataFrame(x[1:], columns=x[0])
    df['B19013_001E'] = df.B19013_001E.replace(to_replace="\+", value='', regex=True)
    df['B19013_001E'] = pd.to_numeric(df.B19013_001E, errors='coerce')
    df['B19013_001M'] = pd.to_numeric(df.B19013_001M, errors='coerce')
    return df

In [8]:
def clean_local_dfs(year, label):
#     for n in years:
        muni = create_local_dfs(year, 'county%20subdivision:*')
        cty = create_local_dfs(year, 'county:*')
        median_income = muni.append(cty).reset_index().drop(labels='index', axis=1)
        median_income['county subdivision'] = median_income['county subdivision'].fillna('')
        median_income['GEO.id2'] = median_income.state + median_income.county + median_income['county subdivision']
        median_income = median_income[[u'B19013_001E', u'B19013_001M', u'NAME',
                  u'GEO.id2']]
        median_income['MOE_per'] = (median_income.B19013_001M / median_income.B19013_001E) * 100
        median_income['GEOLevel1'] = median_income['NAME'].str.split(', ', expand=True)[0]
        median_income['GEOLevel2'] = median_income['NAME'].str.split(', ', expand=True)[1]
        median_income.columns = ['Median_income_'+ label, 'MOE_' + label, 'GEOdisplaylabel', 'GEO.id2', 'MOE_per_' + label, 'GEOLevel1', 'GEOLevel2']
        median_income['GEOdisplaylabel'] = median_income['GEOdisplaylabel'].replace(to_replace=', New Jersey', value='', regex=True)
        if label == 'prev':
            median_income['Median_income_prev_winf'] = median_income['Median_income_prev'] * inflation_rate + median_income['Median_income_prev']
        return median_income

In [9]:
median_income_latest = clean_local_dfs(lastest_year, 'latest')

In [10]:
median_income_prev = clean_local_dfs(oldest_year, 'prev')

In [11]:
#Merge Household Income
median_income_merge = pd.merge(median_income_latest, median_income_prev, left_on='GEO.id2', right_on='GEO.id2', how='outer')

In [12]:
median_income_merge.head()

Unnamed: 0,Median_income_latest,MOE_latest,GEOdisplaylabel_x,GEO.id2,MOE_per_latest,GEOLevel1_x,GEOLevel2_x,Median_income_prev,MOE_prev,GEOdisplaylabel_y,MOE_per_prev,GEOLevel1_y,GEOLevel2_y,Median_income_prev_winf
0,175684.0,11669.0,"West Windsor township, Mercer County",3402180240,6.642039,West Windsor township,Mercer County,150694.0,15596.0,"West Windsor township, Mercer County",10.34945,West Windsor township,Mercer County,160880.9144
1,85636.0,4399.0,"East Windsor township, Mercer County",3402119780,5.136858,East Windsor township,Mercer County,85831.0,4867.0,"East Windsor township, Mercer County",5.670445,East Windsor township,Mercer County,91633.1756
2,113125.0,23423.0,"Hopewell borough, Mercer County",3402133150,20.705414,Hopewell borough,Mercer County,101688.0,8510.0,"Hopewell borough, Mercer County",8.368736,Hopewell borough,Mercer County,108562.1088
3,72973.0,12408.0,"Hightstown borough, Mercer County",3402131620,17.003549,Hightstown borough,Mercer County,70806.0,9088.0,"Hightstown borough, Mercer County",12.83507,Hightstown borough,Mercer County,75592.4856
4,129320.0,8997.0,"Hopewell township, Mercer County",3402133180,6.957161,Hopewell township,Mercer County,144973.0,13380.0,"Hopewell township, Mercer County",9.229305,Hopewell township,Mercer County,154773.1748


In [13]:
median_income_merge = median_income_merge.drop(['GEOdisplaylabel_y', 'GEOLevel1_x', 'GEOLevel2_x'], axis=1)
median_income_merge.rename(columns={'GEOdisplaylabel_x': 'GEOdisplaylabel', 'GEOLevel1_y': 'GEOLevel1','GEOLevel2_y':'GEOLevel2', 'GEO.id_x': 'GEO.id', 'GEOdisplaylabel_long_x': 'GEOdisplaylabel_long'}, inplace=True)

In [14]:
#Take out County subdivisions not defined
median_income_merge = median_income_merge[median_income_merge['GEOdisplaylabel'].str.contains("County subdivisions not defined") == False]
median_income_merge = median_income_merge.sort_values(by=['GEO.id2'])

In [15]:
#Add NJ and Nationwide
median_income_merge['nj_median_income_latest'] = nj_latest
median_income_merge['nj_median_income_prev'] = nj_previous
median_income_merge['usa_median_income_latest'] = us_latest
median_income_merge['usa_median_income_prev'] = us_previous
median_income_merge['median_income_per_change'] = ((median_income_merge['Median_income_latest'] - median_income_merge['Median_income_prev_winf']) / median_income_merge['Median_income_prev_winf']) * 100
median_income_merge['nj_median_income_change'] = ((median_income_merge['nj_median_income_latest'] - median_income_merge['nj_median_income_prev']) / median_income_merge['nj_median_income_prev']) * 100
median_income_merge['us_median_income_change'] = ((median_income_merge['usa_median_income_latest'] - median_income_merge['usa_median_income_prev']) / median_income_merge['usa_median_income_prev']) * 100


In [16]:
median_income_merge = median_income_merge[[ u'GEOdisplaylabel', u'GEO.id2',
        u'GEOLevel1', u'GEOLevel2',u'Median_income_latest', u'MOE_latest', u'MOE_per_latest',
         u'Median_income_prev',  u'MOE_prev', u'MOE_per_prev', u'Median_income_prev_winf',
       u'nj_median_income_latest', u'nj_median_income_prev',
       u'usa_median_income_latest', u'usa_median_income_prev',
       u'median_income_per_change', u'nj_median_income_change',
       u'us_median_income_change']]

In [17]:
median_income_merge.head()

Unnamed: 0,GEOdisplaylabel,GEO.id2,GEOLevel1,GEOLevel2,Median_income_latest,MOE_latest,MOE_per_latest,Median_income_prev,MOE_prev,MOE_per_prev,Median_income_prev_winf,nj_median_income_latest,nj_median_income_prev,usa_median_income_latest,usa_median_income_prev,median_income_per_change,nj_median_income_change,us_median_income_change
577,Atlantic County,34001,Atlantic County,New Jersey,57514.0,1710.0,2.973189,54559.0,1127.0,2.065654,58247.1884,76475,76436.679,57652,56600.082,-1.258753,0.050134,1.85851
98,"Absecon city, Atlantic County",3400100100,Absecon city,Atlantic County,63887.0,10922.0,17.09581,63602.0,5330.0,8.38024,67901.4952,76475,76436.679,57652,56600.082,-5.912234,0.050134,1.85851
121,"Atlantic City city, Atlantic County",3400102080,Atlantic City city,Atlantic County,26006.0,1641.0,6.310082,29886.0,1923.0,6.434451,31906.2936,76475,76436.679,57652,56600.082,-18.49257,0.050134,1.85851
109,"Brigantine city, Atlantic County",3400107810,Brigantine city,Atlantic County,65724.0,6858.0,10.434544,63119.0,4369.0,6.921846,67385.8444,76475,76436.679,57652,56600.082,-2.466162,0.050134,1.85851
112,"Buena borough, Atlantic County",3400108680,Buena borough,Atlantic County,50804.0,14991.0,29.507519,48722.0,15380.0,31.566849,52015.6072,76475,76436.679,57652,56600.082,-2.329315,0.050134,1.85851


In [18]:
median_income_merge.to_csv('median_income_data_towns_clean.csv', header=True, index=False)