In [17]:
#importing necessary libraries
import pandas as pd
import requests

In [18]:
#variables that will change each YEAR

#monetary values only
inflation_rate = .0676

#years you need
lastest_year = '2017/acs'
oldest_year = '2012'

#please change these as well
years = ['2012', '2017']

#ignore this
top_geogs = ['us:1', 'state:34']

In [19]:
#variables that will change each DATASET

#name of the variable you need and its margin of error (from https://api.census.gov/data/2017/acs/acs5/subject/variables.html)
api_variable = 'B25077_001E'
api_moe = 'B25077_001M'

#give it a name so that the column names line up right. Lowercase plz
api_name = 'home_value'

In [20]:
#this pulls the data you need for country and state columns
def create_top_dfs():
    my_data = {}
    for n in years:
        for d in top_geogs:
            url = 'https://api.census.gov/data/'+ n +'/acs/acs5?get='+ api_variable + ',NAME&for=' + d +  '&key=37e80b2543b3d018cfbeb06c5dca27cf33e789e7'
            r = requests.get(url)
            x = r.json()
            df = pd.DataFrame(x[1:],columns=x[0])
            df = df.replace(to_replace=['-555555555.0','-555555555',], value='')
            raw_result = pd.to_numeric(df[api_variable][0])
            if n == '2012':
                my_data[n + d] = raw_result * inflation_rate + raw_result
            my_data[n + d] = raw_result
    return my_data

In [21]:
top_df_dict = create_top_dfs()

#set the absolute values you will use later on
us_previous = top_df_dict['2012us:1']
us_latest = top_df_dict['2017us:1']
nj_previous = top_df_dict['2012state:34']
nj_latest = top_df_dict['2017state:34']

In [22]:
#pulls the data for the years and geography level you specify
def create_local_dfs(year, geography):
    df_list = []
    url = 'https://api.census.gov/data/'+ year +'/acs5?get=NAME,'+ api_variable +',' + api_moe + '&for=' + geography +  '&in=state:34&key=37e80b2543b3d018cfbeb06c5dca27cf33e789e7'
    print url
    r = requests.get(url)
    x = r.json()
    df = pd.DataFrame(x[1:], columns=x[0])
    #some null values will return as large negative numbers
    df = df.replace(to_replace=['-555555555.0','-555555555',], value='')
    #all variables returned as strings at first
    df[api_variable] = pd.to_numeric(df[api_variable], errors='coerce')
    df[api_moe] = pd.to_numeric(df[api_moe], errors='coerce')
    return df

In [23]:
#data cleaning function
def clean_local_dfs(year, label):
        muni = create_local_dfs(year, 'county%20subdivision:*')
        cty = create_local_dfs(year, 'county:*')
        #combines muni and county data
        local_df = muni.append(cty).reset_index().drop(labels='index', axis=1)
        local_df['county subdivision'] = local_df['county subdivision'].fillna('')
        #create FIPS code, since it initially returns as separate columns
        local_df['GEO.id2'] = local_df.state + local_df.county + local_df['county subdivision']
        local_df = local_df[[api_variable, api_moe, u'NAME',
                  u'GEO.id2']]
        #create "geography levels" for separate muni, county columns
        local_df['GEOLevel1'] = local_df['NAME'].str.split(', ', expand=True)[0]
        local_df['GEOLevel2'] = local_df['NAME'].str.split(', ', expand=True)[1]
        #calculate margin of errors's percent of value
        local_df['moe_pct'] = (local_df[api_moe] / local_df[api_variable]) * 100
        #rename from api variable to nice topic name
        local_df = local_df.rename(columns={api_variable: api_name, api_moe: api_name + '_moe', 'NAME':'GEOdisplaylabel'})
        local_df['GEOdisplaylabel'] = local_df['GEOdisplaylabel'].replace(to_replace=', New Jersey', value='', regex=True)
        if label == 'prev':
            local_df[api_name + '_prev_winf'] = local_df[api_name] * inflation_rate + local_df[api_name]
        return local_df

In [24]:
var_latest = clean_local_dfs(lastest_year, 'latest')
var_prev = clean_local_dfs(oldest_year, 'prev')

https://api.census.gov/data/2017/acs/acs5?get=NAME,B25077_001E,B25077_001M&for=county%20subdivision:*&in=state:34&key=37e80b2543b3d018cfbeb06c5dca27cf33e789e7
https://api.census.gov/data/2017/acs/acs5?get=NAME,B25077_001E,B25077_001M&for=county:*&in=state:34&key=37e80b2543b3d018cfbeb06c5dca27cf33e789e7
https://api.census.gov/data/2012/acs5?get=NAME,B25077_001E,B25077_001M&for=county%20subdivision:*&in=state:34&key=37e80b2543b3d018cfbeb06c5dca27cf33e789e7
https://api.census.gov/data/2012/acs5?get=NAME,B25077_001E,B25077_001M&for=county:*&in=state:34&key=37e80b2543b3d018cfbeb06c5dca27cf33e789e7


In [25]:
#Merge 
var_merge = pd.merge(var_latest, var_prev, left_on='GEO.id2', right_on='GEO.id2', how='outer', suffixes=('_latest', '_prev'))

In [26]:
#drop unneccessary columns and rename ones that need it
var_merge = var_merge.drop(['GEOdisplaylabel_prev', 'GEOLevel1_latest', 'GEOLevel2_latest'], axis=1)
var_merge.rename(columns={'GEOdisplaylabel_latest': 'GEOdisplaylabel', 'GEOLevel1_prev': 'GEOLevel1','GEOLevel2_prev':'GEOLevel2', 'GEO.id_latest': 'GEO.id', 'GEOdisplaylabel_long_x': 'GEOdisplaylabel_long'}, inplace=True)

In [27]:
#Take out County subdivisions not defined
var_merge = var_merge[var_merge['GEOdisplaylabel'].str.contains("County subdivisions not defined") == False]
var_merge = var_merge.sort_values(by=['GEO.id2'])

In [28]:
#Add NJ and Nationwide
var_merge['nj_' + api_name+'_latest'] = nj_latest
var_merge['nj_' + api_name+'_prev'] = nj_previous
var_merge['usa_' + api_name+'_latest'] = us_latest
var_merge['usa_' + api_name+'_prev'] = us_previous
var_merge[api_name+'_per_change'] = ((var_merge[api_name+'_latest'] - var_merge[api_name+'_prev']) / var_merge[api_name+'_prev']) * 100
var_merge['nj_' + api_name+'_change'] = ((var_merge['nj_' + api_name+'_latest'] - var_merge['nj_' + api_name+'_prev']) / var_merge['nj_' + api_name+'_prev']) * 100
var_merge['us_' + api_name+'_change'] = ((var_merge['usa_' + api_name+'_latest'] - var_merge['usa_' + api_name+'_prev']) / var_merge['usa_' + api_name+'_prev']) * 100

In [29]:
var_merge.columns

Index([u'home_value_latest', u'home_value_moe_latest', u'GEOdisplaylabel',
       u'GEO.id2', u'moe_pct_latest', u'home_value_prev',
       u'home_value_moe_prev', u'GEOLevel1', u'GEOLevel2', u'moe_pct_prev',
       u'home_value_prev_winf', u'nj_home_value_latest', u'nj_home_value_prev',
       u'usa_home_value_latest', u'usa_home_value_prev',
       u'home_value_per_change', u'nj_home_value_change',
       u'us_home_value_change'],
      dtype='object')

In [30]:
# re-ordering columns the way I want them

var_merge = var_merge[[ u'GEOdisplaylabel', u'GEO.id2',
        u'GEOLevel1', u'GEOLevel2',api_name + '_latest', api_name + '_moe_latest', 
        'moe_pct_latest',
        api_name + '_prev', api_name + '_moe_prev', 'moe_pct_prev', 'home_value_prev_winf',
       u'nj_' + api_name+'_latest', u'nj_' + api_name+'_prev',
       u'usa_' + api_name+'_latest', u'usa_' + api_name+'_prev',
       api_name + '_per_change', u'nj_' + api_name+'_change',
       u'us_' + api_name+'_change']]

In [31]:
var_merge.head()

Unnamed: 0,GEOdisplaylabel,GEO.id2,GEOLevel1,GEOLevel2,home_value_latest,home_value_moe_latest,moe_pct_latest,home_value_prev,home_value_moe_prev,moe_pct_prev,home_value_prev_winf,nj_home_value_latest,nj_home_value_prev,usa_home_value_latest,usa_home_value_prev,home_value_per_change,nj_home_value_change,us_home_value_change
577,Atlantic County,34001,Atlantic County,New Jersey,219000.0,2698.0,1.231963,248100.0,3154.0,1.271262,264871.56,321100,337900,193500,181400,-11.729141,-4.971885,6.670342
98,"Absecon city, Atlantic County",3400100100,Absecon city,Atlantic County,196100.0,9883.0,5.039776,246600.0,14572.0,5.909165,263270.16,321100,337900,193500,181400,-20.478508,-4.971885,6.670342
121,"Atlantic City city, Atlantic County",3400102080,Atlantic City city,Atlantic County,168800.0,7371.0,4.366706,220400.0,9245.0,4.194646,235299.04,321100,337900,193500,181400,-23.411978,-4.971885,6.670342
109,"Brigantine city, Atlantic County",3400107810,Brigantine city,Atlantic County,348900.0,14425.0,4.134422,417700.0,22658.0,5.424467,445936.52,321100,337900,193500,181400,-16.471152,-4.971885,6.670342
112,"Buena borough, Atlantic County",3400108680,Buena borough,Atlantic County,182900.0,12682.0,6.933844,195900.0,12373.0,6.315978,209142.84,321100,337900,193500,181400,-6.636039,-4.971885,6.670342


In [33]:
# var_merge.to_csv('median_home_value_data_towns_clean.csv', header=True, index=False)