## ACS Demographics by BlockGroup
This workbook processes data from Census ACS files and other public and derived sources to create a normalized data table that can be used for analysis, machine learning, and visualization. We are processing data at the Census Block Group level to support granular data analytics and to support cross-discipline collaboration.

A visual analytic dashboard using this information is available here: https://public.tableau.com/views/BlockgroupEquityData/COMPARE?:embed=y&:display_count=yes&publish=yes

Census data files were downloaded at: https://factfinder.census.gov/faces/nav/jsf/pages/searchresults.xhtml?refresh=t&keepList=t

Data are by Blockgroup for King County, Washington.

B02001 - Race
B19301 - Per Capita Income
B25003 - Tenure
B01003 - Total Population
B19013 - Median Household Income
B15003 - Educational Attainment for 25 years and older
B22010 - Receipt of Food Stamps by Disability Status

The Census data files were downloaded from American FactFinder and then preprocessed to remove the first line and to rename the "totals" column to a feature name unique to each data set. Income values are transformed to 2017 dollars using the CPI-U for Seattle.

Property Sales data were extracted from King County Sales data by parcel and then geocoded to Block Group using the Universal Geocoder.

For more information, contact Steve Barham at stephen.barham@seattle.gov.


In [None]:
import csv
import pandas as pd
import matplotlib.path as mplPath
import numpy as np
from matplotlib import path
from ast import literal_eval
import os
from os import listdir
from os.path import isfile, join

# CPI-U values used for the income adjustments, keyed by year.
# Source: http://www.seattle.gov/financedepartment/cpi/historical.htm
_CPI_U_BY_YEAR = {
    2013: 241.563,
    2014: 246.018,
    2015: 249.364,
    2016: 254.886,
    2017: 262.668,
}

# Named constants referenced by the processing cells.
CPI_U2013 = _CPI_U_BY_YEAR[2013]
CPI_U2014 = _CPI_U_BY_YEAR[2014]
CPI_U2015 = _CPI_U_BY_YEAR[2015]
CPI_U2016 = _CPI_U_BY_YEAR[2016]
CPI_U2017 = _CPI_U_BY_YEAR[2017]

# Price level that all income values are expressed in.
CPI = CPI_U2017

## Census Files

In [None]:
# Build one demographics table per ACS year from the raw CSV extracts, stack
# the years, then convert incomes to CPI-adjusted dollars and counts to shares.
inputPath = "../data/ACS_raw/"

# Sorted so the file that seeds each year's table (the left side of the
# left-merges below) does not depend on directory listing order.
Files = sorted(f for f in listdir(inputPath) if isfile(join(inputPath, f)))
Years = ["2013", "2014", "2015", "2016"]

# CPI-U of each ACS year, keyed like the string-valued 'Year' column.
CPI_BY_YEAR = {
    "2013": CPI_U2013,
    "2014": CPI_U2014,
    "2015": CPI_U2015,
    "2016": CPI_U2016,
}

ESTIMATE_PREFIX = "Estimate; Total: - "

# clean up and processing
yearly_frames = []
for year in Years:

    df_demographics = None

    for File in Files:

        # Characters 4-5 of the file name carry the two-digit year.
        if "20" + File[4:6] != year:
            continue

        df = pd.read_csv(inputPath + File)
        df['Year'] = year
        df['Blockgroup'] = df["Id2"]

        if df_demographics is None:
            df_demographics = df
        else:
            df_demographics = pd.merge(left=df_demographics, right=df, how='left', on=['Blockgroup'])

        # Drop identifier, margin-of-error and geography columns, plus the
        # right-hand duplicates ("_y") created by the merge. Collecting the
        # names first means a column matching two rules is dropped only once.
        unwanted = [
            column for column in df_demographics.columns
            if column.startswith(("Id", "Margin", "Geography")) or column.endswith("_y")
        ]
        df_demographics = df_demographics.drop(unwanted, axis=1)

        # Strip the merge suffix from left-hand duplicates and the ACS
        # estimate prefix from the remaining columns.
        # NOTE(review): the original called dropna(axis=1, how='any') here and
        # discarded the result, so it had no effect; it is left out rather
        # than activated, because activating it would remove every column
        # containing a missing value.
        renames = {}
        for column in df_demographics.columns:
            if column.endswith("_x"):
                renames[column] = column[:-2]
            elif column.startswith(ESTIMATE_PREFIX):
                renames[column] = column[len(ESTIMATE_PREFIX):]
        df_demographics = df_demographics.rename(columns=renames)

    if df_demographics is None:
        # Previously this either raised NameError (first year) or silently
        # appended the previous year's table a second time.
        print("No ACS files found for " + year + "; skipping")
        continue

    yearly_frames.append(df_demographics)
    print(year)

if not yearly_frames:
    raise ValueError("No ACS files for years " + ", ".join(Years) + " found in " + inputPath)

# Most recent year first, matching the row order of the previous output.
df_bg_demographics = pd.concat(yearly_frames[::-1])

# .copy() so the column assignments below modify an independent frame, not a
# slice of the concatenated one.
df_bg_demographics = df_bg_demographics[df_bg_demographics['Population'] > 0].copy()

# income CPI adjustments
# Dollars of year Y are expressed at the target price level by multiplying by
# CPI / CPI_Y. The previous code multiplied by CPI_Y / CPI, which deflated the
# values instead of converting them to target-year dollars.
to_target_dollars = df_bg_demographics['Year'].map(
    {acs_year: CPI / cpi_u for acs_year, cpi_u in CPI_BY_YEAR.items()}
)
df_bg_demographics['Pc_income'] = df_bg_demographics['Pc_income'].astype(float) * to_target_dollars

# remove non-numerical
# Cast to str first so values already parsed as numbers are kept (the .str
# accessor yields NaN for non-strings), and strip thousands separators so a
# comma-grouped value is not cut off at the first comma.
# Entries with no digits become NaN.
med_hh_income = (
    df_bg_demographics['Med_Hh_Income']
    .astype(str)
    .str.replace(',', '')
    .str.extract(r'(\d+)', expand=False)
)
df_bg_demographics['Med_Hh_Income'] = med_hh_income.astype(float) * to_target_dollars

# tenure percentages
for column in ("Rent", "Own"):
    df_bg_demographics[column] = df_bg_demographics[column] / df_bg_demographics["Tenure Total"]

# race percentages (counts replaced in place by their share of "Race Total")
for column in (
    "American Indian and Alaska Native alone",
    "Black or African American alone",
    "Asian alone",
    "Native Hawaiian and Other Pacific Islander alone",
    "Two or more races",
    "White alone",
):
    df_bg_demographics[column] = df_bg_demographics[column] / df_bg_demographics["Race Total"]

# NOTE(review): unlike the other race columns, this share is written to a new
# column and 'Some other race alone' keeps its raw count. Kept as-is so the
# output column names do not change; confirm whether that is intended.
df_bg_demographics['Some other race'] = df_bg_demographics['Some other race alone'] / df_bg_demographics["Race Total"]

# food stamps percentages
df_bg_demographics['Food Stamps'] = df_bg_demographics['Estimate; Household received Food Stamps/SNAP in the past 12 months:'] / df_bg_demographics["Food Stamps Total"]
household_columns = [
    column for column in df_bg_demographics.columns
    if column.startswith("Estimate; Household")
]
df_bg_demographics = df_bg_demographics.drop(household_columns, axis=1)

# education percentages
df_bg_demographics['Four Year College'] = (
    df_bg_demographics["Bachelor's degree"]
    + df_bg_demographics["Master's degree"]
    + df_bg_demographics["Doctorate degree"]
) / df_bg_demographics["Edu Total"]

print(df_bg_demographics.columns.values)
# Raw string: the backslashes are path separators, not escape sequences.
df_bg_demographics.to_csv(r'V:\Asset Management Program\Data Science\Equity\Blockgroup_demographics.csv', mode='w', header=True, index=False)

## Block Group Attribute: Distance to Downtown
Use the Google Distance Matrix API to calculate the driving distance, in miles, from each location in the block group correlation file to downtown Seattle (Westlake Park).

In [None]:
# Read the Google Distance Matrix API key from a local file.
# The file is closed deterministically, and surrounding whitespace (such as a
# trailing newline) is stripped so it cannot end up inside the request URL.
with open(r".\Variables\google_distance_query.txt", 'r') as key_file:
    API_Key = key_file.read().strip()
import json
try:
    from pandas import json_normalize  # pandas >= 1.0
except ImportError:
    from pandas.io.json import json_normalize  # older pandas
try:
    from urllib.request import Request, urlopen  # Python 3
except ImportError:
    from urllib2 import Request, urlopen  # Python 2

In [None]:
# Reload the demographics table and load the file that supplies the origin
# coordinates (CT_LAT / CT_LON) and BLOCKGROUP ids used by the distance query.
# Raw strings: the backslashes are path separators, not escape sequences.
df_bg_demographics = pd.read_csv(r'V:\Asset Management Program\Data Science\Equity\Blockgroup_demographics.csv')
df_Origins = pd.read_csv(r'V:\Asset Management Program\Data Science\Geographies\SeattleCensusBlocksandNeighborhoodCorrelationFile.csv')

print(df_Origins.head())

In [None]:
# Query the Google Distance Matrix API once per row of df_Origins and collect
# the driving distance to downtown, in miles, in the same order as the rows.
Distance = []

Destination = "47.610954,-122.337177" # Westlake Park
# Divisor used to turn the API's distance value (meters) into miles. A float
# keeps the division a true division under Python 2 as well.
METERS_PER_MILE = 1609.0

for index, row in df_Origins.iterrows():

    Origin = str(row["CT_LAT"]) + "," + str(row["CT_LON"])
    URL = ("https://maps.googleapis.com/maps/api/distancematrix/json?units=imperial&mode=driving&origins=" + Origin
           + "&destinations=" + Destination + "&key=" + API_Key)

    response_body = urlopen(Request(URL)).read()
    data = json.loads(response_body.decode('utf-8'))

    # Request-level failures (for example a rejected key or exhausted quota)
    # are reported in the top-level "status" field. Stop with the API's own
    # message rather than failing later on a missing 'rows' entry.
    if data.get('status') != 'OK':
        raise RuntimeError(
            "Distance Matrix request failed for origin " + Origin + ": "
            + str(data.get('status')) + " " + str(data.get('error_message', ''))
        )

    # Each origin/destination pair carries its own status. A pair without a
    # route has no 'distance' entry, so record NaN to keep Distance aligned
    # with the rows of df_Origins.
    element = data['rows'][0]['elements'][0]
    if element.get('status') != 'OK':
        print("No distance for origin " + Origin + ": " + str(element.get('status')))
        Distance.append(float('nan'))
        continue

    Distance.append(element['distance']['value'] / METERS_PER_MILE)
    
        

In [None]:
# Merge with demographic file
# Pair each computed distance with the BLOCKGROUP id of the df_Origins row it
# was computed for (both are in df_Origins row order).
df_result = pd.DataFrame()
df_result['Dist Downtown'] = Distance
df_result['BLOCKGROUP'] = df_Origins['BLOCKGROUP']

# Left merge keeps every demographics row; rows without a matching BLOCKGROUP
# get NaN for 'Dist Downtown'. The duplicate key column is dropped afterwards.
df_Merge = pd.merge(left=df_bg_demographics, right=df_result, how='left', left_on='Blockgroup', right_on='BLOCKGROUP')
df_Merge = df_Merge.drop('BLOCKGROUP', axis=1)

# Save to file
# Raw string: the backslashes are path separators, not escape sequences.
df_Merge.to_csv(r'V:\Asset Management Program\Data Science\Equity\Blockgroup_demographics.csv', mode='w', header=True, index=False)
# NOTE(review): this copy is written into the raw ACS input folder, while the
# later cells read '../data/Blockgroup_demographics.csv' - confirm which
# location is intended.
df_Merge.to_csv('../data/ACS_raw/Blockgroup_demographics.csv', mode='w', header=True, index=False)

## Block Group Attribute: Property Sales
Property sales data were processed in xxxxx.ipynb. TODO: get the file from Akoly.


In [None]:
# Read the block-group attribute table
df_bg_demographics = pd.read_csv('../data/Blockgroup_demographics.csv')
print(df_bg_demographics.head())
#df_bg_demographics['Blockgroup'] = df_bg_demographics['Blockgroup'].astype(str)

# Read the parcel sales and derive the sale year from the last four
# characters of DocumentDate
df_KCSales = pd.read_csv('../data/KCSales1_blkgrps.csv')
df_KCSales['year'] = df_KCSales['DocumentDate'].map(lambda doc_date: doc_date[-4:]).astype(int)
print(df_KCSales.head())

# Diagnostic: number of sales records per year
df_BGSales = df_KCSales.groupby('year', as_index=False).agg({'ParcelID':['count']})
print(df_BGSales.head())

# Mean sale price and number of sales for every block group / year pair
df_BGSales = df_KCSales.groupby(['blkgrp','year'], as_index=False).agg({'SalePrice':['mean'],'ParcelID':['count']})
# Keep only the top level of the two-level column labels produced by agg
df_BGSales.columns = df_BGSales.columns.get_level_values(0)
df_BGSales = df_BGSales.rename(columns={'ParcelID':'ParcelCount','SalePrice':'AveSalePrice','year':'Year'})
print(df_BGSales.head())

# Attach the sales figures to the demographics rows by block group and year
df_Merge = df_bg_demographics.merge(df_BGSales, how='left', left_on=['Blockgroup','Year'], right_on=['blkgrp','Year'])

# Saving the merged table is currently disabled:
#df_Merge.to_csv('../data/Blockgroup_demographics.csv', mode='w', header=True, index=False)

print(df_Merge.head())

## Block Group Attribute: Mobility Indices


In [None]:
# Read the block-group attribute table
df_bg_demographics = pd.read_csv('../data/Blockgroup_demographics.csv')
#df_bg_demographics['Blockgroup'] = df_bg_demographics['Blockgroup'].astype(int)
print(df_bg_demographics)

# Read the mobility indices
df_Mobility = pd.read_csv('../data/mobility_indices.csv')
#df_Mobility['block_group'] = df_Mobility['block_group'].astype(int)
print(df_Mobility)

# Left-join the mobility columns onto the attribute table by block group,
# then discard the duplicate key column from the right-hand table
df_Merge = df_bg_demographics.merge(df_Mobility, how='left', left_on='Blockgroup', right_on='block_group')
df_Merge = df_Merge.drop('block_group', axis=1)

# Overwrite the attribute table with the merged result
df_Merge.to_csv('../data/Blockgroup_demographics.csv', mode='w', header=True, index=False)

print(df_Merge)

