In [1]:
# Dependencies

import pandas as pd
import requests
import json
import pprint
import numpy as np
import matplotlib.pyplot as plt
import requests
from census import Census
import gmaps
from us import states


# Census & gmaps API Keys
from config import (api_key, gkey)
# Shared Census client, created for the first survey year used in this notebook.
year = 2012
c = Census(api_key, year=year)

# Hand the Google key to gmaps before any map layer is built.
gmaps.configure(api_key=gkey)


ModuleNotFoundError: No module named 'census'

In [None]:
# Accumulator for the per-year cleaned frames built by the download loop.
datalst = list()

In [None]:
# Download one ACS 5-year extract per year (2012-2017), clean it, and stack
# the yearly frames into `final_data` with a `Year` column.

# Start from an empty accumulator so that re-running this cell does not append
# a second copy of every year on top of the frames collected by an earlier run.
datalst = []

# Variables requested from the API, and the column names the rest of the
# notebook uses for them. Both are the same for every year, so build them once.
acs_variables = ('NAME', "B25077_001E", "B25064_001E", "B15003_022E", "B19013_001E")
column_names = {"NAME": "Name",
                "zip code tabulation area": "Zipcode",
                "B25077_001E": "Median_Home_Value",
                "B25064_001E": "Median_Rent",
                "B15003_022E": "Bachelor_Degrees",
                "B19013_001E": "Median_Income"}

for year in range(2012, 2018):
    # A client is created per year; note this rebinds the notebook-level
    # `c` and `year`, which end up pointing at 2017 after the loop.
    c = Census(api_key, year)

    data = c.acs5.get(acs_variables,
                      {'for': 'zip code tabulation area:*'})

    cleandata = (pd.DataFrame(data)
                   .rename(columns=column_names)
                   .astype({'Zipcode': 'int64'})
                   # Keep only rows with positive value, rent and income.
                   # Bachelor_Degrees is NOT filtered here and may be 0.
                   .query('(Median_Home_Value > 0) & (Median_Rent > 0) & (Median_Income > 0)')
                   .dropna()
                   .assign(Year=year)
                 )

    datalst.append(cleandata)


final_data = pd.concat(datalst, ignore_index=True)
final_data.head()

In [None]:
# Separate DF by year: one frame per survey year, in chronological order.
data2012, data2013, data2014, data2015, data2016, data2017 = (
    final_data.query(f'(Year == {survey_year})')
    for survey_year in range(2012, 2018)
)

In [None]:
# Change DF: put the 2012 and 2017 rows for the same Name side by side.
# Columns present in both years get a _2012 / _2017 suffix.
mergerdf = pd.merge(
    data2012,
    data2017,
    how='outer',
    on='Name',
    suffixes=('_2012', '_2017'),
)

In [None]:
# Keep only names present in both years (the outer merge leaves NaN for the
# missing side). Take an explicit copy: the following cells add columns to
# `mergedf`, and assigning into a filtered frame can raise
# SettingWithCopyWarning.
mergedf = mergerdf.dropna().copy()

In [None]:
# Preview the first five merged rows.
mergedf.head(n=5)

In [None]:
# Change in home value: fractional change from 2012 to 2017 (1.0 == +100%).
home_value_2012 = mergedf['Median_Home_Value_2012']
home_value_2017 = mergedf['Median_Home_Value_2017']
mergedf['Change_In_Value'] = (home_value_2017 - home_value_2012) / home_value_2012

In [None]:
# Change in rent: fractional change from 2012 to 2017 (1.0 == +100%).
rent_2012 = mergedf['Median_Rent_2012']
rent_2017 = mergedf['Median_Rent_2017']
mergedf['Change_In_Rent'] = (rent_2017 - rent_2012) / rent_2012

In [None]:
# Change in degree: fractional change in bachelor's-degree count, 2012 -> 2017.
# Home value, rent and income are filtered to > 0 when the data is loaded, but
# the degree count is not, so the 2012 count can be 0. Dividing by it would
# produce inf, which then passes the ">= 0.9" gentrification test and feeds
# the scatter-plot marker sizes. The change is undefined for those rows, so the
# zero denominators are masked and the result is NaN instead.
degrees_2012 = mergedf['Bachelor_Degrees_2012']
degrees_2017 = mergedf['Bachelor_Degrees_2017']
nonzero_degrees_2012 = degrees_2012.where(degrees_2012 != 0)
mergedf['Change_In_Degree'] = (degrees_2017 - degrees_2012) / nonzero_degrees_2012

In [None]:
# Change in income: fractional change from 2012 to 2017 (1.0 == +100%).
income_2012 = mergedf['Median_Income_2012']
income_2017 = mergedf['Median_Income_2017']
mergedf['Change_In_Income'] = (income_2017 - income_2012) / income_2012

In [None]:
# Preview the merged rows again, now including the Change_In_* columns.
mergedf.head(n=5)

In [None]:
# Change-in-percent data frame: keep the identifiers plus the four change ratios.
change_columns = [
    'Zipcode_2012',
    'Name',
    'Change_In_Value',
    'Change_In_Income',
    'Change_In_Degree',
    'Change_In_Rent',
]
changedf = mergedf[change_columns]

In [None]:
# Drop the year suffix from the zip code column, then store it as an integer.
changedf = changedf.rename(columns={'Zipcode_2012': 'Zipcode'})
changedf = changedf.astype({'Zipcode': 'int64'})

In [None]:
####formatteddf = changedf.style.format({'Change_In_Value': "{:.2%}",'Change_In_Rent': "{:.2%}",'Change_In_Degree': "{:.2%}"})

In [None]:
# Preview the first five rows of the change table.
changedf.head(n=5)

In [None]:
# Source for base data (changes from 2000 to 2016): 
# https://www.multifamilyexecutive.com/property-management/demographics/the-nations-most-gentrified-neighborhoods_o
#
# One record per zip code; values are whole-number percentages in the order
# given by `base_columns`.
base_columns = ('Zipcode', 'Change_In_Value', 'Change_In_Income', 'Change_In_Degree')
base_records = [
    (90014, 707, 95, 857),
    (20001, 207, 163, 212),
    (77003, 284, 71, 443),
    (19123, 203, 95, 230),
    (10039, 356, 32, 168),
    (76102, 323, 103, 122),
    (11211, 167, 79, 95),
    (19146, 404, 51, 106),
    (11222, 116, 56, 97),
    (11216, 194, 48, 149),
    (63103, 250, 44, 153),
    (90013, 133, 77, 199),
    (78702, 212, 47, 201),
    (11237, 111, 43, 125),
    (10026, 219, 65, 92),
    (21224, 107, 43, 109),
    (11221, 130, 39, 100),
    (20010, 146, 48, 84),
    (77007, 107, 114, 188),
    (98402, 103, 83, 141),
]

# Pivot the records into the column-oriented dict the frame is built from.
base = {
    column: [record[position] for record in base_records]
    for position, column in enumerate(base_columns)
}
base_df = pd.DataFrame(base)
base_df.head()

In [None]:
#Create a data frame of zip codes that fit criteria for potential gentrification (22 Zip codes)
# The change columns are fractions here, so the thresholds read as:
#   home value  >= 1    -> at least +100%
#   income      >= 0.4  -> at least +40%
#   degrees     >= 0.9  -> at least +90%
meets_gent_criteria = ((changedf['Change_In_Value'] >= 1) &
                       (changedf['Change_In_Income'] >= 0.4) &
                       (changedf['Change_In_Degree'] >= 0.9))

# .copy() makes gent_df an independent frame, so the column assignments below
# are not chained assignments on a slice of changedf (SettingWithCopyWarning).
gent_df = changedf.loc[meets_gent_criteria].copy()

# Convert the fractions to whole-number percentages for plotting.
for pct_column in ('Change_In_Value', 'Change_In_Income', 'Change_In_Degree', 'Change_In_Rent'):
    gent_df[pct_column] = (gent_df[pct_column] * 100).round()
gent_df.head()

In [None]:
#Create a data frame of zip codes that do not fit potential gentrification criteria (also does not include base zip codes)
# A row is "non-gentrified" when it misses at least one of the three thresholds.
misses_gent_criteria = ((changedf['Change_In_Value'] < 1) |
                        (changedf['Change_In_Income'] < 0.4) |
                        (changedf['Change_In_Degree'] < 0.9))

# Exclude the already-gentrified base zip codes. The previous test,
# `Zipcode != (90014 | 20001 | ...)`, bitwise-ORed the integers into a single
# number and compared against that, so no base zip code was actually removed.
# isin() performs the intended membership check.
in_base_zipcodes = changedf['Zipcode'].isin(base_df['Zipcode'])

# .copy() makes nongent_df an independent frame, so the column assignments
# below are not chained assignments on a slice of changedf.
nongent_df = changedf.loc[misses_gent_criteria & ~in_base_zipcodes].copy()

# Convert the fractions to whole-number percentages for plotting.
for pct_column in ('Change_In_Value', 'Change_In_Income', 'Change_In_Degree', 'Change_In_Rent'):
    nongent_df[pct_column] = (nongent_df[pct_column] * 100).round()
nongent_df.head()

In [None]:
# Build the scatter plots for base, nongent, and 'predictive' zipcodes
# x = % change in median income, y = % change in median home value,
# marker area = half the % change in bachelor's degrees.
nongentIncX = nongent_df['Change_In_Income']
gentIncX = gent_df['Change_In_Income']
baseIncX = base_df['Change_In_Income']
nongentValY = nongent_df['Change_In_Value']
gentValY = gent_df['Change_In_Value']
baseValY = base_df['Change_In_Value']

plt.figure(figsize=(10,7))
# NOTE(review): rows whose degree change is negative give a negative marker
# area here - confirm how the installed Matplotlib renders those points.
plt.scatter(nongentIncX, nongentValY, marker="o", s=(nongent_df['Change_In_Degree']/2), facecolors='blue', 
            edgecolors="black", alpha=0.5, label='Non-gentrified')
plt.scatter(gentIncX, gentValY, marker="o", s=(gent_df['Change_In_Degree']/2), facecolors='green', 
            edgecolors="black", alpha=0.5, label='On the way')
plt.scatter(baseIncX, baseValY, marker="o", s=(base_df['Change_In_Degree']/2), facecolors='red', 
            edgecolors="black", alpha=0.5, label='Gentrified')

plt.title("Gentrification Data (by zip code)")
plt.xlabel("Change in Median Income (%)")
plt.ylabel("Change in Median Home Value (%)")
plt.grid()

lgnd = plt.legend(loc="upper right", title="Zip Code Type")
# Give the three legend markers one fixed size instead of inheriting the
# data-driven sizes. Matplotlib 3.7 renamed Legend.legendHandles to
# legend_handles and later removed the old name, so try the new attribute
# first and fall back to the old one on older installs.
legend_markers = getattr(lgnd, "legend_handles", None)
if legend_markers is None:
    legend_markers = lgnd.legendHandles
for legend_marker in legend_markers[:3]:
    # Public setter rather than writing the private _sizes attribute.
    legend_marker.set_sizes([30])

side_text = plt.figtext(0.93, 0.5, 'Note: \n Circle size correlates \n with % change in bachelors degrees.')


In [None]:
# Avg changes (all zips), as percentages rounded to two decimals.
# Degrees: change in the total count summed over all zip codes.
degrees_total_2017 = sum(mergedf['Bachelor_Degrees_2017'])
degrees_total_2012 = sum(mergedf['Bachelor_Degrees_2012'])
aGrad = round(((degrees_total_2017 - degrees_total_2012) / degrees_total_2012) * 100, 2)

# Home value and income: mean of the per-zip-code change ratios.
mean_value_change = changedf['Change_In_Value'].mean()
mean_income_change = changedf['Change_In_Income'].mean()
aVals = round((mean_value_change * 100), 2)
aInc = round((mean_income_change * 100), 2)


In [None]:
# Grouped bar chart for the potentially gentrifying ('On-The-Way') zip codes,
# with the all-zip-code averages drawn as horizontal reference lines.
x_axis = np.arange(len(gent_df['Zipcode']))
vals = gent_df['Change_In_Value']
inc = gent_df['Change_In_Income']
grad = gent_df['Change_In_Degree']
width = (1/3)

fig, ax = plt.subplots(figsize=(10,7))

# Three bars per zip code, placed left / centre / right of each tick.
valChng, incChng, gradChng = (
    ax.bar(x_axis + offset, heights, width, align="center", color='green', alpha=shade, label=bar_label)
    for offset, heights, shade, bar_label in (
        (-width, vals, 0.3, 'Home Value Change(%)'),
        (0, inc, 0.6, 'Income Change(%)'),
        (width, grad, 0.9, 'Change in Bachelor Degree(%)'),
    )
)

# Reference lines use the same shade as the bar metric they belong to.
for metric, average, shade in (('Value', aVals, 0.3), ('Income', aInc, 0.6), ('Degree', aGrad, 0.9)):
    ax.axhline(y=average, label=f'Avg {metric} Change (all zips) {average}%', color='blue', alpha=shade)

ax.grid()
tick_locations = list(x_axis)
ax.set_xticks(tick_locations)
ax.set_xticklabels(gent_df['Zipcode'], rotation=45)
ax.set_title("% Changes for 'On-The-Way' Zip Codes")
ax.set_ylabel("% Change")
lgnd = ax.legend(loc="upper right", title="Metrics")

In [None]:
# Grouped bar chart for the base (fully gentrified) zip codes,
# with the all-zip-code averages drawn as horizontal reference lines.
x_axis = np.arange(len(base_df['Zipcode']))
vals = base_df['Change_In_Value']
inc = base_df['Change_In_Income']
grad = base_df['Change_In_Degree']
width = (1/3)

fig, ax = plt.subplots(figsize=(10,7))

# Three bars per zip code, placed left / centre / right of each tick.
valChng, incChng, gradChng = (
    ax.bar(x_axis + offset, heights, width, align="center", color='red', alpha=shade, label=bar_label)
    for offset, heights, shade, bar_label in (
        (-width, vals, 0.3, 'Home Value Change(%)'),
        (0, inc, 0.6, 'Income Change(%)'),
        (width, grad, 0.9, 'Change in Bachelor Degree(%)'),
    )
)

# Reference lines use the same shade as the bar metric they belong to.
for metric, average, shade in (('Value', aVals, 0.3), ('Income', aInc, 0.6), ('Degree', aGrad, 0.9)):
    ax.axhline(y=average, label=f'Avg {metric} Change (all zips) {average}%', color='blue', alpha=shade)

ax.grid()
tick_locations = list(x_axis)
ax.set_xticks(tick_locations)
ax.set_xticklabels(base_df['Zipcode'], rotation=45)
ax.set_title("% Changes for Fully Gentrified Zip Codes")
ax.set_ylabel("% Change")
lgnd = ax.legend(loc="upper right", title="Metrics")

In [None]:
# The 15 'On-The-Way' zip codes with the largest rent change.
top_rent = gent_df.nlargest(n=15, columns=['Change_In_Rent'])