# Gentrification
This notebook implements the two tests described in Governing Magazine's [Gentrification Report Methodology](http://www.governing.com/gov-data/gentrification-report-methodology.html).

In [157]:
from collections import defaultdict
import geopandas as gpd
import pandas as pd

In [161]:
from configparser import RawConfigParser
import os.path
import uscensus

First load the geocoded addresses and extract the distinct census states and counties (we will retrieve data for all tracts)

In [158]:
df = pd.read_csv('geocoded_addresses.csv', dtype={
    'Geo.FIPS.State': str,
    'Geo.FIPS.County': str,
    'Geo.Tract': str,
    'Geo.Block': str,
})

In [159]:
cols = ['Geo.FIPS.State',
        'Geo.FIPS.County',
       ]
geos = pd.DataFrame(
    list(set(df[cols].dropna().apply(tuple, axis=1))),
    columns=cols
)

In [160]:
geos.shape

(28, 2)

In [162]:
rcp = RawConfigParser()

In [163]:
rcp.read(os.path.expanduser('~/.census'))

['C:\\cygwin64\\home\\nkrishna/.census']

In [164]:
key = rcp.get('census', 'api_key')

In [165]:
cache = uscensus.util.SqlAlchemyCache('sqlite://')

In [166]:
di11 = uscensus.DiscoveryInterface(key, cache, vintage=2011)

Indexing metadata
Done indexing metadata


Start with the 2011 ACS 5-year estimates

In [167]:
acs5_2011 = di11['2011/acs5']

Incrementally search to find the right variables

In [168]:
acs5_2011.searchVariables('concept:B01003')

[('B01003_001M',
  {'concept': 'B01003.  Total Population',
   'group': 'N/A',
   'label': 'Margin Of Error For!!Total',
   'limit': 0,
   'validValues': []}),
 ('B01003_001E',
  {'concept': 'B01003.  Total Population',
   'group': 'N/A',
   'label': 'Total',
   'limit': 0,
   'validValues': []})]

In [187]:
stats_11 = None
fields_11 = {
    'B01003_001E': 'Total.Population',
    'B19013_001E': 'Median.Household.Income',
    'B25077_001E': 'Median.Home.Value',
    'B16010_041E': 'Bachelors.Or.Higher',
}

Fetch the fields for each tract in each county

In [188]:
for st, cty in geos.itertuples(index=False):
    stats_cty = acs5_2011(
        fields.keys(),
        geo_for={'tract': '*'},
        geo_in={'state': st,
         'county': cty,
        })
    if stats_11 is None:
        stats_11 = stats_cty
    else:
        stats_11 = stats_11.append(stats_cty)

> The first test found that a tract was eligible to gentrify if it met the following criteria:
>
> 1. The tract had a population of at least 500 residents at the beginning and end of a decade and was  located within a central city. 
> 2. The tract’s median household income was in the bottom 40th percentile when compared to all tracts within its metro area at the beginning of the decade.
> 3. The tract’s median home value was in the bottom 40th percentile when compared to all tracts within its metro area at the beginning of the decade.


In [189]:
stats_11.rename(columns=fields_11, inplace=True)

stats_11.set_index(['state','county','tract'], inplace=True)

stats_11.sort_values('Median.Household.Income', inplace=True)
stats_11['Income.Rank'] = range(1, stats_11.shape[0] + 1)
stats_11['Income.Percentile'] = stats_11['Income.Rank'] / len(stats_11['Income.Rank'])

stats_11.sort_values('Median.Home.Value', inplace=True)
stats_11['Home.Value.Rank'] = range(1, stats_11.shape[0] + 1)
stats_11['Home.Value.Percentile'] = stats_11['Home.Value.Rank'] / len(stats_11['Home.Value.Rank'])

In [193]:
stats_11.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Total.Population,Bachelors.Or.Higher,Median.Household.Income,Median.Home.Value,Income.Rank,Income.Percentile,Home.Value.Rank,Home.Value.Percentile
state,county,tract,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
05,107,480100,3247,480,32891,85300,1088,0.277268,3479,0.886595
05,107,480200,6206,557,36101,60600,1297,0.330530,2904,0.740061
05,107,480300,6018,272,26771,57200,720,0.183486,2835,0.722477
05,107,480400,1882,108,20895,74600,478,0.121814,3221,0.820846
05,107,480500,2655,179,19775,45800,443,0.112895,2594,0.661060
05,107,480600,2047,87,25313,34400,638,0.162589,2276,0.580020
12,111,380100,767,40,12488,103900,233,0.059378,95,0.024210
12,111,380200,3664,132,18776,86700,411,0.104740,3517,0.896279
12,111,380300,6657,337,23581,78400,562,0.143221,3311,0.843782
12,111,380400,5662,229,29475,99600,849,0.216361,3822,0.974006


And the 2015 ACS 5

In [None]:
di15 = uscensus.DiscoveryInterface(key, cache, vintage=2015)

In [217]:
acs5_2015 = di15.search('American Community Survey 5-year estimates')[0]

In [219]:
for f in fields_11:
    print(acs5_2015.searchVariables('variable:'+f))

[('B01003_001E', {'label': 'Total', 'concept': 'B01003.  Total Population', 'group': 'N/A', 'limit': 0, 'validValues': [], 'predicateType': 'int'})]
[('B16010_041E', {'label': "Bachelor's degree or higher:", 'concept': 'B16010.  EDUCATIONAL ATTAINMENT AND EMPLOYMENT STATUS BY LANGUAGE SPOKEN AT HOME FOR THE POPULATION 25 YEARS AND OVER', 'group': 'N/A', 'limit': 0, 'validValues': [], 'predicateType': 'int'})]
[('B19013_001E', {'label': 'Median household income in the past 12 months (in 2015 Inflation-adjusted dollars)', 'concept': 'B19013. Median Household Income in the Past 12 Months (in 2015 Inflation-Adjusted Dollars)', 'group': 'N/A', 'limit': 0, 'validValues': [], 'predicateType': 'int'})]
[('B25077_001E', {'label': 'Median value (dollars)', 'concept': 'B25077.  Median Value (Dollars) for Owner-Occupied Housing Units', 'group': 'N/A', 'limit': 0, 'validValues': [], 'predicateType': 'int'})]


In [221]:
fields_15 = fields_11
stats_15 = None

In [222]:
for st, cty in geos.itertuples(index=False):
    stats_cty = acs5_2015(
        fields.keys(),
        geo_for={'tract': '*'},
        geo_in={'state': st,
         'county': cty,
        })
    if stats_15 is None:
        stats_15 = stats_cty
    else:
        stats_15 = stats_15.append(stats_cty)

> For a second test, gentrification-eligible tracts were determined to have gentrified over a time period if they met the following criteria:
> 
> 1. An increase in a tract's educational attainment, as measured by the percentage of residents age 25 and over holding bachelor’s degrees, was in the top third percentile of all tracts within a metro area.
> 2. A tract’s median home value increased when adjusted for inflation.
> 3. The percentage increase in a tract’s inflation-adjusted median home value was in the top third percentile of all tracts within a metro area.
>
> Census tracts with missing data for any one of these measures were excluded and considered not eligible to gentrify.

In [223]:
stats_15.rename(columns=fields_15, inplace=True)

stats_15.set_index(['state','county','tract'], inplace=True)

stats_15.sort_values('Median.Household.Income', inplace=True)
stats_15['Income.Rank'] = range(1, stats_15.shape[0] + 1)
stats_15['Income.Percentile'] = stats_15['Income.Rank'] / len(stats_15['Income.Rank'])

stats_15.sort_values('Median.Home.Value', inplace=True)
stats_15['Home.Value.Rank'] = range(1, stats_15.shape[0] + 1)
stats_15['Home.Value.Percentile'] = stats_15['Home.Value.Rank'] / len(stats_15['Home.Value.Rank'])

In [225]:
stats = stats_11.join(stats_15, lsuffix='.2011', rsuffix='.2015')

In [237]:
pop500_2011 = stats['Total.Population.2011'].astype(int) >= 500
pop500_2015 = stats['Total.Population.2015'].astype(int) >= 500
income_bot_40 = stats['Income.Percentile.2011'] < 0.40
home_value_bot_40 = stats['Home.Value.Percentile.2011'] < 0.40

In [238]:
gent_eligible = pop500_2011 & pop500_2015 & income_bot_40 & home_value_bot_40

In [239]:
gent_eligible.sum()

387

In [240]:
stats['Gentrification.Eligible'] = gent_eligible

In [246]:
stats['Ed.Attainment.Increase'] = (
    (stats['Bachelors.Or.Higher.2015'].astype(int) /
     stats['Total.Population.2015'].astype(int)) -
    (stats['Bachelors.Or.Higher.2011'].astype(int) /
     stats['Total.Population.2011'].astype(int)))

In [248]:
stats.sort_values('Ed.Attainment.Increase', inplace=True)

In [249]:
stats['Ed.Attainment.Increase.Rank'] = range(1, stats.shape[0] + 1)
stats['Ed.Attainment.Increase.Percentile'] = stats['Ed.Attainment.Increase.Rank'] / len(stats['Ed.Attainment.Increase.Rank'])

In [250]:
top_third_ed_att_inc = stats['Ed.Attainment.Increase.Percentile'] >= 2/3

For the inflation adjustment rate, I used the BLS's calculator:
https://data.bls.gov/cgi-bin/cpicalc.pl?cost1=100&year1=201101&year2=201501

In [258]:
inflation = 1.0612
has_val = stats.dropna(subset=['Median.Home.Value.2011', 'Median.Home.Value.2015'])
home_val_change = (has_val['Median.Home.Value.2015'].astype(int) -
                   has_val['Median.Home.Value.2011'].astype(int) * inflation)

In [259]:
stats['Home.Value.Increased'] = home_val_change > 0
stats['Home.Value.Change'] = home_val_change
stats.sort_values('Home.Value.Change', inplace=True)
stats['Home.Value.Change.Rank'] = range(1, stats.shape[0] + 1)
stats['Home.Value.Change.Percentile'] = stats['Home.Value.Change.Rank'] / len(stats['Home.Value.Change.Rank'])

In [266]:
gentrified = (stats['Gentrification.Eligible'] &
              top_third_ed_att_inc &
              stats['Home.Value.Increased'] &
              stats['Home.Value.Change.Percentile'] >= 2/3)

In [268]:
stats['Gentrified'] = gentrified

In [270]:
stats[stats['Gentrified']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Total.Population.2011,Bachelors.Or.Higher.2011,Median.Household.Income.2011,Median.Home.Value.2011,Income.Rank.2011,Income.Percentile.2011,Home.Value.Rank.2011,Home.Value.Percentile.2011,Total.Population.2015,Bachelors.Or.Higher.2015,...,Home.Value.Percentile.2015,Gentrification.Eligible,Ed.Attainment.Increase,Ed.Attainment.Increase.Rank,Ed.Attainment.Increase.Percentile,Home.Value.Increased,Home.Value.Change,Home.Value.Change.Rank,Home.Value.Change.Percentile,Gentrified
state,county,tract,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
48,201,342800,8692,1542,109569,166500,120,0.030581,1115,0.284149,8797,1823,...,0.575433,True,0.029825,2820,0.718654,True,10.2000,2568,0.654434,True
48,201,541203,2413,834,109792,178800,121,0.030836,1235,0.314730,2536,985,...,0.608563,True,0.042779,3229,0.822885,True,57.4400,2571,0.655199,True
48,201,340900,5083,538,34711,115900,1204,0.306830,345,0.087920,4848,724,...,0.392712,True,0.043497,3246,0.827217,True,906.9200,2643,0.673547,True
48,29,121403,5589,307,33503,102300,1118,0.284913,55,0.014016,5975,568,...,0.322120,True,0.040133,3146,0.801733,True,1039.2400,2648,0.674822,True
48,439,113407,6452,382,38864,116300,1462,0.372579,356,0.090724,6073,509,...,0.395770,True,0.024607,2626,0.669215,True,1082.4400,2652,0.675841,True
48,113,000405,2538,113,21685,182100,504,0.128440,1266,0.322630,2260,338,...,0.623089,True,0.105034,3819,0.973242,True,1155.4800,2659,0.677625,True
48,29,181725,5218,341,34606,109100,1197,0.305046,204,0.051988,5404,584,...,0.358563,True,0.042717,3227,0.822375,True,1323.0800,2674,0.681448,True
48,453,002108,3814,419,31853,137200,1013,0.258155,737,0.187819,3857,603,...,0.480377,True,0.046481,3309,0.843272,True,2203.3600,2741,0.698522,True
48,439,113929,8696,2079,102750,183700,43,0.010958,1283,0.326962,9188,2559,...,0.626402,True,0.039440,3132,0.798165,True,2357.5600,2758,0.702854,True
48,453,001805,4547,405,33700,119100,1131,0.288226,400,0.101937,5030,612,...,0.410805,True,0.032600,2909,0.741335,True,2611.0800,2777,0.707696,True


In [271]:
stats[['Gentrification.Eligible', 'Gentrified']].to_csv('gentrification.csv')