# Import packages, set directories

In [2]:
print('Importing packages...')
import os
import pandas
import time
import numpy as np
from matplotlib import pyplot as plt
import geopandas
import re
from IPython.display import display, HTML
from pprint import pprint

pandas.set_option('display.max_colwidth', -1)
debug = 2    # debug = 2 means also show maps

#basedir = '/home/idies/workspace/Storage/raddick/Baltimore/community_reinvestment_act/datasets/acs5/'
#census_data_basedir = '/home/idies/workspace/Storage/raddick/Baltimore/community_reinvestment_act/datasets/acs5/'

thisdir = '/home/idies/workspace/Storage/raddick/Baltimore/community_reinvestment_act/'
datadir = thisdir + 'datasets/'
census_data_dir = datadir + 'acs5/'
census_acs5_rawdata_basedir = '/home/idies/workspace/Temporary/raddick/census_scratch/acs5/'

shapefile_basedir = '/home/idies/workspace/Storage/raddick/Baltimore/shapefiles/'
census_shapefile_tiger_basedir = '/home/idies/workspace/Temporary/raddick/census_scratch/shapefiles/'

inflation_dir = '/home/idies/workspace/Storage/raddick/Baltimore/community_reinvestment_act/datasets/inflation/'

figdir = thisdir + 'figures/'
outdir = thisdir + 'datasets_for_analysis/'

for x in [datadir, figdir]:
    if not(os.path.exists(x)):
        os.makedirs(x)

os.chdir(thisdir)
os.getcwd()
#os.listdir()

Importing packages...


'/home/idies/workspace/Storage/raddick/Baltimore/community_reinvestment_act'

# Read CRA loan data by census tract 2010-2017

In [3]:
# read aggregate data
agg_tracts_df = pandas.read_csv(datadir+'loans_by_census_tract_2010_2017.csv', encoding='utf-8', low_memory=False)
#agg_tracts_df = agg_tracts_df[agg_tracts_df['activity_year'] >= 2012]

# parse amounts in dollars into numerics
for thiscol in ['amtLoans1', 'amtLoans100k', 'amtLoans250k', 'amtLoansToSmallest', 'amtLoansTotal']:
    agg_tracts_df.loc[:, thiscol] = pandas.to_numeric(agg_tracts_df[thiscol].apply(lambda x: str(x).replace(',','')[1:]), errors='coerce')

# Add community statistical area names
tract_to_csa_df = pandas.read_csv('census_tract_to_neighborhood.csv', index_col='NAME10')
agg_tracts_df = agg_tracts_df.assign(CSA2010 = agg_tracts_df.join(tract_to_csa_df, how='left', on='census_tract')['CSA2010'])

# Get income group names
agg_tracts_df = agg_tracts_df.assign(income_group = np.nan)
agg_tracts_df.loc[agg_tracts_df['income_group_code'] == 1, 'income_group'] = '< 10% of Median Family Income (MFI)'
agg_tracts_df.loc[agg_tracts_df['income_group_code'] == 2, 'income_group'] = '10% to 20% of MFI'
agg_tracts_df.loc[agg_tracts_df['income_group_code'] == 3, 'income_group'] = '20% to 30% of MFI'
agg_tracts_df.loc[agg_tracts_df['income_group_code'] == 4, 'income_group'] = '30% to 40% of MFI'
agg_tracts_df.loc[agg_tracts_df['income_group_code'] == 5, 'income_group'] = '40% to 50% of MFI'
agg_tracts_df.loc[agg_tracts_df['income_group_code'] == 6, 'income_group'] = '50% to 60% of MFI'
agg_tracts_df.loc[agg_tracts_df['income_group_code'] == 7, 'income_group'] = '60% to 70% of MFI'
agg_tracts_df.loc[agg_tracts_df['income_group_code'] == 8, 'income_group'] = '70% to 80% of MFI'
agg_tracts_df.loc[agg_tracts_df['income_group_code'] == 9, 'income_group'] = '80% to 90% of MFI'
agg_tracts_df.loc[agg_tracts_df['income_group_code'] == 10, 'income_group'] = '90% to 100% of MFI'
agg_tracts_df.loc[agg_tracts_df['income_group_code'] == 11, 'income_group'] = '100% to 110% of MFI'
agg_tracts_df.loc[agg_tracts_df['income_group_code'] == 12, 'income_group'] = '110% to 120% of MFI'
agg_tracts_df.loc[agg_tracts_df['income_group_code'] == 13, 'income_group'] = '> 120% of MFI'

# Get levels (low, moderate, middle, upper)
agg_tracts_df = agg_tracts_df.assign(cra_level = np.nan)
agg_tracts_df.loc[(agg_tracts_df['income_group_code'] >= 1) & (agg_tracts_df['income_group_code'] <= 5), 'cra_level'] = 'low'
agg_tracts_df.loc[(agg_tracts_df['income_group_code'] >= 6) & (agg_tracts_df['income_group_code'] <= 8), 'cra_level'] = 'moderate'
agg_tracts_df.loc[(agg_tracts_df['income_group_code'] >= 9) & (agg_tracts_df['income_group_code'] <= 12), 'cra_level'] = 'middle'
agg_tracts_df.loc[(agg_tracts_df['income_group_code'] == 13), 'cra_level'] = 'upper'
agg_tracts_df.loc[(agg_tracts_df['income_group_code'] == 14), 'cra_level'] = 'unknown'

#agg_tracts_df = agg_tracts_df.merge(tract_shapes_gdf, how='left', left_on='census_tract', right_on='NAME')

#agg_tracts_gdf = geopandas.GeoDataFrame(agg_tracts_df, crs=tract_shapes_gdf.crs, geometry='geometry')

#agg_tracts_df.groupby('activity_year').size()
#print('Done')

print('Keeping only 2011-2017 data...')
agg_tracts_df = agg_tracts_df[agg_tracts_df['activity_year'] >= 2011]
agg_tracts_df.head(1)



Keeping only 2011-2017 data...


Unnamed: 0,census_tract,activity_year,income_group_code,nLoans1,amtLoans1,nLoans100k,amtLoans100k,nLoans250k,amtLoans250k,nLoansToSmallest,amtLoansToSmallest,nLoansTotal,amtLoansTotal,CSA2010,income_group,cra_level
0,101.0,2017,13,73,1047000,0,0,0,0,43,621000,73,1047000,Canton,> 120% of MFI,upper


### Check: how many census tracts reported each year?

In [3]:
print("Number of census tracts with loans per year, 2011-2017")
agg_tracts_df.groupby('activity_year').size()

Number of census tracts with loans per year, 2011-2017


activity_year
2011    197
2012    198
2013    197
2014    198
2015    199
2016    200
2017    199
dtype: int64

# Add census data (ACS 5-year estimates for Macvars 2011-2017)

In [4]:
reinvestment_df = pandas.DataFrame()

for thisyear in range(2017,2010,-1):
    #agg_tracts_gdf = agg_tracts_gdf.set_index(['census_tract', 'activity_year'])
    xdf = agg_tracts_df[agg_tracts_df['activity_year'] == thisyear]
    xdf = xdf.set_index('census_tract')
    if (debug > 0):
        print('Getting loans for year {0:.0f}...'.format(thisyear))
    if (debug > 1):
        print('reading ACS5 data...')
    metadata_df = pandas.read_csv(census_data_dir+'acs5_metadata_md_2017.csv', encoding='utf-8', index_col='varnum')
    metadata_df = metadata_df.sort_index()
    acs5_x_df = pandas.read_csv(census_data_dir+'acs5_md_2017.csv', encoding='utf-8')
    numeric_columns = [x for x in acs5_x_df.columns.tolist() if '_' in x]
    for x in numeric_columns:
        acs5_x_df.loc[:, x] = pandas.to_numeric(acs5_x_df[x], errors='coerce')

    if (debug > 1):
        print('keeping census data only for Baltimore City tracts...')
    acs5_x_df = acs5_x_df[acs5_x_df['GEOID'].apply(lambda x: '14000US24510' in x)]
    
    if (debug > 1):
        print('merging census data with loan data...')
    acs5_x_df = acs5_x_df.assign(census_tract = pandas.to_numeric(acs5_x_df['GEOID'].apply(lambda x: x[12:16]+'.'+x[16:]), errors='coerce'))
    xdf = xdf.reset_index().merge(acs5_x_df, how='left', on='census_tract').set_index('census_tract')

    if (debug > 1):
        print('Assembling years into one dataframe...')
    reinvestment_df = pandas.concat((reinvestment_df, xdf), axis=0)

print('indexing by tract and year...')
reinvestment_df = reinvestment_df.reset_index()
reinvestment_df = reinvestment_df.set_index(['census_tract', 'activity_year'])

print('backing up...')
reinvestment_df_bk = reinvestment_df
metadata_df_bk = metadata_df

#reinvestment_df.sample(1).T
print('ok')

Getting loans for year 2017...
reading ACS5 data...
keeping census data only for Baltimore City tracts...
merging census data with loan data...
Assembling years into one dataframe...
Getting loans for year 2016...
reading ACS5 data...
keeping census data only for Baltimore City tracts...
merging census data with loan data...
Assembling years into one dataframe...
Getting loans for year 2015...
reading ACS5 data...
keeping census data only for Baltimore City tracts...
merging census data with loan data...
Assembling years into one dataframe...
Getting loans for year 2014...
reading ACS5 data...
keeping census data only for Baltimore City tracts...
merging census data with loan data...
Assembling years into one dataframe...
Getting loans for year 2013...
reading ACS5 data...
keeping census data only for Baltimore City tracts...
merging census data with loan data...
Assembling years into one dataframe...
Getting loans for year 2012...
reading ACS5 data...
keeping census data only for Balt

## Give ACS5 census variables human-readable names

### Estimates

In [5]:
# IV: population by race; owner-occupied units; MFI; hs grad pct (age 25 and older); 
### female hoh pct; unemployment pct (age 18 and older); poverty pct; median home value;
### median home year built

print('getting from backup...')
reinvestment_df = reinvestment_df_bk
#metadata_df = metadata_df_bk
#metadata_df = metadata_df.set_index('variable')

print('\ncalculating and renaming estimates columns for IVs...')

if (debug >= 1):
    print('...race, owner-occupied units, mfi...')
reinvestment_df = reinvestment_df.rename(columns = {
    'B02001_002': 'pop_white',
    'B02001_003': 'pop_black',
    'B25003_002': 'owner_occ_housing_units',
    'B19113_001': 'mfi'    
})
if (debug >= 1):
    print('...high school graduates 25 years and older...')
reinvestment_df = reinvestment_df.assign(hs_grad_25plus = pandas.to_numeric(reinvestment_df['B15002_011'] + reinvestment_df['B15002_028'], errors='coerce'))
if (debug >= 1):
    print('...householder sex & race, unempoyment, poverty, home value, home age...')
reinvestment_df = reinvestment_df.rename(columns = {     
    'B11001_006': 'female_householder',
    'B11001B_001': 'black_householder',
    'B11001H_001': 'white_householder',
    'B23025_005': 'unemployed_16plus',
    'B17001_002': 'poverty_past_12_months',
    'B25077_001': 'median_home_value',
    'B25035_001': 'median_year_built'
})

print('\ncalculating and renaming estimates columns for comparison variables...')
if (debug >= 1):
    print('...race, owner-occupied units, mfi...')
reinvestment_df = reinvestment_df.rename(columns = {
    'B01001_001': 'pop_total',
    'B11001_001': 'total_householders',
    'B17001_001': 'poverty_status_known_last12months_total'
})
if (debug >= 1):
    print('...population 25plus...')
reinvestment_df = reinvestment_df.assign(pop_25plus = 
                                         pandas.to_numeric(
                                             (reinvestment_df['B01001_011'] + reinvestment_df['B01001_012'] + reinvestment_df['B01001_013'] 
                                              + reinvestment_df['B01001_014'] + reinvestment_df['B01001_015'] + reinvestment_df['B01001_016']
                                              + reinvestment_df['B01001_017'] + reinvestment_df['B01001_018'] + reinvestment_df['B01001_019']
                                              + reinvestment_df['B01001_020'] + reinvestment_df['B01001_021'] + reinvestment_df['B01001_022']
                                              + reinvestment_df['B01001_023'] + reinvestment_df['B01001_024'] + reinvestment_df['B01001_025']
                                              + reinvestment_df['B01001_035'] + reinvestment_df['B01001_036'] + reinvestment_df['B01001_037']
                                              + reinvestment_df['B01001_038'] + reinvestment_df['B01001_039'] + reinvestment_df['B01001_040']
                                              + reinvestment_df['B01001_041'] + reinvestment_df['B01001_042'] + reinvestment_df['B01001_043']
                                              + reinvestment_df['B01001_044'] + reinvestment_df['B01001_045'] + reinvestment_df['B01001_046']
                                              + reinvestment_df['B01001_047'] + reinvestment_df['B01001_048'] + reinvestment_df['B01001_049']
                                             ), errors='coerce'
                                         )
                                        )
if (debug >= 1):
    print('...labor force, poverty status known...')
reinvestment_df = reinvestment_df.rename(columns = {
    'B23025_002': 'labor_force_16plus',
    'B17001_001': 'poverty_status_known'
})

print('\nbacking up...')
reinvestment_df_bk = reinvestment_df
print('ok')

getting from backup...

calculating and renaming estimates columns for IVs...
...race, owner-occupied units, mfi...
...high school graduates 25 years and older...
...householder sex & race, unempoyment, poverty, home value, home age...

calculating and renaming estimates columns for comparison variables...
...race, owner-occupied units, mfi...
...population 25plus...
...labor force, poverty status known...

backing up...
ok


### Margins of error

#### Define functions to calculate standard error

In [6]:
### Guide on how to calculate errors in percentages:
# https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018_ch08.pdf
    
## Aggregating Data Across Population Subgroups: add error for each group in quadrature, divide by 1.645 for serr

def find_serr_hsgrad25plus(row):
    return pandas.to_numeric(np.sqrt(row['B15002_011_err']**2 + row['B15002_028_err']**2) / 1.645, errors='coerce')

def find_serr_pop25plus(row):
    return pandas.to_numeric(np.sqrt(row['B01001_011_err']**2 + row['B01001_012_err']**2 + row['B01001_013_err']**2 
                                     + row['B01001_014_err']**2 + row['B01001_015_err']**2 + row['B01001_016_err']**2 
                                     + row['B01001_017_err']**2 + row['B01001_018_err']**2 + row['B01001_019_err']**2 
                                     + row['B01001_020_err']**2 + row['B01001_021_err']**2 + row['B01001_022_err']**2 
                                     + row['B01001_023_err']**2 + row['B01001_024_err']**2 + row['B01001_025_err']**2 
                                     + row['B01001_035_err']**2 + row['B01001_036_err']**2 + row['B01001_037_err']**2 
                                     + row['B01001_038_err']**2 + row['B01001_039_err']**2 + row['B01001_040_err']**2 
                                     + row['B01001_041_err']**2 + row['B01001_042_err']**2 + row['B01001_043_err']**2 
                                     + row['B01001_044_err']**2 + row['B01001_045_err']**2 + row['B01001_046_err']**2 
                                     + row['B01001_047_err']**2 + row['B01001_048_err']**2 + row['B01001_049_err']**2 
                                    ) / 1.645, errors='coerce')
print('Defined standard-error-calculating functions!')
print('ok')

Defined standard-error-calculating functions!
ok


#### Get margins of error for all columns

In [7]:
print('getting from backup...')
reinvestment_df = reinvestment_df_bk
#metadata_df = metadata_df_bk
#metadata_df = metadata_df.set_index('variable')

print('\ncalculating and renaming margins of error columns for IVs...')

if (debug >= 1):
    print('...margins for race, owner-occupied units, mfi...')
reinvestment_df = reinvestment_df.rename(columns = {
    'B02001_002_err': 'pop_white_err',
    'B02001_003_err': 'pop_black_err',
    'B25003_002_err': 'owner_occ_housing_units_err',
    'B19113_001_err': 'mfi_err'    
})

if (debug >= 1):
    print('...standard errors for hs graduates 25 and older (using custom serr-finding function...')
reinvestment_df = reinvestment_df.assign(hs_grad_25plus_serr = pandas.to_numeric(reinvestment_df.apply(lambda row: find_serr_hsgrad25plus(row), axis=1), errors='coerce'))

if (debug >= 1):
    print('...margins of error for householder sex & race, unempoyment, poverty, home value, home age...')
reinvestment_df = reinvestment_df.rename(columns = {     
    'B11001_006_err': 'female_householder_err',
    'B11001B_001_err': 'black_householder_err',
    'B11001H_001_err': 'white_householder_err',
    'B23025_005_err': 'unemployed_16plus_err',
    'B17001_002_err': 'poverty_past_12_months_err',
    'B25077_001_err': 'median_home_value_err',
    'B25035_001_err': 'median_year_built_err'
})

print('\ncalculating and renaming margins of error for comparison variables...')
if (debug >= 1):
    print('...race, owner-occupied units, mfi...')
reinvestment_df = reinvestment_df.rename(columns = {
    'B01001_001_err': 'pop_total_err',
    'B11001_001_err': 'total_householders_err',
    'B17001_001_err': 'poverty_status_known_last12months_total_err'
})
if (debug >= 1):
    print('...population 25plus...')
reinvestment_df = reinvestment_df.assign(pop_25plus_serr = pandas.to_numeric(reinvestment_df.apply(lambda row: find_serr_pop25plus(row), axis=1), errors='coerce'))


if (debug >= 1):
    print('...labor force, poverty status known...')
reinvestment_df = reinvestment_df.rename(columns = {
    'B23025_002_err': 'labor_force_16plus_err',
    'B17001_001_err': 'poverty_status_known_err'
})

print('\nbacking up...')
reinvestment_df_bk = reinvestment_df

print('ok')
#reinvestment_df.sample(1).T

#reinvestment_df[['hs_grad_25plus', 'hs_grad_25plus_serr', 'pop_25plus', 'pop_25plus_serr']]


getting from backup...

calculating and renaming margins of error columns for IVs...
...margins for race, owner-occupied units, mfi...
...standard errors for hs graduates 25 and older (using custom serr-finding function...
...margins of error for householder sex & race, unempoyment, poverty, home value, home age...

calculating and renaming margins of error for comparison variables...
...race, owner-occupied units, mfi...
...population 25plus...
...labor force, poverty status known...

backing up...
ok


### Optional for later: drop partial variables used in calculations...

In [None]:
'''
print('getting from backup...')
reinvestment_df = reinvestment_df_bk

print('\ndropping partial variables used in calculations...')
reinvestment_df = reinvestment_df.drop(
    [
     'B15002_011', 'B15002_028', 'B01001_011', 'B01001_012', 'B01001_013', 'B01001_014', 'B01001_015', 'B01001_016', 
     'B01001_017', 'B01001_018', 'B01001_019', 'B01001_020', 'B01001_021', 'B01001_022', 'B01001_023', 'B01001_024', 'B01001_025', 
     'B01001_035', 'B01001_036', 'B01001_037', 'B01001_038', 'B01001_039', 'B01001_040', 'B01001_041', 'B01001_042',
     'B01001_043', 'B01001_044', 'B01001_045', 'B01001_046', 'B01001_047', 'B01001_048', 'B01001_049'
    ]
, axis=1)
reinvestment_df = reinvestment_df.drop(
    [
     'B15002_011', 'B15002_028', 'B01001_011', 'B01001_012', 'B01001_013', 'B01001_014', 'B01001_015', 'B01001_016', 
     'B01001_017', 'B01001_018', 'B01001_019', 'B01001_020', 'B01001_021', 'B01001_022', 'B01001_023', 'B01001_024', 'B01001_025', 
     'B01001_035', 'B01001_036', 'B01001_037', 'B01001_038', 'B01001_039', 'B01001_040', 'B01001_041', 'B01001_042',
     'B01001_043', 'B01001_044', 'B01001_045', 'B01001_046', 'B01001_047', 'B01001_048', 'B01001_049'
    ]
, axis=1)

print('backing up...')
reinvestment_df_bk = reinvestment_df

print('ok')
'''

## Calculate percentages of each ACS5 variable 

For better direct comparison

### Which variables?

In [8]:
vars_for_percentification = ['pop_white', 'pop_black', 'black_householder', 'white_householder']
vars_for_percentification += ['owner_occ_housing_units', 'hs_grad_25plus', 'female_householder']
vars_for_percentification += ['unemployed_16plus', 'poverty_past_12_months']

vars_for_percentification += ['pop_white_err', 'pop_black_err', 'black_householder_err', 'white_householder_err']
vars_for_percentification += ['owner_occ_housing_units_err', 'hs_grad_25plus_serr', 'female_householder_err']
vars_for_percentification += ['unemployed_16plus_err', 'poverty_past_12_months_err']

vars_for_percentification += ['pop_total', 'total_householders', 'pop_25plus', 'labor_force_16plus']
vars_for_percentification += ['poverty_status_known_last12months_total']

vars_for_percentification += ['pop_total_err', 'total_householders_err', 'pop_25plus_serr', 'labor_force_16plus_err']
vars_for_percentification += ['poverty_status_known_last12months_total_err']

reinvestment_df[vars_for_percentification].columns.tolist()

['pop_white',
 'pop_black',
 'black_householder',
 'white_householder',
 'owner_occ_housing_units',
 'hs_grad_25plus',
 'female_householder',
 'unemployed_16plus',
 'poverty_past_12_months',
 'pop_white_err',
 'pop_black_err',
 'black_householder_err',
 'white_householder_err',
 'owner_occ_housing_units_err',
 'hs_grad_25plus_serr',
 'female_householder_err',
 'unemployed_16plus_err',
 'poverty_past_12_months_err',
 'pop_total',
 'total_householders',
 'pop_25plus',
 'labor_force_16plus',
 'poverty_status_known_last12months_total',
 'pop_total_err',
 'total_householders_err',
 'pop_25plus_serr',
 'labor_force_16plus_err',
 'poverty_status_known_last12months_total_err']

### Estimates

#### Define functions to calculate percentages in estimates

In [1]:
def find_pop_white_pct(row):
    try:
        return pandas.to_numeric(row['pop_white'] / row['pop_total'], errors='coerce')
    except ZeroDivisionError:
        return np.nan
def find_pop_black_pct(row):
    try:
        return pandas.to_numeric(row['pop_black'] / row['pop_total'], errors='coerce')
    except ZeroDivisionError:
        return np.nan

def find_white_householder_pct(row):
    try:
        return pandas.to_numeric(row['white_householder'] / row['total_householders'], errors='coerce')
    except ZeroDivisionError:
        return np.nan
def find_black_householder_pct(row):
    try:
        return pandas.to_numeric(row['black_householder'] / row['total_householders'], errors='coerce')
    except ZeroDivisionError:
        return np.nan
def find_female_householder_pct(row):
    try:
        return pandas.to_numeric(row['female_householder'] / row['total_householders'], errors='coerce')
    except ZeroDivisionError:
        return np.nan

def find_hs_grad_25plus_pct(row):
    try:
        return pandas.to_numeric(row['hs_grad_25plus'] / row['pop_25plus'], errors='coerce')
    except ZeroDivisionError:
        return np.nan
def find_unemployed_16plus_pct(row):
    try:
        return pandas.to_numeric(row['unemployed_16plus'] / row['labor_force_16plus'], errors='coerce')
    except ZeroDivisionError:
        return np.nan
def find_poverty_past_12_months_pct(row):
    try:
        return pandas.to_numeric(row['poverty_past_12_months'] / row['poverty_status_known_last12months_total'], errors='coerce')
    except ZeroDivisionError:
        return np.nan
    
print('Defined functions to calculate percentages!')
print('ok')

Defined functions to calculate percentages!
ok


#### Calculate percentages

In [10]:
print('getting from backup...')
reinvestment_df = reinvestment_df_bk

# compare to pop_total: pop_white, pop_black
# compare to total_householders: black_householder, white_householder, female_householder
# compare to pop_25plus: hs_grad_25plus
# compare to labor_force_16plus: unemployed_16plus
# compare to poverty_status_known_last12months_total: poverty_past_12_months

print('\ncalculating percentages...')
print('...white...')
reinvestment_df = reinvestment_df.assign(pop_white_pct = reinvestment_df.apply(lambda row: find_pop_white_pct(row), axis=1))
print('...black...')
reinvestment_df = reinvestment_df.assign(pop_black_pct = reinvestment_df.apply(lambda row: find_pop_black_pct(row), axis=1))
print('...white householder...')
reinvestment_df = reinvestment_df.assign(white_householder_pct = reinvestment_df.apply(lambda row: find_white_householder_pct(row), axis=1))
print('...black householder...')
reinvestment_df = reinvestment_df.assign(black_householder_pct = reinvestment_df.apply(lambda row: find_black_householder_pct(row), axis=1))
print('...female householder...')
reinvestment_df = reinvestment_df.assign(female_householder_pct = reinvestment_df.apply(lambda row: find_female_householder_pct(row), axis=1))

print('...hs graduates age 25 and over...')
reinvestment_df = reinvestment_df.assign(hs_grad_25plus_pct = reinvestment_df.apply(lambda row: find_hs_grad_25plus_pct(row), axis=1))
print('...unemployed age 16 and over...')
reinvestment_df = reinvestment_df.assign(unemployed_16plus_pct = reinvestment_df.apply(lambda row: find_unemployed_16plus_pct(row), axis=1))
print('...poverty status last 12 months...')
reinvestment_df = reinvestment_df.assign(poverty_past_12_months_pct = reinvestment_df.apply(lambda row: find_poverty_past_12_months_pct(row), axis=1))

print('\nbacking up...')
reinvestment_df_bk = reinvestment_df

percentified_vars = ['pop_white_pct', 'pop_black_pct', 'white_householder_pct', 'black_householder_pct', 'hs_grad_25plus_pct', 'unemployed_16plus_pct', 'poverty_past_12_months_pct']
#reinvestment_df[percentified_vars]
print('ok')
#reinvestment_df[vars_for_percentification].sample(1).T

getting from backup...

calculating percentages...
...white...
...black...
...white householder...
...black householder...
...female householder...
...hs graduates age 25 and over...
...unemployed age 16 and over...
...poverty status last 12 months...

backing up...
ok


### Margins of error

#### Define functions to calculate standard errors in percentages

In [11]:
#Guide on how to do this:
#### https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018_ch08.pdf

# X and Y are the measured values (not the errors) - X for the subsgroup and Y for the whole sample
# Let P = X/Y  (the proportion we calculated in the last step)
# dX and dY are the measured errors
# dP = (1/Y) * np.sqrt(dX**2 - (P**2 * dY**2))
# Standard error of P is dP/1.645
#### this calculation is done verbosely in fnid_pop_white_serr, quickly in other functions

def find_pop_white_pct_serr(row, verboselevel = 0):
    X = row['pop_white']
    dX = row['pop_white_err']
    Y = row['pop_total']
    dY = row['pop_total_err']
    try:
        P = X / Y
        oneoverY = 1 / Y
        dXsq = dX**2
        dYsq = dY**2
        Psq = P**2
        PsqdYsq = Psq * dYsq
        if (PsqdYsq <= dXsq):
            underroot = dXsq - PsqdYsq
        else:
            underroot = dXsq + PsqdYsq
        rooty = np.sqrt(underroot)
        dP = oneoverY * rooty
        SE = dP / 1.645
        if (verboselevel >= 2):
            print('\n')
            print('Census tract {0:} in year {1:.0f}:'.format(row.name[0], row.name[1]))
            print('X = pop_white, Y = pop_total')
            print('X = {0:.0f}, dX = {1:.0f} ({2:.1%} error)'.format(X, dX, dX/X))
            print('Y = {0:.0f}, dY = {1:.0f} ({2:.1%} error)'.format(Y, dY, dY/Y))
            print('P = {0:.3f}'.format(P))
            print('dXsq = {0:.0f}, dYsq = {1:.0f}, Psq = {2:.3f}'.format(dXsq, dYsq, Psq))
            print('PsqdYsq = {0:.0f}, underroot = {1:.0f}, rooty = {2:.3f}'.format(PsqdYsq, underroot, rooty))
            print('dP = {0:.3f}'.format(dP))
            print('SE = {0:.3f}'.format(SE))
        if (verboselevel >= 1):
            print('RESULT: {0:.2%} +/- {1:.2%}'.format(P, SE)) 
        return pandas.to_numeric(SE, errors='coerce')
    except ZeroDivisionError:
        return np.nan

def find_pop_black_pct_serr(row):
    try:
        if ((((row['pop_black'] / row['pop_total'])**2) * (row['pop_total_err']**2)) <= (row['pop_black_err']**2)):
            return pandas.to_numeric(((1 / row['pop_total']) * np.sqrt((row['pop_black_err']**2) - (((row['pop_black'] / row['pop_total'])**2) * (row['pop_total_err']**2)))) / 1.645, errors='coerce')
        else:
            return pandas.to_numeric(((1 / row['pop_total']) * np.sqrt((row['pop_black_err']**2) + (((row['pop_black'] / row['pop_total'])**2) * (row['pop_total_err']**2)))) / 1.645, errors='coerce')
    except ZeroDivisionError:
        return np.nan
    
def find_white_householder_pct_serr(row):
    try:
        if ((((row['white_householder'] / row['total_householders'])**2) * (row['total_householders_err']**2)) <= (row['white_householder_err']**2)):
            return pandas.to_numeric(((1 / row['total_householders']) * np.sqrt((row['white_householder_err']**2) - (((row['white_householder'] / row['total_householders'])**2) * (row['total_householders_err']**2)))) / 1.645, errors='coerce')
        else:
            return pandas.to_numeric(((1 / row['total_householders']) * np.sqrt((row['white_householder_err']**2) + (((row['white_householder'] / row['total_householders'])**2) * (row['total_householders_err']**2)))) / 1.645, errors='coerce')
    except ZeroDivisionError:
        return np.nan
def find_black_householder_pct_serr(row):
    try:
        if ((((row['black_householder'] / row['total_householders'])**2) * (row['total_householders_err']**2)) <= (row['black_householder_err']**2)):
            return pandas.to_numeric(((1 / row['total_householders']) * np.sqrt((row['black_householder_err']**2) - (((row['black_householder'] / row['total_householders'])**2) * (row['total_householders_err']**2)))) / 1.645, errors='coerce')
        else:
            return pandas.to_numeric(((1 / row['total_householders']) * np.sqrt((row['black_householder_err']**2) + (((row['black_householder'] / row['total_householders'])**2) * (row['total_householders_err']**2)))) / 1.645, errors='coerce')
    except ZeroDivisionError:
        return np.nan
def find_female_householder_pct_serr(row):
    try:
        if ((((row['female_householder'] / row['total_householders'])**2) * (row['total_householders_err']**2)) <= (row['female_householder_err']**2)):
            return pandas.to_numeric(((1 / row['total_householders']) * np.sqrt((row['female_householder_err']**2) - (((row['female_householder'] / row['total_householders'])**2) * (row['total_householders_err']**2)))) / 1.645, errors='coerce')
        else:
            return pandas.to_numeric(((1 / row['total_householders']) * np.sqrt((row['female_householder_err']**2) + (((row['female_householder'] / row['total_householders'])**2) * (row['total_householders_err']**2)))) / 1.645, errors='coerce')
    except ZeroDivisionError:
        return np.nan

def find_hs_grad_25plus_pct_serr(row):
    try:
        if ((((row['hs_grad_25plus'] / row['pop_25plus'])**2) * (row['pop_25plus_serr']**2)) <= (row['hs_grad_25plus_serr']**2)):
            return pandas.to_numeric(((1 / row['pop_25plus']) * np.sqrt((row['hs_grad_25plus_serr']**2) - (((row['hs_grad_25plus'] / row['pop_25plus'])**2) * (row['pop_25plus_serr']**2)))) / 1.645, errors='coerce')
        else:
            return pandas.to_numeric(((1 / row['pop_25plus']) * np.sqrt((row['hs_grad_25plus_serr']**2) + (((row['hs_grad_25plus'] / row['pop_25plus'])**2) * (row['pop_25plus_serr']**2)))) / 1.645, errors='coerce')
    except ZeroDivisionError:
        return np.nan

def find_unemployed_16plus_pct_serr(row):
    try:
        if ((((row['unemployed_16plus'] / row['labor_force_16plus'])**2) * (row['labor_force_16plus_err']**2)) <= (row['unemployed_16plus_err']**2)):
            return pandas.to_numeric(((1 / row['labor_force_16plus']) * np.sqrt((row['unemployed_16plus_err']**2) - (((row['unemployed_16plus'] / row['labor_force_16plus'])**2) * (row['labor_force_16plus_err']**2)))) / 1.645, errors='coerce')
        else:
            return pandas.to_numeric(((1 / row['labor_force_16plus']) * np.sqrt((row['unemployed_16plus_err']**2) + (((row['unemployed_16plus'] / row['labor_force_16plus'])**2) * (row['labor_force_16plus_err']**2)))) / 1.645, errors='coerce')
    except ZeroDivisionError:
        return np.nan

def find_poverty_past_12_months_pct_serr(row):
    try:
        if ((((row['poverty_past_12_months'] / row['poverty_status_known_last12months_total'])**2) * (row['poverty_status_known_last12months_total_err']**2)) <= (row['poverty_past_12_months_err']**2)):
            return pandas.to_numeric(((1 / row['poverty_status_known_last12months_total']) * np.sqrt((row['poverty_past_12_months']**2) - (((row['poverty_past_12_months'] / row['poverty_status_known_last12months_total'])**2) * (row['poverty_status_known_last12months_total_err']**2)))) / 1.645, errors='coerce')
        else:
            return pandas.to_numeric(((1 / row['poverty_status_known_last12months_total']) * np.sqrt((row['poverty_past_12_months']**2) + (((row['poverty_past_12_months'] / row['poverty_status_known_last12months_total'])**2) * (row['poverty_status_known_last12months_total_err']**2)))) / 1.645, errors='coerce')
    except ZeroDivisionError:
        return np.nan
# compare to poverty_status_known_last12months_total: poverty_past_12_months

print('Defined functions to calculate standard errors in percentages!')
print('ok')

Defined functions to calculate standard errors in percentages!
ok


#### Calculate standard errors in percentages

In [12]:
print('getting from backup...')
reinvestment_df = reinvestment_df_bk

print('\ncalculating standard errors in percentages...')

print('...white...')
reinvestment_df = reinvestment_df.assign(pop_white_pct_serr = reinvestment_df.apply(lambda row: find_pop_white_pct_serr(row, verboselevel=0), axis=1))
print('...black...')
reinvestment_df = reinvestment_df.assign(pop_black_pct_serr = reinvestment_df.apply(lambda row: find_pop_black_pct_serr(row), axis=1))

print('...white householder...')
reinvestment_df = reinvestment_df.assign(white_householder_pct_serr = reinvestment_df.apply(lambda row: find_white_householder_pct_serr(row), axis=1))
print('...black householder...')
reinvestment_df = reinvestment_df.assign(black_householder_pct_serr = reinvestment_df.apply(lambda row: find_black_householder_pct_serr(row), axis=1))
print('...female householder...')
reinvestment_df = reinvestment_df.assign(female_householder_pct_serr = reinvestment_df.apply(lambda row: find_female_householder_pct_serr(row), axis=1))

print('...high school graduate 25 years or over...')
reinvestment_df = reinvestment_df.assign(hs_grad_25plus_pct_serr = reinvestment_df.apply(lambda row: find_hs_grad_25plus_pct_serr(row), axis=1))
print('...unemployed 16 years or over...')
reinvestment_df = reinvestment_df.assign(unemployed_16plus_pct_serr = reinvestment_df.apply(lambda row: find_unemployed_16plus_pct_serr(row), axis=1))
print('...in poverty last 12 months...')
reinvestment_df = reinvestment_df.assign(poverty_past_12_months_pct_serr = reinvestment_df.apply(lambda row: find_poverty_past_12_months_pct_serr(row), axis=1))

#reinvestment_df[['pop_total', 'pop_total_err', 'pop_white', 'pop_white_err', 'pop_white_pct', 'pop_white_pct_serr']]
#reinvestment_df[['poverty_past_12_months_pct', 'poverty_past_12_months_pct_serr']]
print('backing up...')
reinvestment_df_bk = reinvestment_df
print('ok')

getting from backup...

calculating standard errors in percentages...
...white...
...black...
...white householder...
...black householder...
...female householder...
...high school graduate 25 years or over...
...unemployed 16 years or over...
...in poverty last 12 months...
backing up...
ok


## Add working loans column (ask Mac)

In [13]:
print('getting from backup...')
reinvestment_df = reinvestment_df_bk
reinvestment_df = reinvestment_df.assign(avgSmallLoan = reinvestment_df['amtLoans1'] / reinvestment_df['nLoans1'])

reinvestment_df = reinvestment_df.assign(nWorkingLoans = reinvestment_df['nLoansTotal'][reinvestment_df['avgSmallLoan'] < 10000] - reinvestment_df['nLoans1'][reinvestment_df['avgSmallLoan'] < 10000])
reinvestment_df.loc[reinvestment_df['nWorkingLoans'].isnull(), 'nWorkingLoans'] = reinvestment_df['nLoansTotal']

reinvestment_df = reinvestment_df.assign(amtWorkingLoans = reinvestment_df['amtLoansTotal'][reinvestment_df['avgSmallLoan'] < 10000] - reinvestment_df['amtLoans1'][reinvestment_df['avgSmallLoan'] < 10000])
reinvestment_df.loc[reinvestment_df['amtWorkingLoans'].isnull(), 'amtWorkingLoans'] = reinvestment_df['amtLoansTotal']

print('backing up...')
reinvestment_df_bk = reinvestment_df
print('ok')


getting from backup...
backing up...
ok


# Add number of jobs per census tract

## Get jobs data

In [14]:
raw_jobs_df = pandas.read_csv(datadir+'md_wac_S000_JT02_2010_to_2015.csv')
raw_jobs_df.columns = [x.strip() for x in raw_jobs_df.columns.tolist()]

# GeoID format is STATE+COUNTY+TRACT+BLOCK (2+3+6+4 = 15 characters)
raw_jobs_df = raw_jobs_df.assign(census_tract = pandas.to_numeric(raw_jobs_df['Workplace Census Block Code'].apply(lambda x: str(x)[5:9] + '.' + str(x)[9:11]), errors='coerce'))
sum_columns = [x for x in raw_jobs_df.columns.tolist() if x not in ('Workplace Census Block Code', 'Year')]

jobs_df = pandas.DataFrame()
for i in range(2010,2018):
    jobs_i_df = raw_jobs_df[sum_columns][raw_jobs_df['Year'] == i].groupby('census_tract', as_index=False).sum()
    if (i >= 2016):
        jobs_i_df = raw_jobs_df[sum_columns][raw_jobs_df['Year'] == 2015].groupby('census_tract', as_index=False).sum()
    jobs_i_df = jobs_i_df.assign(Year = i)
    jobs_df = pandas.concat((jobs_df, jobs_i_df), axis=0)

jobs_df = jobs_df.rename(columns={'Year': 'activity_year'})
jobs_df = jobs_df.set_index(['census_tract', 'activity_year'])

print('backing up...')
jobs_df_bk = jobs_df
print('ok')



reading job data...
backing up...
ok


In [None]:
jobs_df.columns

## Merge reinvestment data with jobs data

In [15]:
print('retrieiving from backup...')
reinvestment_df = reinvestment_df_bk

reinvestment_df = reinvestment_df.reset_index().merge(jobs_df.reset_index(), how='left').set_index(['census_tract', 'activity_year'])
#    #(reinvestment_df['activity_year'] == 2015) & 
#    #(reinvestment_df['census_tract'] <= 900)
#    jobs_df, how='left'
#)[['census_tract', 'activity_year', 'nWorkingLoans', 'Total number of jobs']]
#reinvestment_df
#jobs_df

print('backing up...')
reinvestment_df_bk = reinvestment_df
#reinvestment_df.sample(1).T
print('Done!')



retrieiving from backup...
backing up...
Done!


## Normalize loans by number of jobs at firms with firms of 0-19 employees

In [16]:
print('getting from backup...')
reinvestment_df = reinvestment_df_bk

reinvestment_df = reinvestment_df.assign(nLoans1_per_sbjob = reinvestment_df['nLoans1'] / reinvestment_df['Number of jobs for workers at firms with Firm Size: 0-19 Employees'])
reinvestment_df = reinvestment_df.assign(amtLoans1_per_sbjob = reinvestment_df['amtLoans1'] / reinvestment_df['Number of jobs for workers at firms with Firm Size: 0-19 Employees'])
reinvestment_df = reinvestment_df.assign(nLoans100k_per_sbjob = reinvestment_df['nLoans100k'] / reinvestment_df['Number of jobs for workers at firms with Firm Size: 0-19 Employees'])
reinvestment_df = reinvestment_df.assign(amtLoans100k_per_sbjob = reinvestment_df['amtLoans100k'] / reinvestment_df['Number of jobs for workers at firms with Firm Size: 0-19 Employees'])
reinvestment_df = reinvestment_df.assign(nLoans250k_per_sbjob = reinvestment_df['nLoans250k'] / reinvestment_df['Number of jobs for workers at firms with Firm Size: 0-19 Employees'])
reinvestment_df = reinvestment_df.assign(amtLoans250k_per_sbjob = reinvestment_df['amtLoans250k'] / reinvestment_df['Number of jobs for workers at firms with Firm Size: 0-19 Employees'])
reinvestment_df = reinvestment_df.assign(nLoansToSmallest_per_sbjob = reinvestment_df['nLoansToSmallest'] / reinvestment_df['Number of jobs for workers at firms with Firm Size: 0-19 Employees'])
reinvestment_df = reinvestment_df.assign(amtLoansToSmallest_per_sbjob = reinvestment_df['amtLoansToSmallest'] / reinvestment_df['Number of jobs for workers at firms with Firm Size: 0-19 Employees'])
reinvestment_df = reinvestment_df.assign(nLoansTotal_per_sbjob = reinvestment_df['nLoansTotal'] / reinvestment_df['Number of jobs for workers at firms with Firm Size: 0-19 Employees'])
reinvestment_df = reinvestment_df.assign(amtLoansTotal_per_sbjob = reinvestment_df['amtLoansTotal'] / reinvestment_df['Number of jobs for workers at firms with Firm Size: 0-19 Employees'])
reinvestment_df = reinvestment_df.assign(nWorkingLoans_per_sbjob = reinvestment_df['nWorkingLoans'] / reinvestment_df['Number of jobs for workers at firms with Firm Size: 0-19 Employees'])
reinvestment_df = reinvestment_df.assign(amtWorkingLoans_per_sbjob = reinvestment_df['amtWorkingLoans'] / reinvestment_df['Number of jobs for workers at firms with Firm Size: 0-19 Employees'])


#reinvestment_df['amtWorkingLoans_per_job']
print('backing up...')
reinvestment_df_bk = reinvestment_df
print('ok')


getting from backup...
backing up...
ok


# Correct for inflation

In [17]:
#reinvestment_df.columns.tolist()
print('getting from backup...')
reinvestment_df = reinvestment_df_bk

money_columns = ['amtLoans1', 'amtLoans100k', 'amtLoans250k', 'amtLoansToSmallest']
money_columns += ['amtLoansTotal', 'amtWorkingLoans']
money_columns += ['mfi', 'median_home_value']
money_columns += ['amtLoans1_per_sbjob', 'amtLoans100k_per_sbjob', 'amtLoans250k_per_sbjob', 'amtLoansToSmallest_per_sbjob']
money_columns += ['amtLoansTotal_per_sbjob', 'amtWorkingLoans_per_sbjob']

print('getting inflation data...')
cpi_1913_2017_df = pandas.read_csv(inflation_dir+'cpi-1913-2017.csv', index_col='Year')
cpi_annual_s = cpi_1913_2017_df['Jan']
cpi_annual_s.name = 'rawfactor'
value_in_2017 = cpi_annual_s.loc[2017]
annual_inflator_s = 1 / (cpi_annual_s / value_in_2017)
annual_inflator_s

print('inflating pre-2017 monetary values...')
inflate_these_df = reinvestment_df[money_columns]

newcolnames = [x+'_adj' for x in inflate_these_df.columns.tolist()]
inflate_these_df.columns = newcolnames

inflated_df = pandas.DataFrame()

#inflate_these_df.xs(2017, level=1).apply(lambda x: x * annual_inflator_s.loc[2017])
#annual_inflator_s
for i in inflate_these_df.index.get_level_values(1).drop_duplicates().tolist():
    inflated_df_i = inflate_these_df.xs(i, level=1).apply(lambda x: x * annual_inflator_s.loc[i])
    inflated_df_i['activity_year'] = i
    inflated_df = pandas.concat((inflated_df, inflated_df_i), axis=0)
inflated_df = inflated_df.reset_index().set_index(['census_tract', 'activity_year'])

print('joining...')
reinvestment_df = reinvestment_df.join(inflated_df, how='left')

print('backing up...')
reinvestment_df_bk = reinvestment_df

print('ok')

getting from backup...
getting inflation data...
inflating pre-2017 monetary values...
joining...
backing up...
ok


# Write outfile

In [18]:
print('writing outfile...')
reinvestment_df.to_csv(outdir+'reinvestment_by_census_tract_for_smallest_businesses.csv', encoding='utf-8')
print('Done!')

writing outfile...
Done!


In [19]:
df = pandas.read_csv(outdir+'reinvestment_by_census_tract_for_smallest_businesses.csv', encoding='utf-8', index_col=['census_tract', 'activity_year'])
#df = df.set_index(['census_tract', 'activity_year'])
df.columns.tolist()

['income_group_code',
 'nLoans1',
 'amtLoans1',
 'nLoans100k',
 'amtLoans100k',
 'nLoans250k',
 'amtLoans250k',
 'nLoansToSmallest',
 'amtLoansToSmallest',
 'nLoansTotal',
 'amtLoansTotal',
 'CSA2010',
 'income_group',
 'cra_level',
 'GEOID',
 'STUSAB',
 'LOGRECNO',
 'pop_total',
 'total_householders',
 'pop_white',
 'pop_black',
 'black_householder',
 'white_householder',
 'owner_occ_housing_units',
 'mfi',
 'B15002_011',
 'B15002_028',
 'female_householder',
 'unemployed_16plus',
 'poverty_past_12_months',
 'median_home_value',
 'median_year_built',
 'B01001_011',
 'B01001_012',
 'B01001_013',
 'B01001_014',
 'B01001_015',
 'B01001_016',
 'B01001_017',
 'B01001_018',
 'B01001_019',
 'B01001_020',
 'B01001_021',
 'B01001_022',
 'B01001_023',
 'B01001_024',
 'B01001_025',
 'B01001_035',
 'B01001_036',
 'B01001_037',
 'B01001_038',
 'B01001_039',
 'B01001_040',
 'B01001_041',
 'B01001_042',
 'B01001_043',
 'B01001_044',
 'B01001_045',
 'B01001_046',
 'B01001_047',
 'B01001_048',
 'B0100

## Read some other datasets useful for mapping

In [None]:
print('Reading city boundary...')
outline_filename = shapefile_basedir + 'baltimore_city_polygon/baltimore_city_polygon.shp'
city_outline_gdf = geopandas.read_file(outline_filename)
city_outline_gdf = city_outline_gdf.to_crs(tract_shapes_gdf.crs)

print('Reading water features...')
water_filename = shapefile_basedir + 'water/water.shp'
water_gdf = geopandas.read_file(water_filename)
water_gdf = water_gdf.set_index('OBJECTID')
water_gdf = water_gdf.to_crs(tract_shapes_gdf.crs)


print('Finding location of JHU...')
businesses = []
biz1dict = {'name': 'Johns Hopkins University', 'address': '3400 N. Charles St. Baltimore, MD'}
#businesses.append(biz1dict)
#biz2dict = {'name': 'Refereshing Life Ministries', 'address': '2603 Baker St. Baltimore, MD'}
#businesses.append(biz2dict)
g = geocoder.bing(biz1dict['address'], key='Agrc_VFxa6iK3mVYNIC1Mcao2TwVTPG5tDbok7UbDcCYf5PRGmnaeLF_Wm_znHeo')
thegeometry = Point(g.latlng)
g_df = pandas.DataFrame(data=g.latlng)
g_df = g_df.T
g_df = g_df.rename(columns={0: 'lat', 1: 'long'})
g_df = g_df.assign(geometry=Point(g_df['long'], g_df['lat']))
g_gdf = geopandas.GeoDataFrame(data=g_df, geometry='geometry')
g_gdf.crs = {'init': 'epsg:4326'}
g_gdf = g_gdf.to_crs(tract_shapes_gdf.crs)

print('Reading streets...')
s = time.time()
streets_filename = shapefile_basedir + 'streets/streetcl.shp'
streets_gdf = geopandas.read_file(streets_filename)
streets_gdf = streets_gdf.set_index('OBJECTID')
streets_gdf = streets_gdf.to_crs(tract_shapes_gdf.crs)
e = time.time()
print('Read {0:,.0f} street centerlines in {1:,.1f} seconds.'.format(len(streets_gdf), e-s))

print('Done!')
#g_gdf


## 2017

In [None]:
print('getting from backup...')
reinvestment_df = reinvestment_df_bk

scale = 0.25

fig, ax = plt.subplots(1,1, figsize=(48*scale,48*scale))


xdf = df.xs(thisyear, level=1).reset_index().merge(tract_shapes_gdf, how='left', on='census_tract').set_index('census_tract')

xgdf = geopandas.GeoDataFrame(xdf)
xgdf.crs = tract_shapes_gdf.crs    

xgdf.plot(column='amtWorkingLoans_per_job_adj', ax=ax)
ax.set_aspect('equal')
ax.tick_params(axis='both', which='both', bottom=False, top=False, left=False, right=False, labelbottom=False, labeltop=False, labelleft=False, labelright=False)
ax.set_title(thisyear, fontsize=14*scale)
#reinvestment_df_temp = pandas.concat((reinvestment_df_temp, xdf), axis=0, sort=False)

#plt.tick_params(axis='both', which='both', bottom=False, top=False, left=False, right=False, labelbottom=False, labeltop=False, labelleft=False, labelright=False))
#reinvestment_2017_d

plt.show()


In [None]:
df.xs(thisyear, level=1).reset_index().merge(tract_shapes_gdf, how='left', on='census_tract').set_index('census_tract')['amtWorkingLoans_per_job_adj'].sort_values()

# Attempt to show each year on a single big-ass page of maps

print('getting from backup...')
reinvestment_df = reinvestment_df_bk

fig, axs = plt.subplots(1,7, figsize=(24,12))

thisyear = 2017

cnt = 0
for i in range(2017, 2010, -1):
    if (debug > 0):
        print('joining shapefiles for {0:.0f}...'.format(i))
    tract_shapes_gdf = geopandas.read_file(census_shapefile_tiger_basedir +'{0:.0f}/TRACT/tl_{0:.0f}_24_tract.shp'.format(thisyear))    
    tract_shapes_gdf = tract_shapes_gdf[tract_shapes_gdf['COUNTYFP'] == '510']
    tract_shapes_gdf.loc[:, 'NAME'] = pandas.to_numeric(tract_shapes_gdf['NAME'], errors='coerce')
    tract_shapes_gdf = tract_shapes_gdf.assign(census_tract = pandas.to_numeric(tract_shapes_gdf['GEOID'].apply(lambda x: x[5:9]+'.'+x[9:]), errors='coerce'))

    xdf = reinvestment_df.xs(i, level=1).reset_index().merge(tract_shapes_gdf, how='left', on='census_tract').set_index('census_tract')

    xgdf = geopandas.GeoDataFrame(xdf)
    xgdf.crs = tract_shapes_gdf.crs    
    xgdf.plot(column='amtWorkingLoans_per_job_adj', ax=axs[cnt])
    axs[cnt].set_aspect('equal')
    axs[cnt].tick_params(axis='both', which='both', bottom=False, top=False, left=False, right=False, labelbottom=False, labeltop=False, labelleft=False, labelright=False)
    axs[cnt].set_title(i, fontsize=14)
    cnt = cnt + 1    
#reinvestment_df_temp = pandas.concat((reinvestment_df_temp, xdf), axis=0, sort=False)

#plt.tick_params(axis='both', which='both', bottom=False, top=False, left=False, right=False, labelbottom=False, labeltop=False, labelleft=False, labelright=False))
#reinvestment_2017_d


plt.show()


print('getting from backup...')
reinvestment_df = reinvestment_df_bk


#levels_show_df = reinvestment_df.groupby('cra_level')[['amtWorkingLoans_per_job_adj']].sum()
levels_show_index = pandas.Index(reinvestment_df['cra_level'].drop_duplicates().values)
levels_show_df = pandas.DataFrame(data=None, columns=None, index=levels_show_index)
levels_show_df = levels_show_df.reindex(['low', 'moderate', 'middle', 'upper', 'unknown'])
