In [None]:
import html
import os
import time
import urllib

import numpy as np
import pandas
from IPython.display import display, HTML
from matplotlib import pyplot as plt
# Directory from which the pre-processed tract and loan CSV files are read.
datadir = '/home/idies/workspace/Temporary/raddick/cra_scratch/'
# Directory holding the CPI table (cpi-1913-2017.csv) used for inflation adjustment.
inflationdir = '/home/idies/workspace/Storage/raddick/Baltimore/inflation/'
# Directory into which the figures produced by this notebook are saved.
figdir = '/home/idies/workspace/Storage/raddick/Baltimore/community_reinvestment_act/figures/'
print('ok')

In [None]:
# Read the pre-processed tract table and report how long the read took.
s = time.time()
tracts_df = pandas.read_csv(
    datadir + 'tracts_processed.csv',
    index_col='rownumber',
    low_memory=False,
)
e = time.time()

elapsed = e - s
elapsed_minutes = np.floor(elapsed / 60)
elapsed_seconds = elapsed % 60
read_message = 'Read {0:,.0f} rows in {1:,.0f} minutes {2:,.0f} seconds!'
print(read_message.format(len(tracts_df), elapsed_minutes, elapsed_seconds))

# Keep extra references to the freshly loaded frame so later cells can
# start again from it without re-reading the CSV.
print('backing up...')
tracts_df_bk = tracts_df_all = tracts_df
print('ok')


In [None]:
# Start from the saved tract frame so the cell can be re-run safely.
print('getting from backup...')
tracts_df = tracts_df_bk

# Keep only the tracts flagged as having received loans.
print('removing tracts with no loans received...')
received_loans = tracts_df['loan_indicator'] == 'Y'
tracts_df = tracts_df.loc[received_loans]
print('backing up...')
tracts_df_bk = tracts_df

# Restrict to state code 24, county code 510 (the Baltimore City rows).
print('getting only Baltimore City tracts...')
state_matches = tracts_df['state'] == 24
county_matches = tracts_df['county'] == 510
baltimore_tracts_df = tracts_df.loc[state_matches & county_matches]
print('backing up...')
baltimore_tracts_df_bk = baltimore_tracts_df
print('ok')

In [None]:
# Read the pre-processed loan table and report how long the read took.
s = time.time()
loans_df = pandas.read_csv(
    datadir + 'loans_processed.csv',
    index_col='rownumber',
    low_memory=False,
)
e = time.time()

elapsed = e - s
elapsed_minutes = np.floor(elapsed / 60)
elapsed_seconds = elapsed % 60
read_message = 'Read {0:,.0f} rows in {1:,.0f} minutes {2:,.0f} seconds!'
print(read_message.format(len(loans_df), elapsed_minutes, elapsed_seconds))

# Keep a reference to the complete loan table.
print('backing up...')
loans_df_all = loans_df

# Restrict to state code 24, county code 510 (the Baltimore City rows).
print('getting only Baltimore City tracts...')
state_matches = loans_df['state'] == 24
county_matches = loans_df['county'] == 510
baltimore_loans_df = loans_df.loc[state_matches & county_matches]
print('backing up...')
baltimore_loans_df_bk = baltimore_loans_df
print('ok')


In [None]:
def make_geoid(row):
    """Build a tract identifier string from one row.

    The result has the form ``14000US`` + 2-digit state + 3-digit county +
    6-digit tract, where the tract is ``census_tract`` multiplied by 100
    (e.g. 2604.03 -> ``260403``).

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``'state'``, ``'county'`` and ``'census_tract'``.

    Returns
    -------
    str
        The identifier, e.g. ``'14000US24510260403'``.

    Raises
    ------
    ValueError
        If any of the three values is NaN.
    """
    # Round before converting: 100 * x is computed in binary floating point,
    # so a value such as 4.35 becomes 434.99999999999994 and a bare int()
    # would truncate it to 434, producing the wrong tract code.
    tract_code = int(round(100 * row['census_tract']))
    # Cast explicitly so the 'd' format codes also work when the columns were
    # read (or up-cast by DataFrame.apply) as floats.
    state_code = int(row['state'])
    county_code = int(row['county'])
    return '14000US{0:02d}{1:03d}{2:06d}'.format(state_code, county_code, tract_code)
print('ok')

In [None]:
# Add a GEOID column to the Baltimore tract table, timing the computation.
s = time.time()
print('getting from backup...')
baltimore_tracts_df = baltimore_tracts_df_bk
print('calculating geoid...')
# make_geoid is applied row by row (axis=1) and the result becomes a new column.
geoid_values = baltimore_tracts_df.apply(make_geoid, axis=1)
baltimore_tracts_df = baltimore_tracts_df.assign(GEOID=geoid_values)
e = time.time()

elapsed = e - s
elapsed_minutes = np.floor(elapsed / 60)
elapsed_seconds = elapsed % 60
geoid_message = 'Calculated {0:,.0f} geoids in {1:,.0f} minutes {2:,.0f} seconds!'
print(geoid_message.format(len(baltimore_tracts_df), elapsed_minutes, elapsed_seconds))

print('backing up...')
baltimore_tracts_df_bk = baltimore_tracts_df
print('ok')

In [None]:
# Restore both Baltimore frames from their saved references.
print('getting from backup...')
baltimore_loans_df = baltimore_loans_df_bk
baltimore_tracts_df = baltimore_tracts_df_bk

# Total loan count per row = sum of the three loan-size count columns.
# Element-wise addition is used so a missing value in any column
# propagates to the total.
total_loan_count = (
    baltimore_loans_df['nLoans1']
    .add(baltimore_loans_df['nLoans100k'])
    .add(baltimore_loans_df['nLoans250k'])
)
baltimore_loans_df = baltimore_loans_df.assign(nLoans=total_loan_count)

# Drop rows that report no loans at all, then save the result.
has_loans = baltimore_loans_df['nLoans'] > 0
baltimore_loans_df = baltimore_loans_df.loc[has_loans]
baltimore_loans_df_bk = baltimore_loans_df
print('ok')

In [None]:
# Count and amount columns to total for each (institution, year) pair.
sum_columns = [
    'nLoans1', 'amtLoans1', 'nLoans100k', 'amtLoans100k',
    'nLoans250k', 'amtLoans250k', 'nLoansToSmallest', 'amtLoansToSmallest',
    'nLoansAff', 'amtLoansAff',
]

print('creating loans_by_institution_df...')
loans_by_institution_df = (
    baltimore_loans_df
    .groupby(['respondentID', 'activity_year'], as_index=False)[sum_columns]
    .sum()
)

# Overall count ('nLoans') and amount ('amtLoans') are the sums of the three
# loan-size columns that share the same prefix.
for prefix in ('nLoans', 'amtLoans'):
    size_class_total = (
        loans_by_institution_df[prefix + '1']
        + loans_by_institution_df[prefix + '100k']
        + loans_by_institution_df[prefix + '250k']
    )
    loans_by_institution_df = loans_by_institution_df.assign(**{prefix: size_class_total})

inflator_df = pandas.read_csv(inflationdir + 'cpi-1913-2017.csv')

# January 2017 value (rounded to 3 decimals) is the reference point:
# each year's factor is that value divided by the year's own January value.
is_2017 = inflator_df['Year'] == 2017
jan2017 = np.round(inflator_df.loc[is_2017, 'Jan'].values[0], 3)
inflation_factor_df = jan2017 / inflator_df.set_index('Year')['Jan']

# Attach each row's factor by matching activity_year to the CPI table's Year.
inflation_lookup = inflation_factor_df.reset_index().rename(columns={'Jan': 'inflation_factor'})
loans_by_institution_df = loans_by_institution_df.merge(
    inflation_lookup,
    how='left',
    left_on='activity_year',
    right_on='Year',
)

# Create an inflation-adjusted '<name>_adj' twin for every amount column.
amount_columns = (
    'amtLoans1', 'amtLoans100k', 'amtLoans250k',
    'amtLoansToSmallest', 'amtLoansAff', 'amtLoans',
)
for amount_column in amount_columns:
    adjusted_amount = (
        loans_by_institution_df[amount_column]
        * loans_by_institution_df['inflation_factor']
    )
    loans_by_institution_df = loans_by_institution_df.assign(
        **{amount_column + '_adj': adjusted_amount}
    )

print('backing up...')
loans_by_institution_df_bk = loans_by_institution_df
print('ok')


In [None]:
# Aggregate the per-institution, per-year loan table three ways:
#   inst_year_df    - one row per (respondentID, activity_year)
#   years_df        - one row per activity_year
#   institutions_df - one row per respondentID, with institution_name attached
group_keys = ['respondentID', 'activity_year']
value_columns = [
    'nLoans', 'amtLoans_adj',
    'nLoans1', 'nLoans100k', 'nLoans250k', 'nLoansToSmallest', 'nLoansAff',
    'amtLoans1_adj', 'amtLoans100k_adj', 'amtLoans250k_adj',
    'amtLoansToSmallest_adj', 'amtLoansAff_adj',
]

# A single groupby produces every column at once. The resulting MultiIndex
# levels are already named after group_keys, so no manual naming is needed
# (assigning a list to `index.name` is rejected by current pandas because an
# index name must be hashable; `index.names` is the MultiIndex attribute).
inst_year_df = loans_by_institution_df.groupby(group_keys)[value_columns].sum()

years_df = inst_year_df.groupby(level='activity_year')[value_columns].sum()
institutions_df = inst_year_df.groupby(level='respondentID')[value_columns].sum()

# Attach the institution name to each respondentID.
# NOTE(review): the lookup is de-duplicated on the (respondentID,
# institution_name) pair, so a respondentID that appears under more than one
# name yields one output row per name, each carrying the full totals.
# Confirm this is intended before summing institutions_df.
institution_names = baltimore_loans_df[['respondentID', 'institution_name']].drop_duplicates()
institutions_df = (
    institutions_df
    .reset_index()
    .merge(institution_names, how='left', on='respondentID')
    .set_index('respondentID')
)

print('backing up...')
years_df_bk = years_df
institutions_df_bk = institutions_df
print('ok')

In [None]:
# Stacked area chart of the yearly number of loans, split by loan-size class.
fig, ax1 = plt.subplots(1, 1, figsize=(12, 6))
years_df[['nLoans1', 'nLoans100k', 'nLoans250k']].plot.area(ax=ax1)
ax1.set_xlim([2004, 2016])
ax1.set_xlabel('Year', fontsize=12)
ax1.set_ylabel('Number of loans (thousands)', fontsize=12)

ax1.set_xticks(np.arange(2004, 2017))
yticks = np.arange(0, 20000, 2000)
# The axis label says "thousands", so the tick labels must be scaled to match
# (tick positions stay in raw counts).
ylabels = ['{0:,.0f}'.format(y / 1000) for y in yticks]
ax1.set_yticks(yticks)
ax1.set_yticklabels(ylabels)

# Raw strings: "\$" is not a valid Python escape sequence in a normal string
# literal; the backslash is kept so matplotlib renders a literal dollar sign.
ax1.legend([r"\$1 - \$100k", r"\$100k - \$250k", r"\$250k - \$1M"], fontsize=12)
ax1.set_title('Number of CRA loans in Baltimore City per year', fontsize=16)

fig.savefig(figdir + 'nLoans_per_year.jpg', format='jpg')
print('Done!')

In [None]:
# Stacked area chart of the yearly inflation-adjusted amount loaned,
# split by loan-size class.
fig, ax1 = plt.subplots(1, 1, figsize=(12, 6))
years_df[['amtLoans1_adj', 'amtLoans100k_adj', 'amtLoans250k_adj']].plot.area(ax=ax1)
ax1.set_xlim([2004, 2016])
ax1.set_xlabel('Year', fontsize=12)
ax1.set_ylabel('Total amount loaned (million 2017USD)', fontsize=12)

ax1.set_xticks(np.arange(2004, 2017))
yticks = np.arange(0, 700000000, 100000000)
# Tick positions are in dollars; labels are shown in millions to match the axis label.
yticklabels = ['${0:,.0f}'.format(y / 1000000) for y in yticks]
ax1.set_yticks(yticks)
ax1.set_yticklabels(yticklabels)

# Raw strings: "\$" is not a valid Python escape sequence in a normal string
# literal; the backslash is kept so matplotlib renders a literal dollar sign.
ax1.legend(
    [
        r"From loans of \$1 - \$100k",
        r"From loans of \$100k - \$250k",
        r"From loans of \$250k - \$1M",
    ],
    fontsize=12,
)
# Same title size as the loan-count figure so the two charts match.
ax1.set_title('Total money loaned by CRA loans in Baltimore City per year', fontsize=16)

fig.savefig(figdir + 'amtLoans_per_year.jpg', format='jpg')
print('Done!')

In [None]:
# Totals per institution name, each institution's share of the grand totals,
# and an HTML table of the institutions with the largest share of loan counts.
grand_total_nloans = institutions_df['nLoans'].sum()
grand_total_amtloans_adj = institutions_df['amtLoans_adj'].sum()

# Select the value columns by name instead of relying on institution_name
# happening to be the last column.
value_columns = [c for c in institutions_df.columns if c != 'institution_name']
institution_names_df = institutions_df.groupby('institution_name')[value_columns].sum()
institution_names_df = institution_names_df.assign(
    pct_nloans=institution_names_df['nLoans'] / grand_total_nloans
)
institution_names_df = institution_names_df.assign(
    pct_amtloans_adj=institution_names_df['amtLoans_adj'] / grand_total_amtloans_adj
)
institution_names_df = institution_names_df.sort_values(by='nLoans', ascending=False)

# Only institutions holding at least this fraction of all loans are listed.
min_count_share = 0.01
top_by_count_df = institution_names_df.loc[
    institution_names_df['pct_nloans'] >= min_count_share,
    ['nLoans', 'pct_nloans'],
]
# Compute the combined share shown in the heading from the data, so the text
# cannot drift out of date when the inputs change.
listed_count_share = top_by_count_df['pct_nloans'].sum()

# Institution names come from the data and may contain characters such as
# '&' or '<', so they are escaped before being placed in the markup.
table_rows = [
    '<tr><td>{0}</td><td>{1:,.0f}</td><td>{2:.1%}</td></tr>'.format(
        html.escape(str(thisbank)), thisrow['nLoans'], thisrow['pct_nloans']
    )
    for thisbank, thisrow in top_by_count_df.iterrows()
]

htmlString = (
    '<p><strong>These institutions make up ~{0:.0%} of the total number of loans:</strong></p>'.format(listed_count_share)
    + '<table><tr><th>Institution</th><th>Number of loans</th><th>Percent of all loans</th></tr>'
    + ''.join(table_rows)
    + '</table>'
)

display(HTML(htmlString))

In [None]:
# HTML table of the institutions with the largest share of the
# inflation-adjusted amount loaned.
institution_names_df = institution_names_df.sort_values(by='amtLoans_adj', ascending=False)

# Only institutions holding at least this fraction of the total amount are listed.
min_amount_share = 0.005
top_by_amount_df = institution_names_df.loc[
    institution_names_df['pct_amtloans_adj'] >= min_amount_share,
    ['amtLoans_adj', 'pct_amtloans_adj'],
]
# Compute the combined share shown in the heading from the data, so the text
# cannot drift out of date when the inputs change.
listed_amount_share = top_by_amount_df['pct_amtloans_adj'].sum()

# Institution names come from the data and may contain characters such as
# '&' or '<', so they are escaped before being placed in the markup.
table_rows = [
    '<tr><td>{0}</td><td>{1:,.0f}</td><td>{2:.1%}</td></tr>'.format(
        html.escape(str(thisbank)), thisrow['amtLoans_adj'], thisrow['pct_amtloans_adj']
    )
    for thisbank, thisrow in top_by_amount_df.iterrows()
]

# Column headers describe amounts: this table shows amtLoans_adj and its
# share of the total, not loan counts.
htmlString = (
    '<p><strong>These institutions make up ~{0:.0%} of the amount of money loaned:</strong></p>'.format(listed_amount_share)
    + '<table><tr><th>Institution</th><th>Amount loaned (2017 USD)</th><th>Percent of total amount loaned</th></tr>'
    + ''.join(table_rows)
    + '</table>'
)

display(HTML(htmlString))