In [297]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)

# Los Angeles County

## census

In [None]:
# read census csv

acs2019 = pd.read_csv('./census_data/acs_2019_la.csv')

In [None]:
acs2019

In [None]:
# drop the second header row

acs2019 = acs2019.drop([0])

In [None]:
# subset to population data

acs_2019_race = acs2019[['GEO_ID', 'NAME', 'DP05_0001E', 'DP05_0071E', 'DP05_0078E']]

In [None]:
acs_2019_race.info()

In [None]:
# rename columns

acs_2019_race.columns = ['geo_id', 'tract_name', 'population', 'hispanic_pp', 'black_pp']

In [None]:
acs_2019_race.info()

In [None]:
tract_code = acs_2019_race['geo_id'].str.split(pat='US', expand=True)

In [None]:
tract_code

In [None]:
acs_2019_race

In [None]:
# insert the census_tract column

acs_2019_race.insert(loc = 1, column='census_tract', value=tract_code[1])

In [None]:
acs_2019_race = acs_2019_race.astype({
    'population': int,
    'hispanic_pp': int,
    'black_pp': int
})

In [None]:
acs_2019_race.info()

In [None]:
acs_2019_race['hispanic_pp_pct'] = acs_2019_race['hispanic_pp'] / acs_2019_race['population'] * 100
acs_2019_race['black_pp_pct'] = acs_2019_race['black_pp'] / acs_2019_race['population'] * 100

In [None]:
acs_2019_race

In [None]:
# export

acs_2019_race.to_csv('./census_data/LA_census_2019.csv', index=False)

In [None]:
#read csv

la_all_2018 = pd.read_csv('./all_mortgage_data/la_all_2018_originated.csv', dtype={
    'county_code': str,
    'census_tract': str
})

la_all_2019 = pd.read_csv('./all_mortgage_data/la_all_2019_originated.csv', dtype={
    'county_code': str,
    'census_tract': str
})

la_all_2020 = pd.read_csv('./all_mortgage_data/la_all_2020_originated.csv', dtype={
    'county_code': str,
    'census_tract': str
})

la_all_2021 = pd.read_csv('./all_mortgage_data/la_all_2021_originated.csv', dtype={
    'county_code': str,
    'census_tract': str
})

la_all_2022 = pd.read_csv('./all_mortgage_data/la_all_2022_originated.csv', dtype={
    'county_code': str,
    'census_tract': str
})

In [None]:
# filter to reverse mortgage

rm_la_all_2018 = la_all_2018[(la_all_2018['reverse_mortgage'] == 1)]
rm_la_all_2019 = la_all_2019[(la_all_2019['reverse_mortgage'] == 1)]
rm_la_all_2020 = la_all_2020[(la_all_2020['reverse_mortgage'] == 1)]
rm_la_all_2021 = la_all_2021[(la_all_2021['reverse_mortgage'] == 1)]
rm_la_all_2022 = la_all_2022[(la_all_2022['reverse_mortgage'] == 1)]

In [None]:
rm_la_all_2018.info()

In [None]:
rm_la_all_2019.info()

In [None]:
rm_la_all_2020.info()

In [None]:
rm_la_all_2021.info()

In [None]:
rm_la_all_2022.info()

In [None]:
# concatenate 2018-2021 datasets

rm_la_all = pd.concat((rm_la_all_2018, rm_la_all_2019, rm_la_all_2020, rm_la_all_2021, rm_la_all_2022), axis=0)

In [None]:
rm_la_all.info()

In [None]:
# export to csv

rm_la_all.to_csv('./reverse_mortgage_data/rm_la_all.csv', index=False)

# analyze the LA data

How many reverse mortgages are taken out each year from 2018 to 2022? 

In [None]:
rm_la_all.groupby(by='activity_year').count().sort_values(by='activity_year',ascending=False)

what's the racial distribution of the reverse mortgage borrowers? 

In [None]:
rm_la_all.groupby(by='derived_race').count().sort_values(by='lei',ascending=False)

In [None]:
# group by census tract to see mortgage count

la_tract_count = rm_la_all.groupby('census_tract').agg({
    'lei':'count',
    'tract_population': 'max',
    'tract_minority_population_percent': 'min'
    }).sort_values(by='lei', ascending=False).reset_index()

In [None]:
la_tract_count

In [None]:
# rename columns 

la_tract_count.columns = ['census_tract', 'rm_no', 'tract_population', 'minority_pct']

In [None]:
la_tract_count

In [None]:
# calculate mortgage count per 1000 people

la_tract_count['rm_no_per1000'] = la_tract_count['rm_no'] / la_tract_count['tract_population'] * 1000 

In [None]:
# sort by count per 1000 people 

la_tract_count = la_tract_count.sort_values(by='rm_no_per1000', ascending=False).reset_index(drop=True)

In [None]:
la_tract_count.info()

In [None]:
acs_2019_race.info()

In [None]:
# merge the census tract info and reverse mortgage info

la_rm_census = pd.merge(
    la_tract_count,
    acs_2019_race,
    how='left',
    on='census_tract',
    validate='1:1'
)

In [None]:
la_rm_census.info()

In [None]:
la_rm_census = la_rm_census.sort_values(by='rm_no_per1000', ascending=False)

In [None]:
la_rm_census.head(15)

In [None]:
no_geoid = la_rm_census[la_rm_census['geo_id'].isna()]
no_geoid

In [None]:
la_rm_census.to_csv('./reverse_mortgage_data/la_tract_count_0710.csv', index=False)

## Alameda County

In [None]:
# read csv

alameda_all_2018 = pd.read_csv('./all_mortgage_data/sac_all_2018.csv', dtype={
    'county_code': str,
    'census_tract': str
})

sac_all_2019 = pd.read_csv('./all_mortgage_data/alameda_all_2019.csv', dtype={
    'county_code': str,
    'census_tract': str
})

alameda_all_2020 = pd.read_csv('./all_mortgage_data/alameda_all_2020.csv', dtype={
    'county_code': str,
    'census_tract': str
})

alameda_all_2021 = pd.read_csv('./all_mortgage_data/alameda_all_2021.csv', dtype={
    'county_code': str,
    'census_tract': str
})

In [None]:
# filter to reverse mortgage

alameda_rm_2018 = alameda_all_2018[(alameda_all_2018['reverse_mortgage'] == 1)]
alameda_rm_2019 = alameda_all_2019[(alameda_all_2019['reverse_mortgage'] == 1)]
alameda_rm_2020 = alameda_all_2020[(alameda_all_2020['reverse_mortgage'] == 1)]
alameda_rm_2021 = alameda_all_2021[(alameda_all_2021['reverse_mortgage'] == 1)]

In [None]:
# concatenate 2018-2021 years

alameda_rm_all = pd.concat((alameda_rm_2018, alameda_rm_2019, alameda_rm_2020, alameda_rm_2021), axis=0)
alameda_rm_all.info()

In [None]:
# export
alameda_rm_all.to_csv('./reverse_mortgage_data/alameda_rm_all.csv', index=False)

## Sacramento County

In [None]:
# read csv

sac_all_2018 = pd.read_csv('./all_mortgage_data/sac_all_2018.csv', dtype={
    'county_code': str,
    'census_tract': str
})

sac_all_2019 = pd.read_csv('./all_mortgage_data/sac_all_2019.csv', dtype={
    'county_code': str,
    'census_tract': str
})

sac_all_2020 = pd.read_csv('./all_mortgage_data/sac_all_2020.csv', dtype={
    'county_code': str,
    'census_tract': str
})

sac_all_2021 = pd.read_csv('./all_mortgage_data/sac_all_2021.csv', dtype={
    'county_code': str,
    'census_tract': str
})

In [None]:
# filter to reverse mortgage

sac_rm_2018 = sac_all_2018[(sac_all_2018['reverse_mortgage'] == 1)]
sac_rm_2019 = sac_all_2019[(sac_all_2019['reverse_mortgage'] == 1)]
sac_rm_2020 = sac_all_2020[(sac_all_2020['reverse_mortgage'] == 1)]
sac_rm_2021 = sac_all_2021[(sac_all_2021['reverse_mortgage'] == 1)]

In [None]:
# concatenate 2018-2021 years

sac_rm_all = pd.concat((sac_rm_2018, sac_rm_2019, sac_rm_2020, sac_rm_2021), axis=0)
sac_rm_all.info()

In [None]:
# export
sac_rm_all.to_csv('./reverse_mortgage_data/sac_rm_all.csv', index=False)

## San Bernardino County

In [None]:
# read csv

san_bernardino_all_2018 = pd.read_csv('./all_mortgage_data/san_bernardino_all_2018.csv', dtype={
    'county_code': str,
    'census_tract': str
})

san_bernardino_all_2019 = pd.read_csv('./all_mortgage_data/san_bernardino_all_2019.csv', dtype={
    'county_code': str,
    'census_tract': str
})

san_bernardino_all_2020 = pd.read_csv('./all_mortgage_data/san_bernardino_all_2020.csv', dtype={
    'county_code': str,
    'census_tract': str
})

san_bernardino_all_2021 = pd.read_csv('./all_mortgage_data/san_bernardino_all_2021.csv', dtype={
    'county_code': str,
    'census_tract': str
})

In [None]:
# filter to reverse mortgage

san_bernardino_rm_2018 = san_bernardino_all_2018[(san_bernardino_all_2018['reverse_mortgage'] == 1)]
san_bernardino_rm_2019 = san_bernardino_all_2019[(san_bernardino_all_2019['reverse_mortgage'] == 1)]
san_bernardino_rm_2020 = san_bernardino_all_2020[(san_bernardino_all_2020['reverse_mortgage'] == 1)]
san_bernardino_rm_2021 = san_bernardino_all_2021[(san_bernardino_all_2021['reverse_mortgage'] == 1)]

In [None]:
# concatenate 2018-2021 years

san_bernardino_rm_all = pd.concat((san_bernardino_rm_2018, san_bernardino_rm_2019, san_bernardino_rm_2020, san_bernardino_rm_2021), axis=0)
san_bernardino_rm_all.info()

In [None]:
# export
san_bernardino_rm_all.to_csv('./reverse_mortgage_data/san_bernardino_rm_all.csv', index=False)

## Riverside County

In [None]:
# read csv

riverside_all_2018 = pd.read_csv('./all_mortgage_data/riverside_all_2018.csv', dtype={
    'county_code': str,
    'census_tract': str
})

riverside_all_2019 = pd.read_csv('./all_mortgage_data/riverside_all_2019.csv', dtype={
    'county_code': str,
    'census_tract': str
})

riverside_all_2020 = pd.read_csv('./all_mortgage_data/riverside_all_2020.csv', dtype={
    'county_code': str,
    'census_tract': str
})

riverside_all_2021 = pd.read_csv('./all_mortgage_data/riverside_all_2021.csv', dtype={
    'county_code': str,
    'census_tract': str
})

In [None]:
# filter to reverse mortgage

riverside_rm_2018 = riverside_all_2018[(riverside_all_2018['reverse_mortgage'] == 1)]
riverside_rm_2019 = riverside_all_2019[(riverside_all_2019['reverse_mortgage'] == 1)]
riverside_rm_2020 = riverside_all_2020[(riverside_all_2020['reverse_mortgage'] == 1)]
riverside_rm_2021 = riverside_all_2021[(riverside_all_2021['reverse_mortgage'] == 1)]

In [None]:
# concatenate 2018-2021 years

riverside_rm_all = pd.concat((riverside_rm_2018, riverside_rm_2019, riverside_rm_2020, riverside_rm_2021), axis=0)
riverside_rm_all.info()

In [None]:
# export
riverside_rm_all.to_csv('./reverse_mortgage_data/riverside_rm_all.csv', index=False)

## Imperial County

In [None]:
# read csv

imperial_all_2018 = pd.read_csv('./all_mortgage_data/imperial_all_2018.csv', dtype={
    'county_code': str,
    'census_tract': str
})

imperial_all_2019 = pd.read_csv('./all_mortgage_data/imperial_all_2019.csv', dtype={
    'county_code': str,
    'census_tract': str
})

imperial_all_2020 = pd.read_csv('./all_mortgage_data/imperial_all_2020.csv', dtype={
    'county_code': str,
    'census_tract': str
})

imperial_all_2021 = pd.read_csv('./all_mortgage_data/imperial_all_2021.csv', dtype={
    'county_code': str,
    'census_tract': str
})

In [None]:
# filter to reverse mortgage

imperial_rm_2018 = imperial_all_2018[(imperial_all_2018['reverse_mortgage'] == 1)]
imperial_rm_2019 = imperial_all_2019[(imperial_all_2019['reverse_mortgage'] == 1)]
imperial_rm_2020 = imperial_all_2020[(imperial_all_2020['reverse_mortgage'] == 1)]
imperial_rm_2021 = imperial_all_2021[(imperial_all_2021['reverse_mortgage'] == 1)]


In [None]:
# concatenate 2018-2021 years

imperial_rm_all = pd.concat((imperial_rm_2018, imperial_rm_2019, imperial_rm_2020, imperial_rm_2021), axis=0)
imperial_rm_all.info()

In [None]:
# export
imperial_rm_all.to_csv('./reverse_mortgage_data/imperial_rm_all.csv', index=False)


## San Diego County

In [None]:
# read csv

san_diego_all_2018 = pd.read_csv('./all_mortgage_data/san_diego_all_2018.csv', dtype={
    'county_code': str,
    'census_tract': str
})

san_diego_all_2019 = pd.read_csv('./all_mortgage_data/richland_all_2019.csv', dtype={
    'county_code': str,
    'census_tract': str
})

san_diego_all_2020 = pd.read_csv('./all_mortgage_data/san_diego_all_2020.csv', dtype={
    'county_code': str,
    'census_tract': str
})

san_diego_all_2021 = pd.read_csv('./all_mortgage_data/san_diego_all_2021.csv', dtype={
    'county_code': str,
    'census_tract': str
})

In [None]:
# filter to reverse mortgage

san_diego_rm_2018 = san_diego_all_2018[(san_diego_all_2018['reverse_mortgage'] == 1)]
san_diego_rm_2019 = san_diego_all_2019[(san_diego_all_2019['reverse_mortgage'] == 1)]
san_diego_rm_2020 = san_diego_all_2020[(san_diego_all_2020['reverse_mortgage'] == 1)]
san_diego_rm_2021 = san_diego_all_2021[(san_diego_all_2021['reverse_mortgage'] == 1)]

In [None]:
# concatenate 2018-2021 years

san_diego_rm_all = pd.concat((san_diego_rm_2018, san_diego_rm_2019, san_diego_rm_2020, san_diego_rm_2021), axis=0)
san_diego_rm_all.info()

In [None]:
# export
san_diego_rm_all.to_csv('./reverse_mortgage_data/san_diego_rm_all.csv', index=False)

## Richland, SC

In [None]:
# read csv

richland_all_2018 = pd.read_csv('./all_mortgage_data/richland_all_2018.csv', dtype={
    'county_code': str,
    'census_tract': str
})

richland_all_2019 = pd.read_csv('./all_mortgage_data/richland_all_2019.csv', dtype={
    'county_code': str,
    'census_tract': str
})

richland_all_2020 = pd.read_csv('./all_mortgage_data/richland_all_2020.csv', dtype={
    'county_code': str,
    'census_tract': str
})

richland_all_2021 = pd.read_csv('./all_mortgage_data/richland_all_2021.csv', dtype={
    'county_code': str,
    'census_tract': str
})

In [None]:
# filter to reverse mortgage

richland_rm_2018 = richland_all_2018[(richland_all_2018['reverse_mortgage'] == 1)]
richland_rm_2019 = richland_all_2019[(richland_all_2019['reverse_mortgage'] == 1)]
richland_rm_2020 = richland_all_2020[(richland_all_2020['reverse_mortgage'] == 1)]
richland_rm_2021 = richland_all_2021[(richland_all_2021['reverse_mortgage'] == 1)]

In [None]:
# concatenate 2018-2021 years

richland_rm_all = pd.concat((richland_rm_2018, richland_rm_2019, richland_rm_2020, richland_rm_2021), axis=0)
richland_rm_all.info()

In [None]:
# export
richland_rm_all.to_csv('./reverse_mortgage_data/richland_rm_all.csv', index=False)

## Greenville, SC

In [None]:
# read csv

greenville_all_2018 = pd.read_csv('./all_mortgage_data/greenville_all_2018.csv', dtype={
    'county_code': str,
    'census_tract': str
})

greenville_all_2019 = pd.read_csv('./all_mortgage_data/greenville_all_2019.csv', dtype={
    'county_code': str,
    'census_tract': str
})

greenville_all_2020 = pd.read_csv('./all_mortgage_data/greenville_all_2020.csv', dtype={
    'county_code': str,
    'census_tract': str
})

greenville_all_2021 = pd.read_csv('./all_mortgage_data/greenville_all_2021.csv', dtype={
    'county_code': str,
    'census_tract': str
})

# filter to reverse mortgage

greenville_rm_2018 = greenville_all_2018[(greenville_all_2018['reverse_mortgage'] == 1)]
greenville_rm_2019 = greenville_all_2019[(greenville_all_2019['reverse_mortgage'] == 1)]
greenville_rm_2020 = greenville_all_2020[(greenville_all_2020['reverse_mortgage'] == 1)]
greenville_rm_2021 = greenville_all_2021[(greenville_all_2021['reverse_mortgage'] == 1)]

# concatenate 2018-2021 years

greenville_rm_all = pd.concat((greenville_rm_2018, greenville_rm_2019, greenville_rm_2020, greenville_rm_2021), axis=0)
greenville_rm_all.info()

In [None]:
# export
greenville_rm_all.to_csv('./reverse_mortgage_data/greenville_rm_all.csv', index=False)

## Florida

In [None]:
# read csv

florida_all_2018 = pd.read_csv('./all_mortgage_data/florida_all_2018.csv', dtype={
    'county_code': str,
    'census_tract': str
})

florida_all_2019 = pd.read_csv('./all_mortgage_data/florida_all_2019.csv', dtype={
    'county_code': str,
    'census_tract': str
})

florida_all_2020 = pd.read_csv('./all_mortgage_data/florida_all_2020.csv', dtype={
    'county_code': str,
    'census_tract': str
})

florida_all_2021 = pd.read_csv('./all_mortgage_data/florida_all_2021.csv', dtype={
    'county_code': str,
    'census_tract': str
})

In [None]:
# filter to reverse mortgage

florida_rm_2018 = florida_all_2018[(florida_all_2018['reverse_mortgage'] == 1)]
florida_rm_2019 = florida_all_2019[(florida_all_2019['reverse_mortgage'] == 1)]
florida_rm_2020 = florida_all_2020[(florida_all_2020['reverse_mortgage'] == 1)]
florida_rm_2021 = florida_all_2021[(florida_all_2021['reverse_mortgage'] == 1)]


In [None]:
# concatenate 2018-2021 years

florida_rm_all = pd.concat((florida_rm_2018, florida_rm_2019, florida_rm_2020, florida_rm_2021), axis=0)
florida_rm_all.info()

In [None]:
florida_rm_all.sample(10)

In [None]:
# export
florida_rm_all.to_csv('./reverse_mortgage_data/florida_rm_all.csv', index=False)

## Colorado

In [None]:
# read csv

colorado_all_2018 = pd.read_csv('./all_mortgage_data/colorado_all_2018.csv', dtype={
    'county_code': str,
    'census_tract': str
})

colorado_all_2019 = pd.read_csv('./all_mortgage_data/colorado_all_2019.csv', dtype={
    'county_code': str,
    'census_tract': str
})

colorado_all_2020 = pd.read_csv('./all_mortgage_data/colorado_all_2020.csv', dtype={
    'county_code': str,
    'census_tract': str
})

colorado_all_2021 = pd.read_csv('./all_mortgage_data/colorado_all_2021.csv', dtype={
    'county_code': str,
    'census_tract': str
})

# filter to reverse mortgage

colorado_rm_2018 = colorado_all_2018[(colorado_all_2018['reverse_mortgage'] == 1)]
colorado_rm_2019 = colorado_all_2019[(colorado_all_2019['reverse_mortgage'] == 1)]
colorado_rm_2020 = colorado_all_2020[(colorado_all_2020['reverse_mortgage'] == 1)]
colorado_rm_2021 = colorado_all_2021[(colorado_all_2021['reverse_mortgage'] == 1)]

# concatenate 2018-2021 years

colorado_rm_all = pd.concat((colorado_rm_2018, colorado_rm_2019, colorado_rm_2020, colorado_rm_2021), axis=0)
colorado_rm_all.info()


In [None]:
colorado_rm_all.to_csv('./reverse_mortgage_data/colorado_rm_all.csv', index=False)

## All places 2020

In [298]:
all_2020 = pd.read_csv('./all_mortgage_data/2020_public_lar_one_year.csv', dtype={
    'county_code': str,
    'census_tract': str
})

  all_2020 = pd.read_csv('./all_mortgage_data/2020_public_lar_one_year.csv', dtype={


In [299]:
all_rm_2020 = all_2020[(all_2020['reverse_mortgage'] == 1) & all_2020['action_taken'] == 1]

In [300]:
# groupby state code

rm_2020_state = all_rm_2020.groupby(['state_code']).count()
rm_2020_state = rm_2020_state.sort_values(by='lei', ascending=False)
rm_2020_state_count = rm_2020_state[['lei']].reset_index()

In [301]:
rm_2020_state_count

Unnamed: 0,state_code,lei
0,CA,14877
1,FL,4547
2,TX,3669
3,CO,3395
4,AZ,3107
5,WA,2643
6,UT,1830
7,NY,1689
8,OR,1452
9,NV,1167


In [302]:
population_2020 = pd.read_csv('./census_data/population_2020.csv')

In [303]:
population_2020

Unnamed: 0,State,state_code,total_population
0,Alabama,AL,4893186.0
1,Alaska,AK,736990.0
2,Arizona,AZ,7174064.0
3,Arkansas,AR,3011873.0
4,California,CA,39346023.0
5,Colorado,CO,5684926.0
6,Connecticut,CT,3570549.0
7,Delaware,DE,967679.0
8,District of Columbia,DC,701974.0
9,Florida,FL,21216924.0


In [304]:
rm_count_2020 = pd.merge(
    rm_2020_state_count,
    population_2020,
    on='state_code',
    how='left',
    validate='1:1'
)

In [305]:
rm_count_2020

Unnamed: 0,state_code,lei,State,total_population
0,CA,14877,California,39346023.0
1,FL,4547,Florida,21216924.0
2,TX,3669,Texas,28635442.0
3,CO,3395,Colorado,5684926.0
4,AZ,3107,Arizona,7174064.0
5,WA,2643,Washington,7512465.0
6,UT,1830,Utah,3151239.0
7,NY,1689,New York,19514849.0
8,OR,1452,Oregon,4176346.0
9,NV,1167,Nevada,3030281.0


In [306]:
rm_count_2020['rm_per_1000'] = rm_count_2020['lei'] / rm_count_2020['total_population'] * 1000

In [307]:
rm_count_2020 = rm_count_2020.sort_values(by='rm_per_1000', ascending=False)

In [308]:
rm_count_2020 = rm_count_2020.reset_index(drop=True)

In [309]:
rm_count_2020

Unnamed: 0,state_code,lei,State,total_population,rm_per_1000
0,CO,3395,Colorado,5684926.0,0.597193
1,UT,1830,Utah,3151239.0,0.580724
2,ID,840,Idaho,1754367.0,0.478805
3,AZ,3107,Arizona,7174064.0,0.433088
4,NV,1167,Nevada,3030281.0,0.385113
5,CA,14877,California,39346023.0,0.378107
6,WA,2643,Washington,7512465.0,0.351815
7,OR,1452,Oregon,4176346.0,0.347672
8,DC,213,District of Columbia,701974.0,0.30343
9,FL,4547,Florida,21216924.0,0.21431


In [None]:
rm_count_2020.to_csv('./rm_count_2020.csv', index=False)

In [None]:
# groupby census_tract

rm_2020_tract = all_rm_2020.groupby(['census_tract']).count()
rm_2020_tract = rm_2020_tract.sort_values(by='lei', ascending=False)
rm_2020_tract_count = rm_2020_tract[['lei']].reset_index()

In [None]:
rm_2020_tract_count