In [None]:
import pandas as pd

# South Carolina

## reverse mortgage

In [None]:
# load the 2021 originated-mortgage records; county and tract codes are read
# as text rather than numbers

sc_all_2021 = pd.read_csv(
    './all_mortgage_data/sc_all_2021_originated.csv',
    dtype={'county_code': str, 'census_tract': str},
)

# keep only the rows where reverse_mortgage equals 1

rm_sc_all_2021 = sc_all_2021.loc[sc_all_2021['reverse_mortgage'].eq(1)]

# write the reverse-mortgage subset to csv, leaving out the index column

rm_sc_all_2021.to_csv(
    './reverse_mortgage_data/south_carolina/rm_sc_all_2021.csv',
    index=False,
)


In [None]:
# summarize the reverse-mortgage subset (columns, non-null counts, dtypes)
rm_sc_all_2021.info()

## census

In [None]:
# load the ACS 2021 census extract

acs2021 = pd.read_csv('./census_data/acs_2021_sc.csv')

# the row labelled 0 is a second header row, so remove it

acs2021 = acs2021.drop(index=[0])

# keep the id, name and population columns and give them readable names

acs_2021_race = acs2021[['GEO_ID', 'NAME', 'DP05_0001E', 'DP05_0071E', 'DP05_0078E']].rename(
    columns={
        'GEO_ID': 'geo_id',
        'NAME': 'tract_name',
        'DP05_0001E': 'population',
        'DP05_0071E': 'hispanic_pp',
        'DP05_0078E': 'black_pp',
    }
)

# split geo_id on 'US'; the part after it is used as the census tract code

tract_code = acs_2021_race['geo_id'].str.split(pat='US', expand=True)

# place the census_tract column right after geo_id

acs_2021_race.insert(loc=1, column='census_tract', value=tract_code[1])

# convert the population counts to integers

acs_2021_race = acs_2021_race.astype({
    'population': int,
    'hispanic_pp': int,
    'black_pp': int,
})

# calculate hispanic & black population as a percentage of total population

acs_2021_race = acs_2021_race.assign(
    hispanic_pp_pct=lambda tracts: tracts['hispanic_pp'] / tracts['population'] * 100,
    black_pp_pct=lambda tracts: tracts['black_pp'] / tracts['population'] * 100,
)


In [None]:
# display the prepared census table
acs_2021_race

In [None]:
# export the prepared census table to csv (index column omitted)

acs_2021_race.to_csv('./census_data/sc_census_2021.csv', index=False)

## merge

In [None]:
# per census tract: count the reverse mortgages (non-null lei values) and
# carry along the largest tract_population and the smallest minority percent;
# columns are named directly in the aggregation

sc_tract_count_2021 = (
    rm_sc_all_2021
    .groupby('census_tract')
    .agg(
        rm_no=('lei', 'count'),
        tract_population=('tract_population', 'max'),
        minority_pct=('tract_minority_population_percent', 'min'),
    )
    .sort_values(by='rm_no', ascending=False)
    .reset_index()
)

# calculate mortgage count per 1000 people, then rank tracts by that rate
# (highest first) with a fresh 0..n-1 index

sc_tract_count_2021 = (
    sc_tract_count_2021
    .assign(rm_no_per1000=lambda tracts: tracts['rm_no'] / tracts['tract_population'] * 1000)
    .sort_values(by='rm_no_per1000', ascending=False)
    .reset_index(drop=True)
)

In [None]:
# attach the census columns to every tract in the reverse-mortgage counts;
# a left join keeps tracts with no census match, and validate='1:1' raises
# if census_tract is duplicated on either side

sc_rm_census = sc_tract_count_2021.merge(
    acs_2021_race,
    on='census_tract',
    how='left',
    validate='1:1',
)

In [None]:
# preview the first 20 rows of the merged table
sc_rm_census.head(20)

In [None]:
# rows whose geo_id is missing after the left merge
# (presumably tracts with no matching census record -- verify)
no_geoid = sc_rm_census.loc[sc_rm_census['geo_id'].isna()]
no_geoid

In [None]:
# export the merged tract-level table to csv (index column omitted)

sc_rm_census.to_csv('./reverse_mortgage_data/sc_tract_count_2021_0710.csv', index=False)