# Home Mortgage Disclosure Act Final Project

In [4]:
import warnings
import pandas as pd
# Silence pandas' SettingWithCopyWarning for all code executed after this line.
# NOTE(review): later cells add columns to frames produced by indexing another
# frame (e.g. `filtered = data_reduced[...]` followed by `filtered[...] = ...`),
# which is the pattern this warning reports. Taking an explicit `.copy()` at
# those points would remove the need for a notebook-wide suppression.
warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning)


### Step 1. Dataset Selection

In [5]:
# Raw HMDA extract (presumably the Georgia state file, going by its name).
# The path is relative, so the CSV has to sit in the notebook's working directory.
file = "state_GA.csv"
data = pd.read_csv(file)

# Keep only the columns used by this analysis. The explicit .copy() makes
# data_reduced an independent frame rather than a slice of `data`, so the
# in-place column conversions performed on it later in the notebook are
# unambiguous and do not depend on pandas' view/copy heuristics.
data_reduced = data[['lei', 'derived_ethnicity', 'derived_race', 'derived_sex', 'loan_amount', 'debt_to_income_ratio', 'loan_purpose', 'loan_term', 'action_taken', 'denial_reason-1', 'denial_reason-2',
                     'income', 'applicant_age', 'total_loan_costs', 'interest_rate', 'applicant_race-1', 'applicant_race-2', 'applicant_sex', 'co-applicant_sex']].copy()

FileNotFoundError: [Errno 2] No such file or directory: 'state_GA.csv'

In [100]:
# Persist the reduced column set and load it back as `hmda_data`.
# Running this once is enough; re-running simply rewrites the same file.
data_reduced.to_csv(path_or_buf="state_GA_reduced.csv", index=False)
hmda_data = pd.read_csv(filepath_or_buffer="state_GA_reduced.csv")

In [None]:
# Lookup tables from numeric codes to display labels for the sex and race columns.
# NOTE(review): the codes presumably follow the HMDA data dictionary — confirm
# against the official field specification.
# The row filter further down in this cell keeps a row only when its applicant
# sex / first race code is a key of these maps (second race and co-applicant sex
# may alternatively be missing), so any code not listed here removes the row.
sex_map = {
    1: "Male",
    2: "Female",
    6: "Applicant selected both male and female"
}

# Keys are kept in their original (non-numeric) order; the two-digit codes look
# like sub-categories of the single-digit code they start with — TODO confirm.
race_map = {
    1: "American Indian or Alaska Native",
    2: "Asian",
    21: "Asian Indian",
    22: "Chinese",
    23: "Filipino",
    24: "Japanese",
    25: "Korean",
    26: "Vietnamese",
    27: "Other Asian",
    3: "Black or African American",
    4: "Native Hawaiian or Other Pacific Islander",
    41: "Native Hawaiian",
    42: "Guamanian or Chamorro",
    43: "Samoan",
    44: "Other Pacific Islander",
    5: "White"
}

# Coerce the coded race/sex columns to numbers so they can be compared with the
# integer keys of race_map / sex_map; entries that cannot be parsed become NaN.
data_reduced['applicant_race-1'] = pd.to_numeric(data_reduced['applicant_race-1'], errors='coerce')
data_reduced['applicant_race-2'] = pd.to_numeric(data_reduced['applicant_race-2'], errors='coerce')
data_reduced['applicant_sex'] = pd.to_numeric(data_reduced['applicant_sex'], errors='coerce')
data_reduced['co-applicant_sex'] = pd.to_numeric(data_reduced['co-applicant_sex'], errors='coerce')

# Keep a row only when:
#   * the first race code and the applicant sex code are known map keys, and
#   * the second race code and the co-applicant sex code are each either
#     missing (NaN) or a known map key.
# NOTE(review): the co-applicant check treats only NaN as "no co-applicant".
# If the source data encodes that case with a numeric code that is not in
# sex_map (the HMDA data dictionary appears to use 5 — confirm), every
# single-applicant row is dropped here.
# The trailing .copy() makes `filtered` an independent frame, so the columns
# added to it afterwards are not assigned onto a slice of data_reduced.
filtered = data_reduced[
    data_reduced['applicant_race-1'].isin(race_map.keys()) &
    (
        data_reduced['applicant_race-2'].isna() |
        data_reduced['applicant_race-2'].isin(race_map.keys())
    ) &
    data_reduced['applicant_sex'].isin(sex_map.keys()) &
    (
        data_reduced['co-applicant_sex'].isna() |
        data_reduced['co-applicant_sex'].isin(sex_map.keys())
    )
].copy()

# Attach human-readable labels for both race codes; a code that is missing
# (or not a key of race_map) yields NaN in the corresponding label column.
filtered = filtered.assign(
    race_1_str=filtered['applicant_race-1'].map(race_map),
    race_2_str=filtered['applicant_race-2'].map(race_map),
)
def combine_races_str(row):
    """Build one race label from a row's two race label fields.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the keys 'race_1_str' and 'race_2_str'.

    Returns
    -------
    The 'race_1_str' value unchanged when 'race_2_str' is missing (NA/None)
    or an empty string; otherwise the string "<race_1_str>, <race_2_str>".
    """
    secondary = row['race_2_str']
    primary = row['race_1_str']
    second_is_blank = pd.isna(secondary) or secondary == ""
    return primary if second_is_blank else f"{primary}, {secondary}"
def combine_sexs_str(row):
    """Build one sex label from a row's applicant and co-applicant fields.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the keys 'applicant_sex' and 'co-applicant_sex'.

    Returns
    -------
    The 'applicant_sex' value unchanged when 'co-applicant_sex' is missing
    (NA/None) or an empty string; otherwise the string
    "<applicant_sex>, <co-applicant_sex>".
    """
    partner = row['co-applicant_sex']
    applicant = row['applicant_sex']
    partner_is_blank = pd.isna(partner) or partner == ""
    return applicant if partner_is_blank else f"{applicant}, {partner}"

# One combined race label per row, built from race_1_str / race_2_str.
filtered['derived_race_new'] = filtered.apply(combine_races_str, axis=1)

# Replace the numeric sex codes with their labels first, so the combined sex
# label below is built from readable strings rather than raw codes.
filtered['applicant_sex'] = filtered['applicant_sex'].map(sex_map)
filtered['co-applicant_sex'] = filtered['co-applicant_sex'].map(sex_map)

filtered['derived_sex_new'] = filtered.apply(combine_sexs_str, axis=1)

# Drop rows with action_taken == 6 and rows whose interest_rate is the literal
# string 'Exempt' (it cannot be converted to a number below). The .copy() keeps
# the following column assignments from targeting a slice of the previous frame.
filtered = filtered[
    (filtered['action_taken'] != 6) & (filtered['interest_rate'] != 'Exempt')
].copy()

# Binary outcome for the action taken: codes 1, 2 and 8 count as favorable (1),
# codes 3, 4, 5 and 7 as unfavorable (0); anything else becomes pd.NA.
filtered['favorable_action_taken'] = filtered['action_taken'].apply(
    lambda x: 1 if x in [1, 2, 8] else (0 if x in [3, 4, 5, 7] else pd.NA)
)

# Binary outcome for the interest rate: rates at or below 7.5 count as favorable.
# NOTE(review): a missing rate (NaN) compares False against 7.5 and is therefore
# labelled 0, i.e. "unfavorable". If rows without a rate exist (for example
# applications that never received one — confirm), they are indistinguishable
# from rows with a genuinely high rate in this column.
filtered['favorable_interest_rate'] = filtered['interest_rate'].apply(
    lambda x: 1 if float(x) <= 7.5 else 0)

unique_races = sorted(filtered['derived_race_new'].unique())
unique_sexes = sorted(filtered['derived_sex_new'].unique())

# New encoding dictionaries: each distinct label gets its position in the
# alphabetically sorted label list, so the codes depend on which labels are
# present in the data.
final_race_encoding = {race: i for i, race in enumerate(unique_races)}
final_sex_encoding = {sex: i for i, sex in enumerate(unique_sexes)}

filtered['derived_race_encoded'] = filtered['derived_race_new'].map(final_race_encoding)
filtered['derived_sex_encoded'] = filtered['derived_sex_new'].map(final_sex_encoding)


filtered.to_csv("state_GA_reduced_encoded.csv", index=False)

array([1, 0])

In [107]:
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Columns to compare, each mapped to the label used in table headers and chart text.
protected_classes = {
    "derived_race_new": "Race",
    "derived_sex_new": "Sex"
}
dependent_variables = {
    "favorable_action_taken": "Favorable Action Taken",
    "favorable_interest_rate": "Favorable Interest Rate"
}

# Charts are written to ./charts; create the folder if it is not there yet.
os.makedirs("charts", exist_ok=True)

# For every (protected class, outcome) pair: build a frequency table with
# totals and save a grouped count chart as a PNG.
tables = []
for protected_var, protected_label in protected_classes.items():
    for dependent_var, dependent_label in dependent_variables.items():
        freq_table = pd.crosstab(
            index=filtered[protected_var],
            columns=filtered[dependent_var],
            margins=True,
            dropna=False,
        )
        freq_table.columns.name = dependent_label
        freq_table.index.name = protected_label
        tables.append((protected_var, dependent_var, freq_table))

        # Create and save bar chart (the figure is closed right away, so it is
        # written to disk but not rendered inline).
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.countplot(data=filtered, x=protected_var, hue=dependent_var, ax=ax)
        ax.set_title(f"{protected_label} vs {dependent_label}")
        ax.set_xlabel(protected_label)
        ax.set_ylabel("Count")
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        fig.tight_layout()
        fig.savefig(f"charts/{protected_var}_vs_{dependent_var}.png")
        plt.close(fig)

# Display all frequency tables
for protected_var, dependent_var, table in tables:
    print(f"\n===== Frequency Table: {protected_var} vs {dependent_var} =====\n")
    display(table)



===== Frequency Table: derived_race_new vs favorable_action_taken =====



Favorable Action Taken,0,1,All
Race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
American Indian or Alaska Native,162,182,344
"American Indian or Alaska Native, Asian",7,11,18
"American Indian or Alaska Native, Asian Indian",0,2,2
"American Indian or Alaska Native, Black or African American",51,50,101
"American Indian or Alaska Native, Japanese",1,0,1
...,...,...,...
"White, Other Asian",18,13,31
"White, Other Pacific Islander",6,5,11
"White, Samoan",3,2,5
"White, Vietnamese",1,6,7



===== Frequency Table: derived_race_new vs favorable_interest_rate =====



Favorable Interest Rate,0,1,All
Race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
American Indian or Alaska Native,205,139,344
"American Indian or Alaska Native, Asian",8,10,18
"American Indian or Alaska Native, Asian Indian",0,2,2
"American Indian or Alaska Native, Black or African American",60,41,101
"American Indian or Alaska Native, Japanese",1,0,1
...,...,...,...
"White, Other Asian",21,10,31
"White, Other Pacific Islander",7,4,11
"White, Samoan",4,1,5
"White, Vietnamese",1,6,7



===== Frequency Table: derived_sex_new vs favorable_action_taken =====



Favorable Action Taken,0,1,All
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Applicant selected both male and female, Applicant selected both male and female",5,8,13
"Applicant selected both male and female, Female",63,89,152
"Applicant selected both male and female, Male",43,62,105
"Female, Applicant selected both male and female",13,16,29
"Female, Female",2125,3028,5153
"Female, Male",12134,19722,31856
"Male, Applicant selected both male and female",19,23,42
"Male, Female",22844,41938,64782
"Male, Male",1513,2550,4063
All,38759,67436,106195



===== Frequency Table: derived_sex_new vs favorable_interest_rate =====



Favorable Interest Rate,0,1,All
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Applicant selected both male and female, Applicant selected both male and female",7,6,13
"Applicant selected both male and female, Female",79,73,152
"Applicant selected both male and female, Male",51,54,105
"Female, Applicant selected both male and female",15,14,29
"Female, Female",2687,2466,5153
"Female, Male",16990,14866,31856
"Male, Applicant selected both male and female",23,19,42
"Male, Female",33599,31183,64782
"Male, Male",2002,2061,4063
All,55453,50742,106195
