# Import trained models

In [1]:
# Load the two pre-trained XGBoost regressors from their saved model files.
import xgboost as xgb

# Housing model
model_file_path = '/scratch/network/lg6248/housing-model/xgb_housing.model_upto2021'
housing_model = xgb.XGBRegressor()
housing_model.load_model(model_file_path)

# Loan model (note: `model_file_path` is reused and ends up holding this path)
model_file_path = '/scratch/network/lg6248/loan-model/xgb_loan.model_upto2017_withoutStateCountyCodes-SUBMIT'
loan_model = xgb.XGBRegressor()
loan_model.load_model(model_file_path)

In [2]:
# import dataset for housing
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import math
# Read the (not normalized) housing price table. `process_inputs` later checks
# user-supplied county names against the 'County' column of this frame.
housing_df = pd.read_csv('/scratch/network/lg6248/housing-model/csv_housing_XGBoost_PriceLimit_2000_2022_not_normalized')
# Bare expression: renders the frame as this cell's output.
housing_df

Unnamed: 0,Price,State,County,Time,Bedrooms,Year,Month
0,158704.158466,NY,Queens County,2000-01-31,1,2000,1
1,142042.736354,TX,Tarrant County,2000-01-31,3,2000,1
2,151032.122248,IA,Linn County,2000-01-31,3,2000,1
3,137097.551500,OH,Summit County,2000-01-31,3,2000,1
4,133334.515021,VT,Washington County,2000-01-31,3,2000,1
...,...,...,...,...,...,...,...
7776271,324835.405696,FL,Marion County,2022-12-31,3,2022,12
7776272,287029.753423,PA,Adams County,2022-12-31,3,2022,12
7776273,153940.746210,LA,Rapides Parish,2022-12-31,3,2022,12
7776274,237366.825163,SC,Orangeburg County,2022-12-31,3,2022,12


In [3]:
# loan dataset: nothing is loaded in this cell yet -- it only repeats the
# imports already made for the housing dataset above
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import math

# Program to make predictions and determine optimal time

In [4]:
# joblib is used below to load the persisted binary encoder (the same one used during training of the XGBoost housing model) and the price scaler
import joblib

## reverse_normalize

In [5]:
from sklearn.preprocessing import StandardScaler

def reverse_normalize(prices):
    """Map normalized housing prices back to their original scale.

    Loads the scaler persisted at
    '/scratch/network/lg6248/housing-model/scaler.pkl' and applies its
    ``inverse_transform`` to the given prices.

    Parameters
    ----------
    prices : array-like
        One-dimensional collection of normalized prices. Any array-like is
        accepted (NumPy array, list, pandas Series, ...).

    Returns
    -------
    The value returned by ``scaler.inverse_transform`` for the prices
    arranged as a single column (one row per input price).
    """
    # Local import so this cell does not depend on a later cell importing numpy.
    import numpy as np

    # Load the persisted scaler (presumably the StandardScaler fitted on the
    # training prices -- confirm). joblib files are pickles: this path must
    # only ever point at a trusted file.
    scaler = joblib.load('/scratch/network/lg6248/housing-model/scaler.pkl')

    # Arrange the prices as a single column of shape (n, 1). np.asarray makes
    # this work for lists and pandas Series as well as NumPy arrays.
    price_column = np.asarray(prices).reshape(-1, 1)

    # Undo the normalization and return the prices on their original scale.
    un_normalized_prices = scaler.inverse_transform(price_column)
    return un_normalized_prices

## process_inputs

In [6]:
from category_encoders import BinaryEncoder

# Column order applied to the encoded housing features (see `process_inputs`)
# before they are handed to the housing model: six binary-encoded State
# columns, eleven binary-encoded County columns, then the numeric features.
column_order = (
    [f'State_{bit}' for bit in range(6)]
    + [f'County_{bit}' for bit in range(11)]
    + ['Bedrooms', 'Year', 'Month']
)

def process_inputs(housing_features, loan_features):
    """Validate and binary-encode the housing features for the housing model.

    Parameters
    ----------
    housing_features : pandas.DataFrame
        Frame with (at least) the columns 'Year', 'State', 'County',
        'Bedrooms' and 'Month'. It is NOT modified by this function.
    loan_features : pandas.DataFrame
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        The output of the persisted binary encoder, without the placeholder
        'Price' and 'Time' columns, with columns arranged as `column_order`.

    Raises
    ------
    AssertionError
        If any county in `housing_features` does not occur in
        ``housing_df['County']``.
    """
    # Check that every passed-in county is in the housing dataset used to
    # train the model.
    # NOTE(review): the county is checked against all counties regardless of
    # state, so a valid county name paired with the wrong state still passes.
    known_counties = housing_df['County'].values
    for county_name in housing_features['County']:
        assert county_name in known_counties, "Make sure to end county name with \'County\' and capitalize (see note 2). Given county is not included in training data of housing model."

    # -------- binary encoding for housing model: county, state --------
    # joblib files are pickles: this path must only point at a trusted file.
    binary_encoder = joblib.load('/scratch/network/lg6248/housing-model/binary_encoder.pkl')

    # The encoder is given zero-filled 'Price' and 'Time' placeholder columns
    # (presumably because it was fitted on a frame containing them -- confirm).
    # `assign` returns a new frame, so the caller's DataFrame is left untouched.
    features_with_placeholders = housing_features.assign(Price=0.0, Time=0.0)
    encoded_data = binary_encoder.transform(features_with_placeholders)

    # Drop the placeholders again and put the columns into `column_order`.
    return encoded_data.drop(columns=['Price', 'Time']).reindex(columns=column_order)

In [7]:
def parse_years(input_year):
    """Validate that `input_year` is a four-digit year string and return it.

    Parameters
    ----------
    input_year : str
        Year in the format "YYYY".

    Returns
    -------
    str
        `input_year`, unchanged.

    Raises
    ------
    AssertionError
        If `input_year` is not exactly four digits.
    """
    import re

    # Use fullmatch instead of match with '^...$': '$' also matches just
    # before a trailing newline, so "2017" followed by a newline used to pass.
    assert re.fullmatch(r'\d{4}', input_year), "Incorrect year input format! Correct format is: \"YYYY\" "

    return input_year

## make_estimate

In [8]:
import numpy as np
import pandas as pd

def make_estimate(button, year=2017, month=4):
    """Submit-button callback: predict a housing price and FHA loan approval likelihood.

    Reads the current widget values (`housing_bedrooms_dropdown`,
    `housing_state_dropdown`, `housing_county_text`, `loan_income__int_text`,
    `loan_amount__int_text`), builds one-row feature frames, runs
    `housing_model` and `loan_model` on them and prints the results into the
    `output` widget.

    Parameters
    ----------
    button
        The widget that triggered the callback; not used.
    year : int, optional
        Year used for both the housing and the loan features (default 2017).
    month : int, optional
        Month used for the housing features (default 4).
    """
    with output:
        clear_output()

        # ---------------------------------- Get inputs ----------------------------------
        # Values for HOUSING
        bedrooms = housing_bedrooms_dropdown.value
        housing_state = housing_state_dropdown.value
        housing_county_name = housing_county_text.value

        print("\n")

        print("Bedrooms:", bedrooms)
        print("State:", housing_state)
        print("County:", housing_county_name)
        print("\n")

        # Values for LOAN (in thousands of dollars, see Note 1 of the interface)
        income = loan_income__int_text.value
        loan_amount = loan_amount__int_text.value

        print("Income:", income)
        print("Loan Amount:", loan_amount)

        # ------------------------------- Create HOUSING DATAFRAME -------------------------------
        housing_features = pd.DataFrame([
            {'Year': year, 'State': housing_state, 'County': housing_county_name, 'Bedrooms': bedrooms, 'Month': month}
        ])

        # ------------------------------- Create LOAN DATAFRAME -------------------------------
        loan_features = pd.DataFrame([
            {'year': year, 'applicant_income_000s': income, 'loan_amount_000s': loan_amount}
        ])

        # ------------------------------- PROCESS DATAFRAMES -------------------------------
        # Validates the county and binary-encodes State/County for the housing model.
        housing_features = process_inputs(housing_features, loan_features)

        # ------------------------------- MAKE PREDICTIONS -----------------------------------
        normalized_predicted_price = housing_model.predict(housing_features.values)
        predicted_denial_rate = loan_model.predict(loan_features.values)
        predicted_price = reverse_normalize(normalized_predicted_price)

        # Convert to plain Python floats and round only AFTER the subtraction:
        # rounding first and then computing `100 - rate` can reintroduce
        # binary floating-point noise (e.g. 87.65999984741211) in the output.
        price = round(float(predicted_price.flatten()[0]), 2)
        denial_rate = float(predicted_denial_rate[0])
        likelihood = round(100 - denial_rate, 2)

        print(f"\nEstimated housing price = {price}")
        print(f"\nEstimated denial rate = {round(denial_rate, 2)}%")
        print(f"Likelihood of FHA loan approval = {likelihood}%")

# Interface

In [9]:
from IPython.display import display, Javascript, Markdown, clear_output
import ipywidgets as widgets
import joblib

In [10]:
# ----------------------------------------- Notes shown to the user ---------------------------------------------------------

note = Markdown("**Note 1:** Annual gross income and loan amount are in thousands of dollars. Example: if you mean to type $36,000 --> input 36.")
display(note)

note = Markdown("**Note 2:** Make sure to capitalize the county name and end it with 'County'. For example, 'El Dorado County'.")
display(note)

# ----------- Create widgets for HOUSING FEATURES --------------------------------------------------------------------------
# housing features: Year, State, County, Bedrooms, Month
# (only Bedrooms, State and County have widgets here; make_estimate reads
# these three widgets by their global names)
header = Markdown("**Housing Features**")
display(header)
# Disabled month input (make_estimate does not read a month widget):
# housing_month_int_text = widgets.BoundedIntText(description="Month:", min=1, max=12)
housing_bedrooms_dropdown = widgets.Dropdown(description="# Bedrooms:", options=[1, 2, 3, 4])
# Despite its name, this is a free-text field, not a dropdown.
housing_state_dropdown = widgets.Text(description="State (abbreviated):", style={'description_width': 'initial'})
housing_county_text = widgets.Text(description="County Name:", style={'description_width': 'initial'})

# Create a VBox layout to contain the widgets
housing_widget_box = widgets.VBox([
    housing_bedrooms_dropdown,
    housing_state_dropdown,
    housing_county_text,
])

display(housing_widget_box)

# ----------- Create widgets for LOAN FEATURES --------------------------------------------------------------------------
# loan features: year, county_code, applicant_income_000s, loan_amount_000s.
# (only income and loan amount have widgets here; both are entered in
# thousands of dollars, see Note 1, and are bounded by the min/max below)
header = Markdown("**Loan Applicant Features**")
display(header)
loan_income__int_text = widgets.BoundedIntText(description="Annual Gross Income:", min=1, max=70, style={'description_width': 'initial'})
loan_amount__int_text = widgets.BoundedIntText(description="Loan Amount:", min=1, max=300, style={'description_width': 'initial'})

# Create a VBox layout to contain the widgets
loan_widget_box = widgets.VBox([
    loan_income__int_text,
    loan_amount__int_text,
])

# ------------------------- SUBMIT BUTTON AND CLEAR OUTPUT BUTTON ---------------------------------------------------
# `output` is the Output widget that make_estimate prints into.
submit_button = widgets.Button(description="Submit")
output = widgets.Output()
submit_button.on_click(make_estimate)

def clear_output_button_clicked(button):
    """Click handler for the "Clear Output" button: clears the `output` widget."""
    with output:
        clear_output()
    
clear_output_button = widgets.Button(description="Clear Output")
clear_output_button.on_click(clear_output_button_clicked)

# --------------------------- DISPLAY ---------------------------------------------------------------------------------

# Display the loan inputs, both buttons and the output area
# (the housing inputs were already displayed above)
display(loan_widget_box, submit_button, clear_output_button, output)

**Note 1:** Annual gross income and loan amount are in thousands of dollars. Example: if you mean to type $36,000 --> input 36.

**Note 2:** Make sure to capitalize the county name and end it with 'County'. For example, 'El Dorado County'.

**Housing Features**

VBox(children=(Dropdown(description='# Bedrooms:', options=(1, 2, 3, 4), value=1), Text(value='', description=…

**Loan Applicant Features**

VBox(children=(BoundedIntText(value=1, description='Annual Gross Income:', max=70, min=1, style=DescriptionSty…

Button(description='Submit', style=ButtonStyle())

Button(description='Clear Output', style=ButtonStyle())

Output()

In [11]:
# list of county and state codes: https://www2.census.gov/programs-surveys/decennial/2010/partners/pdf/FIPS_StateCounty_Code.pdf

# Testing

## Manual Testing