In [1]:
import pandas as pd
import feast
import joblib
import pandas as pd
from feast import FeatureStore
from datetime import datetime
from pathlib import Path
from sklearn import tree
from sklearn.exceptions import NotFittedError
from sklearn.preprocessing import OrdinalEncoder
from sklearn.utils.validation import check_is_fitted

In [2]:
# Notebook configuration: everything lives directly under the user's home directory.
target = "loan_status"  # name of the label column

home_dir = str(Path.home())
repo_path = home_dir  # the Feast feature repository path is the home directory itself

# Serialized artifacts read by the later cells.
model_filename = home_dir + "/model.bin"
encoder_filename = home_dir + "/encoder.bin"

In [3]:
# Load the trained classifier from disk.
#
# Both serialized artifacts (model and encoder) are verified up front so that a
# missing file stops the notebook here with a clear message, instead of printing
# a hint and then failing later inside joblib.load.
missing_artifacts = [
    path for path in (model_filename, encoder_filename) if not Path(path).exists()
]
if missing_artifacts:
    raise FileNotFoundError(
        "You need to train the model first! Missing: " + ", ".join(missing_artifacts)
    )

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that you produced yourself.
classifier = joblib.load(model_filename)

In [4]:
# Open the Feast feature store backed by the repository at `repo_path`.
fs = FeatureStore(repo_path=repo_path)

  from urllib3.contrib.pyopenssl import orig_util_SSLContext as SSLContext


In [5]:
# Make online prediction (using DynamoDB for retrieving online features)
#
# Sample loan application. Every value is wrapped in a one-element list so the
# mapping can be turned into a single-row DataFrame later on.
request = dict(
    # entity keys used for the feature-store lookup
    zipcode=[76104],
    dob_ssn=["19630621_4278"],
    # applicant attributes
    person_age=[133],
    person_income=[59000],
    person_home_ownership=["RENT"],
    person_emp_length=[123.0],
    # loan attributes
    loan_intent=["PERSONAL"],
    loan_amnt=[35000],
    loan_int_rate=[16.02],
)

In [6]:
# Fetch the stored features for this applicant from Feast.
# The entity keys are taken from the first (only) entry of the request lists.
zipcode, dob_ssn = request["zipcode"][0], request["dob_ssn"][0]

feature_vector = fs.get_online_features(
    features=[
        # features from the "zipcode_features" view
        *(
            f"zipcode_features:{name}"
            for name in (
                "city",
                "state",
                "location_type",
                "tax_returns_filed",
                "population",
                "total_wages",
            )
        ),
        # features from the "credit_history" view
        *(
            f"credit_history:{name}"
            for name in (
                "credit_card_due",
                "mortgage_due",
                "student_loan_due",
                "vehicle_loan_due",
                "hard_pulls",
                "missed_payments_2y",
                "missed_payments_1y",
                "missed_payments_6m",
                "bankruptcies",
            )
        ),
    ],
    entity_rows=[{"zipcode": zipcode, "dob_ssn": dob_ssn}],
).to_dict()

In [27]:
# Display the feature views available in the feature store.
# NOTE(review): the recorded output of this cell is a KeyboardInterrupt, so the
# call did not complete in the saved run.
fs.list_feature_views()

KeyboardInterrupt: 

In [19]:

# Ad-hoc check: fetch only city and state for two zipcodes and display the result.
fs.get_online_features(
    features=[f"zipcode_features:{field}" for field in ("city", "state")],
    entity_rows=[{"zipcode": code} for code in (76104, 7675)],
).to_dict()




{'zipcode': [76104], 'dob_ssn': ['19630621_4278'], 'tax_returns_filed': [None], 'state': [None], 'location_type': [None], 'city': [None], 'total_wages': [None], 'population': [None], 'missed_payments_6m': [0], 'bankruptcies': [0], 'hard_pulls': [1], 'credit_card_due': [3343], 'student_loan_due': [44375], 'mortgage_due': [378847], 'missed_payments_2y': [0], 'vehicle_loan_due': [11506], 'missed_payments_1y': [0]}


In [10]:
# Inspect the feature values returned by the online lookup before they are
# merged with the request.
print(feature_vector)

In [11]:
# Combine the request payload with the Feast lookup into one DataFrame.
# For keys present in both mappings the Feast value replaces the request value,
# while the key keeps its original position.
features = {**request, **feature_vector}
features_df = pd.DataFrame.from_dict(features)

Unnamed: 0,zipcode,dob_ssn,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_amnt,loan_int_rate,tax_returns_filed,...,population,missed_payments_6m,bankruptcies,hard_pulls,credit_card_due,student_loan_due,mortgage_due,missed_payments_2y,vehicle_loan_due,missed_payments_1y
0,76104,19630621_4278,133,59000,RENT,123.0,PERSONAL,35000,16.02,,...,,0,0,1,3343,44375,378847,0,11506,0


In [12]:
# Preview the combined request + feature-store columns.
features_df.head()

In [13]:
# Ordinal-encode the categorical columns using the persisted encoder.
# NOTE: the encoded values overwrite the original columns of features_df, so
# running this cell a second time would pass already-encoded values to the encoder.
categorical_features = [
    "person_home_ownership",
    "loan_intent",
    "city",
    "state",
    "location_type",
]

encoder = joblib.load(encoder_filename)  # deserialize the saved encoder

categorical_values = features_df[categorical_features]
features_df[categorical_features] = encoder.transform(categorical_values)

Unnamed: 0,zipcode,dob_ssn,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_amnt,loan_int_rate,tax_returns_filed,...,population,missed_payments_6m,bankruptcies,hard_pulls,credit_card_due,student_loan_due,mortgage_due,missed_payments_2y,vehicle_loan_due,missed_payments_1y
0,76104,19630621_4278,133,59000,-1.0,123.0,-1.0,35000,16.02,,...,,0,0,1,3343,44375,378847,0,11506,0


In [14]:
# Preview the frame after the categorical columns have been encoded.
features_df.head()

In [15]:
# NOTE(review): these imports sit in the middle of the notebook rather than in
# the import cell at the top.
from sklearn.impute import SimpleImputer
import numpy as np
# Imputer configured with the "most_frequent" strategy.
# NOTE(review): in the cells visible here the imputer is never fit or applied to
# features_df, and `np` is not used — confirm whether an imputation step is missing.
imputer = SimpleImputer(strategy="most_frequent")

ValueError: Input X contains NaN.
DecisionTreeClassifier does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [None]:
# Build the model input, run the classifier, and keep the predicted label.
#
# The entity keys are not model inputs. "prediction" is excluded as well so that
# re-running this cell does not feed a previous prediction back into the model;
# the earlier form also raised KeyError on a second run because "zipcode" and
# "dob_ssn" had already been dropped.
non_feature_columns = {"zipcode", "dob_ssn", "prediction"}

# Alphabetical column order — presumably the order used at training time; confirm.
model_columns = sorted(
    column for column in features_df.columns if column not in non_feature_columns
)

# Work on an explicit copy so adding the prediction column below is unambiguous.
features_df = features_df[model_columns].copy()

# Make prediction
features_df["prediction"] = classifier.predict(features_df[model_columns])

# The frame built from the request holds one application; take its label.
result = features_df["prediction"].iloc[0]

In [None]:
# Report the decision: label 0 means approved, label 1 means rejected.
# Any other value prints nothing.
if result in (0, 1):
    print("Loan rejected!" if result == 1 else "Loan approved!")