In [1]:
import joblib
import pandas as pd
from feast import FeatureStore
from pathlib import Path

In [4]:
# To run this notebook locally, uncomment the lines below and replace the example values with your own AWS resources.
# import os
# os.environ['FEAST_S3_BUCKET']="s-mltest2"
# os.environ['FEAST_IAM_ROLE_ARN']="arn:aws:iam::834092605248:role/s-mltest2-for-redshift"
# os.environ['FEAST_REDSHIFT_CLUSTER']="s-mltest2"
# os.environ['FEAST_REGION']="eu-west-3"
# os.environ['AWS_DEFAULT_PROFILE'] = 'gdc'

In [5]:
# Location of the Feast feature repository, relative to the working directory.
repo_path = "../featurestore/feast/repo"

# File names of the serialized classifier and categorical encoder that the
# later cells load with joblib.
model_filename, encoder_filename = "model.bin", "encoder.bin"

In [6]:
# Load the trained classifier.
#
# Both artifacts are checked up front: the classifier is loaded here and the
# encoder file is loaded by a later cell. Failing fast with an explicit error
# avoids printing a hint and then crashing (or crashing several cells later)
# with a less obvious message.
missing_artifacts = [
    artifact
    for artifact in (model_filename, encoder_filename)
    if not Path(artifact).exists()
]
if missing_artifacts:
    raise FileNotFoundError(
        "You need to train the model first! Missing file(s): "
        + ", ".join(missing_artifacts)
    )

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts produced by a trusted training run.
classifier = joblib.load(model_filename)

In [7]:
# Create the Feast FeatureStore client for the feature repository located at
# `repo_path`; `fs` is used below to fetch online features.
fs = FeatureStore(repo_path=repo_path)

  from urllib3.contrib.pyopenssl import orig_util_SSLContext as SSLContext


In [19]:
# Example loan application used for the online prediction
# (online features are retrieved from DynamoDB).
# The entity keys (zipcode, dob_ssn, person_home_ownership, loan_intent) are
# sent to Feast in the next cell; the remaining fields are joined to the
# returned features afterwards.
request = dict(
    zipcode=76104,
    dob_ssn="19630621_4278",
    person_age=33,
    person_income=59000,
    person_home_ownership="RENT",
    person_emp_length=12.0,
    loan_intent="PERSONAL",
    loan_amnt=35000,
    loan_int_rate=16.02,
)

In [20]:
# Get online features from Feast.

# Entity keys that identify the rows to look up; their values come straight
# from the request.
entity_keys = ("zipcode", "dob_ssn", "person_home_ownership", "loan_intent")
entity_row = {key: request[key] for key in entity_keys}

# Fully qualified feature references in "<feature view>:<feature name>" form.
feature_refs = [
    # zipcode-level features
    "zipcode_features:city",
    "zipcode_features:state",
    "zipcode_features:location_type",
    "zipcode_features:tax_returns_filed",
    "zipcode_features:population",
    "zipcode_features:total_wages",
    # credit history of the applicant
    "credit_history:credit_card_due",
    "credit_history:mortgage_due",
    "credit_history:student_loan_due",
    "credit_history:vehicle_loan_due",
    "credit_history:hard_pulls",
    "credit_history:missed_payments_2y",
    "credit_history:missed_payments_1y",
    "credit_history:missed_payments_6m",
    "credit_history:bankruptcies",
    # encoded versions of the categorical request fields
    "person_home_ownership:person_home_ownership_enc",
    "loan_intent:loan_intent_enc",
]

online_response = fs.get_online_features(
    entity_rows=[entity_row],
    features=feature_refs,
)

# Convert the response into a plain dict for further processing.
feature_vector = online_response.to_dict()

In [21]:
# Show the raw response: a dict mapping each entity key / feature name to a
# list of values (a single-element list here, since one entity row was requested).
print(feature_vector)

{'zipcode': [76104], 'dob_ssn': ['19630621_4278'], 'location_type': ['PRIMARY'], 'city': ['FORT WORTH'], 'population': [10534], 'total_wages': [142325465], 'state': ['TX'], 'tax_returns_filed': [6058], 'credit_card_due': [3343], 'missed_payments_2y': [0], 'missed_payments_6m': [0], 'vehicle_loan_due': [11506], 'hard_pulls': [1], 'student_loan_due': [44375], 'bankruptcies': [0], 'missed_payments_1y': [0], 'mortgage_due': [378847], 'person_home_ownership_enc': [0], 'loan_intent_enc': [0]}


In [22]:
# Join the online features to the request fields.

# Work on a copy so the original request stays untouched, and remove the two
# raw categorical fields: their encoded counterparts
# (person_home_ownership_enc, loan_intent_enc) come from the feature store.
features = {**request}
features.pop("person_home_ownership")
features.pop("loan_intent")

# Values from Feast take precedence for keys present in both dicts
# (zipcode, dob_ssn); new keys are appended after the request fields.
features = {**features, **feature_vector}

# Feast values are lists while the request values are scalars; the result
# is a DataFrame built from that combined mapping.
features_df = pd.DataFrame.from_dict(features)

In [23]:
# Preview the joined frame: request fields plus the online features,
# with the categorical columns not yet ordinal-encoded.
features_df.head()

Unnamed: 0,zipcode,dob_ssn,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,location_type,city,population,...,missed_payments_2y,missed_payments_6m,vehicle_loan_due,hard_pulls,student_loan_due,bankruptcies,missed_payments_1y,mortgage_due,person_home_ownership_enc,loan_intent_enc
0,76104,19630621_4278,33,59000,12.0,35000,16.02,PRIMARY,FORT WORTH,10534,...,0,0,11506,1,44375,0,0,378847,0,0


In [24]:
# Apply ordinal encoding to categorical features.

# Columns that are still textual and must be converted by the fitted encoder.
categorical_features = ["city", "state", "location_type"]

# Load the encoder from disk.
# NOTE: joblib.load unpickles the file; only load trusted artifacts.
encoder = joblib.load(encoder_filename)

# Transform the categorical columns and write the encoded values back into
# the same columns of the frame.
encoded_values = encoder.transform(features_df[categorical_features])
features_df[categorical_features] = encoded_values

In [25]:
# Preview the frame again: the categorical columns now hold the values
# produced by the encoder.
features_df.head()

Unnamed: 0,zipcode,dob_ssn,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,location_type,city,population,...,missed_payments_2y,missed_payments_6m,vehicle_loan_due,hard_pulls,student_loan_due,bankruptcies,missed_payments_1y,mortgage_due,person_home_ownership_enc,loan_intent_enc
0,76104,19630621_4278,33,59000,12.0,35000,16.02,0.0,3115.0,10534,...,0,0,11506,1,44375,0,0,378847,0,0


In [26]:
# Build the model input: every column except the entity keys, in sorted
# column order (presumably the order the classifier saw during training;
# TODO confirm against the training code).
# "prediction" is excluded too and absent columns are ignored, so re-running
# this cell neither raises a KeyError nor feeds an earlier prediction back
# into the model.
non_feature_columns = ["zipcode", "dob_ssn", "prediction"]
model_input = features_df.drop(columns=non_feature_columns, errors="ignore")
model_input = model_input.reindex(sorted(model_input.columns), axis=1)

# Make prediction. assign() returns a new frame, so the column is not written
# into a column-subset of another DataFrame (which can trigger
# SettingWithCopyWarning).
features_df = model_input.assign(prediction=classifier.predict(model_input))

# Single-row request, so the first (only) prediction is the result.
result = features_df["prediction"].iloc[0]

In [27]:
# Report the decision: label 1 is printed as a rejection and label 0 as an
# approval; any other value prints nothing.
if result == 1:
    print("Loan rejected!")
elif result == 0:
    print("Loan approved!")

Loan approved!
