In [67]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## INPUT

In [68]:
# Hand-written sample of loan applications used as model input.
# The trailing comment on each row is the author's note on the expected
# outcome and whether the row comes from the dataset or was made up for testing.
SAMPLE_COLUMNS = ['loan_type', 'property_type', 'purpose', 'occupancy', 'amount', 'sex', 'income']

# Long categorical values shared by every sample row.
DWELLING_1_TO_4 = 'One-to-four family dwelling (other than manufactured housing)'
OWNER_OCCUPIED = 'Owner-occupied as a principal dwelling'

sample_rows = [
    ['Conventional', DWELLING_1_TO_4, 'Home improvement', OWNER_OCCUPIED, 588, 'Male', 313],  # APPROVED - FROM DATASET
    ['VA-guaranteed', DWELLING_1_TO_4, 'Home purchase', OWNER_OCCUPIED, 600, 'Female', 80],  # APPROVED - FROM DATASET
    ['FHA-insured', DWELLING_1_TO_4, 'Home purchase', OWNER_OCCUPIED, 589, 'Male', 124],  # NOT APPROVED - FROM DATASET
    ['Conventional', DWELLING_1_TO_4, 'Home improvement', OWNER_OCCUPIED, 35, 'Male', 12],  # NOT APPROVED - FROM DATASET

    ['Conventional', DWELLING_1_TO_4, 'Home improvement', OWNER_OCCUPIED, 500, 'Male', 10],  # NOT APPROVED - TESTING
    ['Conventional', DWELLING_1_TO_4, 'Home improvement', OWNER_OCCUPIED, 10, 'Male', 500],  # APPROVED - TESTING
]

df = pd.DataFrame(sample_rows, columns=SAMPLE_COLUMNS)


# Encode Categorical Features

In [69]:
# Show each column's dtype; the object-dtype columns are the ones selected
# for one-hot encoding in the cells that follow.
df.dtypes

loan_type        object
property_type    object
purpose          object
occupancy        object
amount            int64
sex              object
income            int64
dtype: object

In [70]:
from sklearn.compose import make_column_selector as selector

# Build a selector that matches object-dtype columns, then apply it to df
# to get the list of categorical column names.
categorical_columns_selector = selector(dtype_include=object)
categorical_columns = categorical_columns_selector(df)
# Bare expression: display the selected column names as the cell output.
categorical_columns


['loan_type', 'property_type', 'purpose', 'occupancy', 'sex']

In [71]:
# One-hot encode the categorical columns; each becomes a set of
# "<column>_<value>" indicator columns and the original column is removed.
# Not idempotent: df is overwritten, so re-running this cell without first
# rebuilding df fails because the source columns no longer exist.
df = pd.get_dummies(data=df, columns=categorical_columns)

### Making sure the data has the same columns (and column order) as the model

In [72]:
# Full, ordered list of feature columns the input frame must end up with
# (presumably the columns the pickled model was trained on; confirm against training code).
MODEL_COLUMNS = [
    'amount',
    'income',
    'loan_type_Conventional',
    'loan_type_FHA-insured',
    'loan_type_FSA/RHS-guaranteed',
    'loan_type_VA-guaranteed',
    'property_type_Manufactured housing',
    'property_type_One-to-four family dwelling (other than manufactured housing)',
    'purpose_Home improvement',
    'purpose_Home purchase',
    'purpose_Refinancing',
    'occupancy_Not applicable',
    'occupancy_Not owner-occupied as a principal dwelling',
    'occupancy_Owner-occupied as a principal dwelling',
    'sex_Female',
    'sex_Information not provided by applicant in mail, Internet, or telephone application',
    'sex_Male',
    'sex_Not applicable',
]

# The sample rows only contain some category values, so get_dummies produced
# only some indicator columns. Add every missing one as an all-zero column.
absent_columns = [name for name in MODEL_COLUMNS if name not in df.columns]
for name in absent_columns:
    df[name] = [0] * len(df)

In [73]:
# Keep exactly the MODEL_COLUMNS, in that order; any other column is dropped.
# Raises KeyError if one of the listed columns is missing from df.
df = df.loc[:, MODEL_COLUMNS]

In [74]:
import pickle

# Load the serialized model from the file named 'model' in the working directory.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load a model file that comes from a trusted source.
# The `with` block closes the file handle once the model has been read.
with open('model', 'rb') as model_file:
    model = pickle.load(model_file)

# Predict one label per row of the prepared feature frame.
result = model.predict(df)

# Expected labels per the row annotations on the six sample rows, assuming 1 means
# approved and 0 means not approved: [1 1 0 0 0 1]
print(result)

[1 1 1 0 0]
