In [268]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## INPUT

In [269]:
# Hand-built sample applications used to smoke-test the saved model.
# The trailing comment on each row is the decision the model is expected to give.
property_kind = 'One-to-four family dwelling (other than manufactured housing)'
occupancy_kind = 'Owner-occupied as a principal dwelling'

sample_columns = ['loan_type', 'property_type', 'purpose', 'occupancy', 'amount', 'sex', 'income']
sample_rows = [
    ('Conventional', property_kind, 'Home improvement', occupancy_kind, 588, 'Male', 313),  # APPROVED
    ('VA-guaranteed', property_kind, 'Home purchase', occupancy_kind, 600, 'Female', 80),  # APPROVED
    ('FHA-insured', property_kind, 'Home purchase', occupancy_kind, 589, 'Male', 124),  # NOT APPROVED
    ('Conventional', property_kind, 'Home improvement', occupancy_kind, 35, 'Male', 12),  # NOT APPROVED
]

df = pd.DataFrame(sample_rows, columns=sample_columns)


In [270]:
# Eyeball the hand-built sample rows before any encoding is applied.
df.head()

Unnamed: 0,loan_type,property_type,purpose,occupancy,amount,sex,income
0,Conventional,One-to-four family dwelling (other than manufa...,Home improvement,Owner-occupied as a principal dwelling,588,Male,313
1,VA-guaranteed,One-to-four family dwelling (other than manufa...,Home purchase,Owner-occupied as a principal dwelling,600,Female,80
2,FHA-insured,One-to-four family dwelling (other than manufa...,Home purchase,Owner-occupied as a principal dwelling,589,Male,124
3,Conventional,One-to-four family dwelling (other than manufa...,Home improvement,Owner-occupied as a principal dwelling,35,Male,12


In [271]:
# List the raw (pre-encoding) column names of the sample frame.
df.columns

Index(['loan_type', 'property_type', 'purpose', 'occupancy', 'amount', 'sex',
       'income'],
      dtype='object')

# Encode Categorical Features

In [272]:
# Inspect dtypes: the object-typed columns are the categorical ones that need encoding.
df.dtypes

loan_type        object
property_type    object
purpose          object
occupancy        object
amount            int64
sex              object
income            int64
dtype: object

In [273]:
from sklearn.compose import make_column_selector as selector

# Build a selector that matches object-dtype columns (the string-valued
# categoricals in this frame), then apply it to df to get their names as a list.
categorical_columns_selector = selector(dtype_include=object)
categorical_columns = categorical_columns_selector(df)
categorical_columns


['loan_type', 'property_type', 'purpose', 'occupancy', 'sex']

In [274]:
# One-hot encode every categorical column found above.
# dtype is pinned to uint8 so the indicator columns are 0/1 integers on every
# pandas version; pandas >= 2.0 otherwise defaults to bool (True/False), which
# would no longer match the zero-filled integer columns added further down.
# NOTE: this rebinds df, so re-running the cell on its own fails (the original
# categorical columns are gone) -- re-run the cell that builds df first.
df = pd.get_dummies(df, columns=categorical_columns, dtype=np.uint8)
df.head()

Unnamed: 0,amount,income,loan_type_Conventional,loan_type_FHA-insured,loan_type_VA-guaranteed,property_type_One-to-four family dwelling (other than manufactured housing),purpose_Home improvement,purpose_Home purchase,occupancy_Owner-occupied as a principal dwelling,sex_Female,sex_Male
0,588,313,1,0,0,1,1,0,1,0,1
1,600,80,0,0,1,1,0,1,1,1,0
2,589,124,0,1,0,1,0,1,1,0,1
3,35,12,1,0,0,1,1,0,1,0,1


### Making sure the data has the same columns, in the same order, as `MODEL_COLUMNS`

In [275]:
# Feature columns handed to the model, in this exact order
# (presumably the columns it was fitted on -- confirm against the training code).
MODEL_COLUMNS = [
    'amount',
    'income',
    'loan_type_Conventional',
    'loan_type_FHA-insured',
    'loan_type_FSA/RHS-guaranteed',
    'loan_type_VA-guaranteed',
    'property_type_Manufactured housing',
    'property_type_One-to-four family dwelling (other than manufactured housing)',
    'purpose_Home improvement',
    'purpose_Home purchase',
    'purpose_Refinancing',
    'occupancy_Not applicable',
    'occupancy_Not owner-occupied as a principal dwelling',
    'occupancy_Owner-occupied as a principal dwelling',
    'sex_Female',
    'sex_Information not provided by applicant in mail, Internet, or telephone application',
    'sex_Male',
    'sex_Not applicable',
]

# The sample rows only contain some of the category values, so one-hot encoding
# produced only some of the indicator columns. Add each absent one as all zeros.
absent_columns = [name for name in MODEL_COLUMNS if name not in df.columns]
for name in absent_columns:
    df[name] = [0] * len(df)
df

Unnamed: 0,amount,income,loan_type_Conventional,loan_type_FHA-insured,loan_type_VA-guaranteed,property_type_One-to-four family dwelling (other than manufactured housing),purpose_Home improvement,purpose_Home purchase,occupancy_Owner-occupied as a principal dwelling,sex_Female,sex_Male,loan_type_FSA/RHS-guaranteed,property_type_Manufactured housing,purpose_Refinancing,occupancy_Not applicable,occupancy_Not owner-occupied as a principal dwelling,"sex_Information not provided by applicant in mail, Internet, or telephone application",sex_Not applicable
0,588,313,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0
1,600,80,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0
2,589,124,0,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0
3,35,12,1,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0


In [276]:
# Keep only the MODEL_COLUMNS features and put them in MODEL_COLUMNS order;
# any column of df that is not listed there is dropped.
# NOTE(review): the pd.DataFrame(...) wrapper looks redundant because
# df[MODEL_COLUMNS] is already a DataFrame -- confirm before removing it.
df = pd.DataFrame(df[MODEL_COLUMNS])
df

Unnamed: 0,amount,income,loan_type_Conventional,loan_type_FHA-insured,loan_type_FSA/RHS-guaranteed,loan_type_VA-guaranteed,property_type_Manufactured housing,property_type_One-to-four family dwelling (other than manufactured housing),purpose_Home improvement,purpose_Home purchase,purpose_Refinancing,occupancy_Not applicable,occupancy_Not owner-occupied as a principal dwelling,occupancy_Owner-occupied as a principal dwelling,sex_Female,"sex_Information not provided by applicant in mail, Internet, or telephone application",sex_Male,sex_Not applicable
0,588,313,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0
1,600,80,0,0,0,1,0,1,0,1,0,0,0,1,1,0,0,0
2,589,124,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0
3,35,12,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0


In [277]:
import pickle

# SECURITY: unpickling runs arbitrary code stored in the file, so only load a
# 'model' file that comes from a trusted source.
# The context manager closes the file handle as soon as the model has been
# deserialized; a bare open() inside pickle.load() would leave it open until
# garbage collection.
with open('model', 'rb') as model_file:
    model = pickle.load(model_file)

# Predict one label per sample row. The expected labels below mirror the
# APPROVED / NOT APPROVED notes on the sample rows (presumably 1 = approved).
result = model.predict(df)
print(result) # Should be [1 1 0 0]

[1 1 1 0]
