In [2]:
import pandas as pd
import numpy as np
import json
import joblib

In [3]:
# Load the two persisted KNN models from disk.
# NOTE(review): joblib.load unpickles its input, so only load model files
# that come from a trusted source.

with open('knn_untuned.pkl', mode='rb') as untuned_file:
    knn_untuned = joblib.load(untuned_file)

with open('knn_tuned.pkl', mode='rb') as tuned_file:
    knn_tuned = joblib.load(tuned_file)

In [4]:
# Show the loaded untuned model (the notebook renders the last expression of a cell)
knn_untuned

In [5]:
# Show the loaded tuned model (the notebook renders the last expression of a cell)
knn_tuned

In [7]:
# Build the model-inference input: three hand-written customer records,
# one list entry per customer in every column.
df = pd.DataFrame({
    # Credit limit and demographic attributes
    'limit_balance': [50000, 75000, 30000],
    'sex': [1, 0, 0],
    'education_level': [1, 2, 3],
    'marital_status': [1, 2, 3],
    'age': [43, 32, 33],
    # Repayment status codes, pay_1 .. pay_6
    'pay_1': [0, 2, 3],
    'pay_2': [-1, 0, 4],
    'pay_3': [-2, 2, 3],
    'pay_4': [0, 1, 5],
    'pay_5': [0, 1, 3],
    'pay_6': [-1, 2, 4],
    # Bill amounts, bill_amt_1 .. bill_amt_6
    'bill_amt_1': [84729, 26539, 148689],
    'bill_amt_2': [24459, 53920, 54116],
    'bill_amt_3': [26726, 184022, 881],
    'bill_amt_4': [54116, 47566, 92767],
    'bill_amt_5': [39634, 6072, 48393],
    'bill_amt_6': [92767, 75696, 133580],
    # Payment amounts, pay_amt_1 .. pay_amt_6
    'pay_amt_1': [26894, 11888, 1700],
    'pay_amt_2': [24574, 3699, 2007],
    'pay_amt_3': [2000, 1030, 3171],
    'pay_amt_4': [2330, 2000, 550],
    'pay_amt_5': [1600, 10586, 2500],
    'pay_amt_6': [2705, 4913, 3699],
})
df

Unnamed: 0,limit_balance,sex,education_level,marital_status,age,pay_1,pay_2,pay_3,pay_4,pay_5,...,bill_amt_3,bill_amt_4,bill_amt_5,bill_amt_6,pay_amt_1,pay_amt_2,pay_amt_3,pay_amt_4,pay_amt_5,pay_amt_6
0,50000,1,1,1,43,0,-1,-2,0,0,...,26726,54116,39634,92767,26894,24574,2000,2330,1600,2705
1,75000,0,2,2,32,2,0,2,1,1,...,184022,47566,6072,75696,11888,3699,1030,2000,10586,4913
2,30000,0,3,3,33,3,4,3,5,3,...,881,92767,48393,133580,1700,2007,3171,550,2500,3699


Defining repayment_status and utilization_rate

In [8]:
# Names of the six monthly repayment-code columns and bill-amount columns
pay_cols = ['pay_1', 'pay_2', 'pay_3', 'pay_4', 'pay_5', 'pay_6']
bill_cols = ['bill_amt_1', 'bill_amt_2', 'bill_amt_3', 'bill_amt_4', 'bill_amt_5', 'bill_amt_6']

# repayment_status: average of the six repayment codes.
# Built-in sum() is seeded with the first column so the columns are added
# left to right with plain Series addition.
repay_status = sum((df[col] for col in pay_cols[1:]), df[pay_cols[0]])
df['repayment_status'] = repay_status / 6

# utilization_rate: average bill amount as a percentage of the credit limit
bill_amt = sum((df[col] for col in bill_cols[1:]), df[bill_cols[0]]) / 6
df['utilization_rate'] = bill_amt / df['limit_balance'] * 100

Define functions for mapping repayment_status_group, pay_1_group, and utilization_group

In [10]:
# Map numeric repayment values to labels: map_to_payment for pay_1, map_to_overall for repayment_status

def map_to_payment(x):
    """Translate a numeric repayment value into a customer warning label.

    Returns 'Good Customer' for x <= 0, 'Early Warned Customer' for
    0 < x <= 1, 'Medium Warned Customer' for 1 < x <= 2 and
    'Highly Warned Customer' for x > 2. A value for which every comparison
    is false (e.g. NaN) yields 'Other'.
    """
    if x <= 0:
        return 'Good Customer'
    if x <= 1:
        return 'Early Warned Customer'
    if x <= 2:
        return 'Medium Warned Customer'
    if x > 2:
        return 'Highly Warned Customer'
    # Only reached when none of the comparisons hold (e.g. NaN)
    return 'Other'

def map_to_overall(x):
    """Translate a numeric repayment value into an overall warning label.

    Returns 'Good Overall' for x <= 0, 'Early Warned Overall' for
    0 < x <= 1, 'Medium Warned Overall' for 1 < x <= 2 and
    'Highly Warned Overall' for x > 2. A value for which every comparison
    is false (e.g. NaN) yields 'Other'.
    """
    # Inclusive upper bounds checked in ascending order
    upper_bounds = (
        (0, 'Good Overall'),
        (1, 'Early Warned Overall'),
        (2, 'Medium Warned Overall'),
    )
    for bound, label in upper_bounds:
        if x <= bound:
            return label
    return 'Highly Warned Overall' if x > 2 else 'Other'

# Map the utilization rate (in percent) to a utilization label

def map_to_utilization(x):
    """Translate a utilization value into a utilization label.

    Returns 'Excellent Utilization' for x <= 10, 'Good Utilization' for
    10 < x <= 30, 'Early Warning Utilization' for 30 < x < 100 and
    'High Warning Utilization' for x >= 100. A value for which every
    comparison is false (e.g. NaN) yields 'Other'.
    """
    if x <= 10:
        return 'Excellent Utilization'
    if x <= 30:
        return 'Good Utilization'
    if x < 100:
        return 'Early Warning Utilization'
    if x >= 100:
        return 'High Warning Utilization'
    # Only reached when none of the comparisons hold (e.g. NaN)
    return 'Other'

# Derive the three categorical label columns by mapping each numeric
# source column element-wise through its labelling function.
df['utilization_group'] = df['utilization_rate'].map(map_to_utilization)
df['repayment_status_group'] = df['repayment_status'].map(map_to_overall)
df['pay_1_group'] = df['pay_1'].map(map_to_payment)

Selecting the feature columns into a predict-ready DataFrame

In [12]:
# Keep only the three derived label columns that are passed to predict()
df_predict = df.loc[:, [
    'repayment_status_group',
    'pay_1_group',
    'utilization_group',
]]

In [13]:
# Run both the untuned and the tuned KNN pipelines on the new data
knn_untuned_result = knn_untuned.predict(df_predict)
knn_tuned_result = knn_tuned.predict(df_predict)

In [14]:
# Attach each model's predicted class to the inference rows for side-by-side comparison
df['knn_untuned_class'] = knn_untuned_result
df['knn_tuned_class'] = knn_tuned_result
# Display the frame with the two prediction columns appended
df

Unnamed: 0,limit_balance,sex,education_level,marital_status,age,pay_1,pay_2,pay_3,pay_4,pay_5,...,pay_amt_4,pay_amt_5,pay_amt_6,repayment_status,utilization_rate,repayment_status_group,pay_1_group,utilization_group,knn_untuned_class,knn_tuned_class
0,50000,1,1,1,43,0,-1,-2,0,0,...,2330,1600,2705,-0.666667,107.477,Good Overall,Good Customer,High Warning Utilization,0,0
1,75000,0,2,2,32,2,0,2,1,1,...,2000,10586,4913,1.333333,87.514444,Medium Warned Overall,Medium Warned Customer,Early Warning Utilization,1,1
2,30000,0,3,3,33,3,4,3,5,3,...,550,2500,3699,3.666667,265.792222,Highly Warned Overall,Highly Warned Customer,High Warning Utilization,1,1


**Model Inference Conclusion**

Based on the prediction results, the untuned and tuned KNN models return the same class for each of the 3 rows in the DataFrame (0, 1, and 1 respectively).