In [3]:
import sys
import flask
import pickle
import os
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import matplotlib.pyplot as plt

df=pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data",sep=" ",header=None)
headers=["status","duration","credit_history",\
         "purpose","credit_amount","saving_accounts","employment",\
         "investment_as_income_percentage","Personal status and sex",\
         "debtors","residence","property","age",\
        "installment_plans","housing","number_of_credits",\
        "job","dependent","telephone","foreign_worker","Cost Matrix(Risk)"]
df.columns=headers
df.to_csv("german_data_credit_cat.csv",index=False) #save as csv file

#for structuring only
status ={'A14':"no checking account",'A11':"...<0 DM", 'A12': "0 <= ...<200 DM",'A13':"... >= 200 DM "}
df["status"]=df["status"].map(status)

credit_history={"A34":"Critical", "A33":"Delay in Past","A32":"Existing Credits", "A31":"Paid Credits", "A30":"No Credits"}
df["credit_history"]=df["credit_history"].map(credit_history)

purpose={"A40" : "new car", "A41" : "used car", "A42" : "furniture/equipment", "A43" :"radio/television" , "A44" : "domestic appliances", "A45" : "repairs", "A46" : "education", 'A47' : 'vacation','A48' : 'retraining','A49' : 'business','A410' : 'others'}
df["purpose"]=df["purpose"].map(purpose)

saving_accounts={"A65" : "unknown/ no savings account","A61" :"... < 100 DM","A62" : "100 <= ... < 500 DM","A63" :"500 <= ... < 1000 DM", "A64" :".. >= 1000 DM"}
df["saving_accounts"]=df["saving_accounts"].map(saving_accounts)

employment={'A75':"7+", 'A74':"7",  'A73':"4", 'A72':"1",'A71':"unemployed"}
df["employment"]=df["employment"].map(employment)

Personal_status_and_sex={ 'A95':"female:single",'A94':"male:married/widowed",'A93':"male:single", 'A92':"female:divorced/separated/married", 'A91':"male:divorced/separated"}
df["Personal status and sex"]=df["Personal status and sex"].map(Personal_status_and_sex)

debtors={'A101':"none", 'A102':"co-applicant", 'A103':"guarantor"}
df["debtors"]=df["debtors"].map(debtors)

property={'A121':"real estate", 'A122':"life insurance", 'A123':"car or other", 'A124':"no property"}
df["property"]=df["property"].map(property)

installment_plans={'A143':"none", 'A142':"store", 'A141':"bank"}
df["installment_plans"]=df["installment_plans"].map(installment_plans)


housing={'A153':"free", 'A152':"own", 'A151':"rent"}
df["housing"]=df["housing"].map(housing)


job={'A174':"highly skilled", 'A173':"skilled employee/official", 'A172':"unskilled-resident", 'A171':"unemployed/unskilled-non resident"}
df["job"]=df["job"].map(job)

telephone={'A192':"yes", 'A191':"none"}
df["telephone"]=df["telephone"].map(telephone)

foreign_worker={'A201':"yes", 'A202':"no"}
df["foreign_worker"]=df["foreign_worker"].map(foreign_worker)

risk={1:"Good Risk", 2:"Bad Risk"}
df["Cost Matrix(Risk)"]=df["Cost Matrix(Risk)"].map(risk)
del df['Cost Matrix(Risk)']
df.to_csv ('convertloan.csv', index=None)

# read first 10 row of dataset
# df.head(10)

## **Testing code for model prediction**

In [4]:
import csv
import pandas as pd
df=pd.read_csv('convertloan.csv')
for index, row in df.iterrows():
    df=row.to_dict()
    print(df)
    x = np.zeros(57)
    x[0] = int(df['duration'])#month
    x[1] = float(df['credit_amount'])
    x[2] = float(df['investment_as_income_percentage'])
    x[3] = int(df['residence'])
    x[4] = True if float(df['age']) >= 25 else False
    x[5] = int(df['number_of_credits'])
    x[6] = int(df['dependent']) #people_liable_for
    x[7] = True if (df['status']) == '...< 0 DM' else False
    x[8] = True if (df['status']) == '0 <= ...< 200 DM' else False
    x[9] = True if (df['status']) == '... >= 200 DM' else False
    x[10] = True if (df['status']) == 'no checking account' else False
    x[11] = True if (df['credit_history']) == 'No Credits' else False #credit_history=A30 #No Credit
    x[12] = True if (df['credit_history']) == 'Paid Credits' else False#credit_history=A31 #Paid Credits
    x[13] = True if (df['credit_history']) == 'Existing Credits' else False#credit_history=A32 #Existing Credits
    x[14] = True if (df['credit_history']) == 'Delay in Past' else False#credit_history=A33 #Delay in Past
    x[15] = True if (df['credit_history']) == 'Critical' else False#credit_history=A34 #critical
    x[16] = True if (df['purpose']) == 'new car' else False #purpose=A40
    x[17] = True if (df['purpose']) == 'used car' else False #purpose=A41
    x[18] = True if (df['purpose']) == 'others' else False #purpose=A410
    x[19] = True if (df['purpose']) == 'furniture/equipment' else False #purpose=A42
    x[20] = True if (df['purpose']) == 'radio/television' else False #purpose=A43
    x[21] = True if (df['purpose']) == 'domestic appliances' else False #purpose=A44
    x[22] = True if (df['purpose']) == 'repairs' else False #purpose=A45
    x[23] = True if (df['purpose']) == 'education' else False #purpose=A46
    x[24] = True if (df['purpose']) == 'retraining' else False #purpose=A48
    x[25] = True if (df['purpose']) == 'business' else False #purpose=A49
    x[26] = True if (df['saving_accounts']) == '... < 100 DM' else False #savings=A61 saving low
    x[27] = True if (df['saving_accounts']) == '100 <= ... < 500 DM' else False #savings=A62 medium
    x[28] = True if (df['saving_accounts']) == '500 <= ... < 1000 DM' else False #savings=A63 high
    x[29] = True if (df['saving_accounts']) == '.. >= 1000 DM' else False #savings=A64 very high
    x[30] = True if (df['saving_accounts']) == 'unknown/ no savings account' else False #savings=A65 no saving
    x[31] = True if (df['employment']) == 'unemployed'else False  #employment=A71 Unemployed
    x[32] = True if (df['employment']) == '1'else False  #employment=A72 1
    x[33] = True if (df['employment']) == '4'else False  #employment=A73 4
    x[34] = True if (df['employment']) == '7'else False  #employment=A74 7
    x[35] = True if (df['employment']) == '7+'else False  # employment=A75 7+
    x[36] = True if (df['debtors']) == 'none'else False #other_debtors=A101
    x[37] = True if (df['debtors']) == 'co-applicant'else False #other_debtors=A102
    x[38] = True if (df['debtors']) == 'guarantor'else False #other_debtors=A103
    x[39] = True if (df['property']) == 'real estate' else False #property=A121 real estate
    x[40] = True if (df['property']) == 'life insurance'else False #property=A122 life insurance
    x[41] = True if (df['property']) == 'car or other' else False #property=A123 car or other
    x[42] = True if (df['property']) == 'no property' else False #property=A124 no
    x[43] = True if (df['installment_plans']) == 'bank' else False #installment_plans=A141 bank
    x[44] = True if (df['installment_plans']) == 'stors' else False #installment_plans=A142 stors
    x[45] = True if (df['installment_plans']) == 'none' else False #installment_plans=A143 none
    x[46] = True if (df['housing']) == 'rent' else False #housing=A151 rent
    x[47] = True if (df['housing']) == 'own' else False #housing=A152  own
    x[48] = True if (df['housing']) == 'free' else False #housing=A153  free
    x[49] = True if (df['job']) == 'unemployed/unskilled-non resident' else False  #skill_level=A171 # A171 : unemployed/ unskilled - non-resident
    x[50] = True if (df['job']) == 'unskilled-resident' else False  #skill_level=A172# A172 : unskilled - resident
    x[51] = True if (df['job']) == 'skilled employee/official' else False  #skill_level=A173 # A173 : skilled employee / official
    x[52] = True if (df['job']) == 'highly skilled' else False  #skill_level=A174 # # A174 : management/ self-employed/highly qualified employee/ officer
    x[53] = True if (df['telephone']) == 'none' else False  #telephone=A191 none
    x[54] = True if (df['telephone']) == 'yes' else False  #telephone=A192 yes
    x[55] = True if (df['foreign_worker']) == 'yes' else False  #foreign_worker=A201 yes
    x[56] = True if (df['foreign_worker']) == 'no' else False  #foreign_worker=A202 no

    import os
    from sklearn.preprocessing import StandardScaler
    import joblib
    model = joblib.load("lmod_bias_model.pkl")
    scale_orig = StandardScaler()
    # x = [4.800e+01, 5.951e+03, 2.000e+00, 2.000e+00, 0.000e+00, 1.000e+00, 1.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 1.000e+00, 0.000e+00]
    X_train = scale_orig.fit_transform([x])
    print("x-----------------", x)
    pred_prob = model.predict(X_train)
    print('prob: ', pred_prob)


{'status': '...<0 DM', 'duration': 6, 'credit_history': 'Critical', 'purpose': 'radio/television', 'credit_amount': 1169, 'saving_accounts': 'unknown/ no savings account', 'employment': '7+', 'investment_as_income_percentage': 4, 'Personal status and sex': 'male:single', 'debtors': 'none', 'residence': 4, 'property': 'real estate', 'age': 67, 'installment_plans': 'none', 'housing': 'own', 'number_of_credits': 2, 'job': 'skilled employee/official', 'dependent': 1, 'telephone': 'yes', 'foreign_worker': 'yes'}
x----------------- [6.000e+00 1.169e+03 4.000e+00 4.000e+00 1.000e+00 2.000e+00 1.000e+00
 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
 0.000e+00 1.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.000e+00
 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
 0.000e+00 0.000e+00 1.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
 1.000e+00 1.000e+00 0.000e+00 0.000e+00 1.000e+00 0.000e+00 0.000e+00
 0.000e+00 0.000e+00 0.000e+00 1.000e+00 0