In [104]:
import pickle
from sklearn.tree import export_text
import pandas as pd
import numpy as np

In [105]:
# Path of the pickled model, relative to the notebook's working directory.
filename = 'model.pkl'
# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# load model files that come from a trusted source.
with open(filename, 'rb') as file:
    clf = pickle.load(file)

In [106]:
# Report the size of the loaded tree: depth on the first line, leaf count on the second.
print(clf.get_depth(), clf.get_n_leaves(), sep="\n")

44
22015


In [107]:
# List every feature name stored on the model next to its importance score.
for name, value in zip(clf.features, clf.feature_importances_):
    print("{}: {}".format(name, value))

Occupation: 0.04428760617502819
Annual_Income: 0.08428685897991424
Num_Bank_Accounts: 0.027117935706473032
Num_Credit_Card: 0.037189881098532176
Interest_Rate: 0.08695334266350466
Num_of_Loan: 0.0294877167971107
Credit_Mix: 0.11628963466589795
Outstanding_Debt: 0.20953352419518062
Credit_History_Age: 0.36485349971835856


In [108]:
# Load the cleaned dataset and rebuild the feature matrix the model expects.
df = pd.read_csv("./clean_credit_score_classification.csv")

# Replace each categorical text column with its integer category code.
for column in ['Occupation', 'Credit_Mix', 'Payment_Behaviour', 'Payment_of_Min_Amount']:
    df[column] = df[column].astype('category').cat.codes

# 'Type_of_Loan' is a comma-separated list whose last entry is introduced by
# ", and"; normalise that separator and split every row exactly once.
loan_lists = (
    df['Type_of_Loan']
    .str.replace(', and', ',', regex=False)
    .str.split(', ')
)

# Sorted so the generated indicator columns come out in the same order on
# every run (plain set iteration order changes between kernel sessions).
loans_set = sorted({loan for loans in loan_lists for loan in loans})

# One boolean indicator column per distinct loan type.
for item in loans_set:
    df[item] = loan_lists.map(lambda loans, item=item: item in loans)

df = df.drop(columns=['Type_of_Loan'])

# Select the model's feature columns, in the order stored on the model,
# as a plain numpy array.
X = np.array(df[clf.features])

  df = pd.read_csv("./clean_credit_score_classification.csv")


In [110]:
def get_decision_path(clf, X_test):
    """Print the decision rules a fitted tree applies to each sample.

    For every row of ``X_test`` this prints one line per split node on the
    row's path through the tree (node id, feature name, the row's value, the
    comparison direction and the node threshold), followed by the label the
    tree predicts for that row.

    Parameters
    ----------
    clf : fitted tree model
        Must provide ``tree_.feature``, ``tree_.threshold``,
        ``decision_path``, ``apply`` and ``predict``, plus two extra
        attributes read here: ``features`` (feature names indexed by column
        position) and ``labels`` (indexed by the values ``predict`` returns).
    X_test : array-like of shape (n_samples, n_features)
        Samples to explain. Anything ``np.asarray`` turns into a 2-D array
        works (ndarray, DataFrame, nested list).

    Returns
    -------
    None
        The rules are written to standard output.
    """
    split_feature = clf.tree_.feature
    split_threshold = clf.tree_.threshold
    node_indicator = clf.decision_path(X_test)
    leaf_id = clf.apply(X_test)
    classes = clf.predict(X_test)

    # Positional (row, column) indexing below needs a real 2-D array.
    samples = np.asarray(X_test)

    for sample_id in range(samples.shape[0]):
        # Ids of the nodes this sample passes through, read from the
        # indices/indptr arrays of the decision-path indicator.
        node_index = node_indicator.indices[
            node_indicator.indptr[sample_id] : node_indicator.indptr[sample_id + 1]
        ]

        print("Rules used to predict sample {id}:\n".format(id=sample_id))
        for node_id in node_index:
            # The leaf holds no split rule, so there is nothing to print for it.
            if leaf_id[sample_id] == node_id:
                continue

            # Compare this sample's value for the node's split feature
            # against the node's threshold.
            value = samples[sample_id, split_feature[node_id]]
            threshold_sign = "<=" if value <= split_threshold[node_id] else ">"

            # The message text (including its trailing ')') is kept unchanged
            # so the output matches previously recorded runs.
            print(
                "decision node {node} : (X_test[{sample}, {feature}] = {value}) "
                "{inequality} {threshold})".format(
                    node=node_id,
                    sample=sample_id,
                    feature=clf.features[split_feature[node_id]],
                    value=value,
                    inequality=threshold_sign,
                    threshold=split_threshold[node_id],
                )
            )
        print(f'Sample {sample_id} is {clf.labels[classes[sample_id]]}')

In [111]:
# Print the decision rules for the rows of X at positions 100005 through 100019
# (the slice end is exclusive, so at most 15 samples).
get_decision_path(clf, X[100005:100020])

Rules used to predict sample 0:

decision node 0 : (X_test[0, Outstanding_Debt] = 605.03) <= 1500.0050048828125)
decision node 1 : (X_test[0, Credit_Mix] = 1.0) <= 1.5)
decision node 2 : (X_test[0, Interest_Rate] = 6.0) <= 13.5)
decision node 3 : (X_test[0, Num_Credit_Card] = 4.0) > 2.5)
decision node 2497 : (X_test[0, Num_Credit_Card] = 4.0) <= 5.5)
decision node 2498 : (X_test[0, Num_Bank_Accounts] = 2.0) <= 5.5)
decision node 2499 : (X_test[0, Outstanding_Debt] = 605.03) <= 1473.27001953125)
decision node 2500 : (X_test[0, Num_Bank_Accounts] = 2.0) > 1.5)
decision node 5438 : (X_test[0, Occupation] = 13.0) <= 13.5)
decision node 5439 : (X_test[0, Annual_Income] = 34847.84) <= 110502.6171875)
decision node 5440 : (X_test[0, Num_Credit_Card] = 4.0) <= 4.5)
decision node 5441 : (X_test[0, Credit_History_Age] = 328.0) > 201.5)
decision node 5717 : (X_test[0, Num_of_Loan] = 1.0) > 0.5)
decision node 6283 : (X_test[0, Interest_Rate] = 6.0) <= 7.5)
decision node 6284 : (X_test[0, Annual_In