# Neural Network to predict loan risk

dataset used for training: https://www.kaggle.com/datasets/laotse/credit-risk-dataset

In [340]:
from tensorflow import keras 
print('Tensorflow/Keras: %s' % keras.__version__) 
from keras.models import Sequential 
from keras import Input 
from keras.layers import Dense 


import pandas as pd 
print('pandas: %s' % pd.__version__) 
import numpy as np 
print('numpy: %s' % np.__version__) 


import sklearn 
print('sklearn: %s' % sklearn.__version__) 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import classification_report 
import time

Tensorflow/Keras: 2.12.0
pandas: 2.0.2
numpy: 1.23.5
sklearn: 1.2.2


In [298]:
df = pd.read_csv("data/credit_risk_dataset.csv")
df.head()

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,22,59000,RENT,123.0,PERSONAL,D,35000,16.02,1,0.59,Y,3
1,21,9600,OWN,5.0,EDUCATION,B,1000,11.14,0,0.1,N,2
2,25,9600,MORTGAGE,1.0,MEDICAL,C,5500,12.87,1,0.57,N,3
3,23,65500,RENT,4.0,MEDICAL,C,35000,15.23,1,0.53,N,2
4,24,54400,RENT,8.0,MEDICAL,C,35000,14.27,1,0.55,Y,4


In [299]:
## Checking for Duplicates
dups = df.duplicated()
dups.value_counts() #There are 165 Duplicated rows

False    32416
True       165
Name: count, dtype: int64

In [19]:
## Removing the Duplicates
print(f"Shape of Data before removing duplicates -----> ({df.shape[0]},{df.shape[1]}) \n")
df.drop_duplicates(inplace=True)
print(f"Shape of Data after removing duplicates -----> ({df.shape[0]},{df.shape[1]})")

Shape of Data before removing duplicates -----> (32581,12) 

Shape of Data after removing duplicates -----> (32416,12)


In [23]:
# Set Pandas options to display more columns
pd.options.display.max_columns=50

# For columns with missing values, fill them in with column mean
# df=df.fillna(df.mean())

df.isnull().sum()

person_age                       0
person_income                    0
person_home_ownership            0
person_emp_length              887
loan_intent                      0
loan_grade                       0
loan_amnt                        0
loan_int_rate                 3095
loan_status                      0
loan_percent_income              0
cb_person_default_on_file        0
cb_person_cred_hist_length       0
dtype: int64

In [24]:
df['person_emp_length']=df['person_emp_length'].fillna(df['person_emp_length'].mean())
df['loan_int_rate']=df['loan_int_rate'].fillna(df['loan_int_rate'].mean())

#### categorical data to numeric

In [25]:
print(df['person_home_ownership'].unique())
print(df['loan_intent'].unique())
print(df['loan_grade'].unique())
print(df['cb_person_default_on_file'].unique())

['RENT' 'OWN' 'MORTGAGE' 'OTHER']
['PERSONAL' 'EDUCATION' 'MEDICAL' 'VENTURE' 'HOMEIMPROVEMENT'
 'DEBTCONSOLIDATION']
['D' 'B' 'C' 'A' 'E' 'F' 'G']
['Y' 'N']


In [27]:
df['person_home_ownership'].replace(['RENT', 'OWN', 'MORTGAGE', 'OTHER'],
                        [0, 1, 2, 3], inplace=True)
df['loan_intent'].replace(['PERSONAL', 'EDUCATION', 'MEDICAL', 'VENTURE', 'HOMEIMPROVEMENT', 'DEBTCONSOLIDATION'],
                        [0, 1, 2, 3, 4, 5], inplace=True)
df['loan_grade'].replace(['A', 'B', 'C', 'D', 'E', 'F', 'G'],
                        [0, 1, 2, 3, 4, 5, 6], inplace=True)
df['cb_person_default_on_file'].replace(['Y', 'N'],
                        [0, 1], inplace=True)

In [32]:
df.describe()

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
count,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0
mean,27.747008,66091.64,0.913715,4.79051,2.340264,1.220508,9593.845632,11.017265,0.218688,0.17025,0.823235,5.811297
std,6.3541,62015.58,0.960899,4.088378,1.679273,1.167293,6322.730241,3.08304,0.413363,0.106812,0.381475,4.05903
min,20.0,4000.0,0.0,0.0,0.0,0.0,500.0,5.42,0.0,0.0,0.0,2.0
25%,23.0,38542.0,0.0,2.0,1.0,0.0,5000.0,8.49,0.0,0.09,1.0,3.0
50%,26.0,55000.0,0.0,4.0,2.0,1.0,8000.0,11.017265,0.0,0.15,1.0,4.0
75%,30.0,79218.0,2.0,7.0,4.0,2.0,12250.0,13.11,0.0,0.23,1.0,8.0
max,144.0,6000000.0,3.0,123.0,5.0,6.0,35000.0,23.22,1.0,0.83,1.0,30.0


#### Target variable is loan_status

In [33]:
df["loan_status"].value_counts(normalize=True)

loan_status
0    0.781312
1    0.218688
Name: proportion, dtype: float64

#### Split the data into train and test sets

In [40]:
X_df, X_test_df, y_df, y_test_df = train_test_split(df.drop('loan_status', axis=1), df['loan_status'],
                                        random_state=0,  test_size=0.2, stratify=df['loan_status'],
                                        shuffle=True)

In [76]:
y_df.describe()

count    25932.000000
mean         0.218687
std          0.413364
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          1.000000
Name: loan_status, dtype: float64

In [46]:
X = X_df.to_numpy()
y = y_df.to_numpy()
X_test = X_test_df.to_numpy()
y_test = y_test_df.to_numpy()

In [48]:
X[0].shape

(11,)

In [555]:
model = Sequential(name="Model-with-One-Input") # Model
model.add(Input(shape=X[0].shape, name='Input-Layer')) # Input Layer - need to speicfy the shape of inputs
model.add(Dense(12, activation='relu', name='h_l1')) # Hidden Layer, softplus(x) = log(exp(x) + 1)
model.add(Dense(8, activation='relu', name='h_l2')) # Hidden Layer, softplus(x) = log(exp(x) + 1)
model.add(Dense(1, activation='relu', name='Output-Layer')) # Output Layer, sigmoid(x) = 1 / (1 + exp(-x))

In [556]:
model.compile(optimizer='Adam', loss='binary_crossentropy')

In [557]:
model.fit(X, y)



<keras.callbacks.History at 0x7fd290629cd0>

In [558]:
model.evaluate(X_test, y_test)



11.921518325805664

In [559]:
sum(y_test)

1418

In [560]:
model.predict(X_test)



array([[1785.0743],
       [ 982.6855],
       [1887.7802],
       ...,
       [2054.8672],
       [ 326.7732],
       [2479.8965]], dtype=float32)

In [561]:
y_test

array([0, 1, 0, ..., 0, 0, 1])

In [562]:
model.predict(X_test)



array([[1785.0743],
       [ 982.6855],
       [1887.7802],
       ...,
       [2054.8672],
       [ 326.7732],
       [2479.8965]], dtype=float32)

In [563]:
sum(list(map(lambda x: 1 if (x>0.5)[0] else 0 , model.predict(X_test))) == y_test) / len(y_test)



0.21822948797038866

In [564]:
y_test

array([0, 1, 0, ..., 0, 0, 1])

In [565]:
all_w = []
activations = []
import json
for layer in model.layers:
    all_w.append(layer.weights[0].numpy().T.tolist())
    activations.append(layer.activation.__name__)

In [566]:
map_activation = {
    'identity': 0,
    'sigmoid': 1,
    'relu': 2,
}

In [567]:
activations = list(map(lambda x: map_activation[x], activations))

In [568]:
model_json = {
    'weights': all_w,
    'activation_functions': activations,
}
model_file = open('model.json','w')
json.dump(model_json, model_file)
model_file.close()

In [569]:
import os


NODE_URL = os.getenv("APTOS_NODE_URL", "https://fullnode.devnet.aptoslabs.com/v1")
FAUCET_URL = os.getenv(
    "APTOS_FAUCET_URL",
    "https://faucet.devnet.aptoslabs.com",
)

In [570]:
from aptos_sdk.account import Account, RotationProofChallenge
from aptos_sdk.account_address import AccountAddress
from aptos_sdk.authenticator import Authenticator, MultiEd25519Authenticator
from aptos_sdk.bcs import Serializer, Deserializer
from aptos_sdk.client import FaucetClient, RestClient
from aptos_sdk.ed25519 import PublicKey, Signature
from aptos_sdk.transactions import (
    EntryFunction,
    RawTransaction,
    Script,
    ScriptArgument,
    SignedTransaction,
    TransactionArgument,
    TransactionPayload,
)
from aptos_sdk.type_tag import StructTag, TypeTag

In [571]:
ser = Serializer()
seq_ser = Serializer.sequence_serializer(Serializer.u128)
seq_ser = Serializer.sequence_serializer(seq_ser)
seq_ser = Serializer.sequence_serializer(seq_ser)
seq_ser(ser, [[[0]]])
ser.output()

b'\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'

In [572]:
rest_client = RestClient(NODE_URL)
faucet_client = FaucetClient(FAUCET_URL, rest_client)

In [573]:
def deep_vec_serializer(n, ser):
    for _ in range(n):
        ser = Serializer.sequence_serializer(ser)
    return ser

In [574]:
def fp_to_fixed64(f: np.float128) -> int:
    return round(np.float128(f) * np.float128(2**64)) & (2**128 - 1)
def fixed64_to_fp(f: int) -> np.float128:
    return np.float128(f) / np.float128(2**64)

In [575]:
import math
def fp_as_u128_serializer_abs(self, f):
    self.u128(fp_to_fixed64(abs(f)))
def fp_as_u128_serializer_sign(self, f):
    self.bool(f < 0)

In [576]:
TransactionArgument([[[1.5]]], deep_vec_serializer(3, fp_as_u128)).encode()

b'\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x80\x01\x00\x00\x00\x00\x00\x00\x00'

In [577]:
alice = Account.generate()

In [578]:
move_ai_addr = '5534a248787f181af2098e1b55326f59a7a098bb2292b55ba6ac9c4381535e59'

In [579]:
move_ai_address = AccountAddress.from_hex(move_ai_addr)
move_ai_private_key = ed25519.PrivateKey.from_hex('0x68db9d6724b56436517301058477345b5fdd67e6fea6358f564ec6b7068b9e63')

In [580]:
move_ai = Account(move_ai_address, move_ai_private_key)

In [581]:
entry_function = EntryFunction.natural(
    module=f"0x{move_ai_addr}::nn",
    function="update_nn",
    ty_args=[],
    args=[
        TransactionArgument(all_w, deep_vec_serializer(3, fp_as_u128_serializer_abs)),
        TransactionArgument(all_w, deep_vec_serializer(3, fp_as_u128_serializer_sign)),
        TransactionArgument(activations, deep_vec_serializer(1, Serializer.u8)),
    ],
)
tx_hash = rest_client.submit_bcs_transaction(rest_client.create_bcs_signed_transaction(move_ai, TransactionPayload(entry_function)))

In [602]:
print(f'Update Neural Network TX: {tx_hash}')

Update Neural Network TX: 0xeabc338cf55ce2fc67580a2243504c7cc7d6e8c15c770c973277fa11bba65d9c


In [583]:
rest_client.wait_for_transaction(tx_hash)

In [603]:
print(f'input: {list(X_test[0])}')

INPUT: [44.0, 80000.0, 2.0, 14.0, 2.0, 0.0, 10000.0, 6.03, 0.13, 1.0, 15.0]


In [593]:
entry_function = EntryFunction.natural(
    module=f"0x{move_ai_addr}::nn",
    function="run",
    ty_args=[],
    args=[
        TransactionArgument(list(X_test[0]), deep_vec_serializer(1, fp_as_u128_serializer_abs)),
        TransactionArgument(list(X_test[0]), deep_vec_serializer(1, fp_as_u128_serializer_sign)),
    ],
)
tx_hash = rest_client.submit_bcs_transaction(rest_client.create_bcs_signed_transaction(move_ai, TransactionPayload(entry_function)))

In [604]:
print(f'Run TX: {tx_hash}')

Run TX: 0xeabc338cf55ce2fc67580a2243504c7cc7d6e8c15c770c973277fa11bba65d9c


In [605]:
rest_client.wait_for_transaction(tx_hash)

In [608]:
print(f'on-chain calculated result: {fixed64_to_fp(32928804031077540392892)}')

on-chain calculated result: 1785.0740433921853


In [607]:
print(f'python model result: {model.predict(X_test[0:1])}')

python model result: [[1785.0743]]
