# Neural Network to predict loan risk

Dataset used for training: https://www.kaggle.com/datasets/laotse/credit-risk-dataset

In [16]:
# Tensorflow / Keras
from tensorflow import keras # for building Neural Networks
print('Tensorflow/Keras: %s' % keras.__version__) # print version
from keras.models import Sequential # for creating a linear stack of layers for our Neural Network
from keras import Input # for instantiating a keras tensor
from keras.layers import Dense # for creating regular densely-connected NN layers.

# Data manipulation
import pandas as pd # for data manipulation
print('pandas: %s' % pd.__version__) # print version
import numpy as np # for data manipulation
print('numpy: %s' % np.__version__) # print version

# Sklearn
import sklearn # for model evaluation
print('sklearn: %s' % sklearn.__version__) # print version
from sklearn.model_selection import train_test_split # for splitting data into train and test samples
from sklearn.metrics import classification_report # for model evaluation metrics

Tensorflow/Keras: 2.12.0
pandas: 2.0.2
numpy: 1.23.5
sklearn: 1.2.2


In [17]:
# Read the credit-risk CSV; the path is relative to the notebook's working directory
df = pd.read_csv(filepath_or_buffer="data/credit_risk_dataset.csv")
# Preview the first five rows
df.head(n=5)

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,22,59000,RENT,123.0,PERSONAL,D,35000,16.02,1,0.59,Y,3
1,21,9600,OWN,5.0,EDUCATION,B,1000,11.14,0,0.1,N,2
2,25,9600,MORTGAGE,1.0,MEDICAL,C,5500,12.87,1,0.57,N,3
3,23,65500,RENT,4.0,MEDICAL,C,35000,15.23,1,0.53,N,2
4,24,54400,RENT,8.0,MEDICAL,C,35000,14.27,1,0.55,Y,4


In [18]:
## Checking for Duplicates
# True marks a row that exactly repeats an earlier row; the first occurrence stays False
dups = df.duplicated(keep="first")
dups.value_counts()  # the recorded output shows 165 duplicated rows

False    32416
True       165
Name: count, dtype: int64

In [19]:
## Removing the Duplicates
# Report the frame's (rows, columns) on both sides of the de-duplication
n_rows, n_cols = df.shape
print(f"Shape of Data before removing duplicates -----> ({n_rows},{n_cols}) \n")
df.drop_duplicates(inplace=True)  # mutates df itself; the original row index labels are kept
n_rows, n_cols = df.shape
print(f"Shape of Data after removing duplicates -----> ({n_rows},{n_cols})")

Shape of Data before removing duplicates -----> (32581,12) 

Shape of Data after removing duplicates -----> (32416,12)


In [23]:
# Let pandas render up to 50 columns when a wide DataFrame is displayed
pd.set_option("display.max_columns", 50)

# Count the missing values in every column; columns reporting a non-zero
# count are the ones that need imputation before modelling
df.isna().sum()

person_age                       0
person_income                    0
person_home_ownership            0
person_emp_length              887
loan_intent                      0
loan_grade                       0
loan_amnt                        0
loan_int_rate                 3095
loan_status                      0
loan_percent_income              0
cb_person_default_on_file        0
cb_person_cred_hist_length       0
dtype: int64

In [24]:
# Fill the gaps in the two numeric columns that have missing values with
# that column's mean.
# NOTE(review): the means are taken over the whole frame, before the
# train/test split further down — confirm that is intended.
for column in ('person_emp_length', 'loan_int_rate'):
    df[column] = df[column].fillna(df[column].mean())

#### Convert categorical data to numeric

In [25]:
# Print the distinct labels of each text-valued column, one array per column,
# so the integer encoding below can be checked against them
for column in ('person_home_ownership', 'loan_intent', 'loan_grade', 'cb_person_default_on_file'):
    print(df[column].unique())

['RENT' 'OWN' 'MORTGAGE' 'OTHER']
['PERSONAL' 'EDUCATION' 'MEDICAL' 'VENTURE' 'HOMEIMPROVEMENT'
 'DEBTCONSOLIDATION']
['D' 'B' 'C' 'A' 'E' 'F' 'G']
['Y' 'N']


In [27]:
# Integer code assigned to each label, per categorical column.
# The codes are exactly the ones used before; in particular
# cb_person_default_on_file keeps 'Y' -> 0 and 'N' -> 1.
CATEGORY_CODES = {
    'person_home_ownership': {'RENT': 0, 'OWN': 1, 'MORTGAGE': 2, 'OTHER': 3},
    'loan_intent': {'PERSONAL': 0, 'EDUCATION': 1, 'MEDICAL': 2, 'VENTURE': 3,
                    'HOMEIMPROVEMENT': 4, 'DEBTCONSOLIDATION': 5},
    'loan_grade': {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6},
    'cb_person_default_on_file': {'Y': 0, 'N': 1},
}

# Assign the encoded column back explicitly instead of calling
# `df[col].replace(..., inplace=True)`: an in-place call on a column selection
# is chained assignment, which newer pandas versions deprecate and which does
# not update `df` once Copy-on-Write is active.
for column, codes in CATEGORY_CODES.items():
    labels = df[column]
    if pd.api.types.is_numeric_dtype(labels):
        continue  # already encoded, so re-running this cell is a no-op
    encoded = labels.map(codes)
    # A label missing from the mapping would silently become NaN; fail loudly instead.
    unexpected = labels[encoded.isna() & labels.notna()].unique()
    if len(unexpected) > 0:
        raise ValueError(f"Unexpected labels in column {column!r}: {list(unexpected)}")
    df[column] = encoded

In [32]:
# Summary statistics for every numeric column, with the quartiles spelled out
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
count,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0,32416.0
mean,27.747008,66091.64,0.913715,4.79051,2.340264,1.220508,9593.845632,11.017265,0.218688,0.17025,0.823235,5.811297
std,6.3541,62015.58,0.960899,4.088378,1.679273,1.167293,6322.730241,3.08304,0.413363,0.106812,0.381475,4.05903
min,20.0,4000.0,0.0,0.0,0.0,0.0,500.0,5.42,0.0,0.0,0.0,2.0
25%,23.0,38542.0,0.0,2.0,1.0,0.0,5000.0,8.49,0.0,0.09,1.0,3.0
50%,26.0,55000.0,0.0,4.0,2.0,1.0,8000.0,11.017265,0.0,0.15,1.0,4.0
75%,30.0,79218.0,2.0,7.0,4.0,2.0,12250.0,13.11,0.0,0.23,1.0,8.0
max,144.0,6000000.0,3.0,123.0,5.0,6.0,35000.0,23.22,1.0,0.83,1.0,30.0


#### Target variable is loan_status

In [33]:
# Share of rows in each loan_status class (fractions rather than raw counts)
df.loc[:, "loan_status"].value_counts(normalize=True)

loan_status
0    0.781312
1    0.218688
Name: proportion, dtype: float64

#### Split the data into train and test sets

In [40]:
# Hold out 20% of the rows as a test set. The split is shuffled with a fixed
# seed and stratified on loan_status.
target = df['loan_status']
features = df.drop(columns='loan_status')
X_df, X_test_df, y_df, y_test_df = train_test_split(
    features,
    target,
    test_size=0.2,
    shuffle=True,
    stratify=target,
    random_state=0,
)

In [76]:
# Summary statistics of the training labels; the mean is the share of rows labelled 1
y_df.describe(percentiles=[0.25, 0.5, 0.75])

count    25932.000000
mean         0.218687
std          0.413364
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          1.000000
Name: loan_status, dtype: float64

In [46]:
# Convert each split from pandas to a plain NumPy array for the model.
# NOTE(review): the features are handed over without any scaling — confirm
# that is intended, given how different the column ranges in df.describe() are.
X, y, X_test, y_test = (
    split.to_numpy() for split in (X_df, y_df, X_test_df, y_test_df)
)

In [48]:
# Shape of a single training sample; this is what the model's input layer is built from
np.shape(X[0])

(11,)

In [102]:
# Feed-forward binary classifier: input -> 12 units -> 8 units -> 1 unit.
# The layers are passed as a list, in the order data flows through them.
model = Sequential(
    [
        Input(shape=X[0].shape, name='Input-Layer'),  # input shape is taken from one sample of X
        Dense(12, activation='relu', name='h_l1'),  # hidden layer, relu(x) = max(0, x)
        Dense(8, activation='relu', name='h_l2'),  # hidden layer, relu(x) = max(0, x)
        Dense(1, activation='sigmoid', name='Output-Layer'),  # output layer, sigmoid(x) = 1 / (1 + exp(-x))
    ],
    name="Model-with-One-Input",
)

In [103]:
# Configure training: Adam optimizer with binary cross-entropy loss.
# No extra metrics are requested here.
model.compile(
    loss='binary_crossentropy',
    optimizer='Adam',
)

In [114]:
# Train on the training split. Epochs and batch size are not set here, so the
# library defaults apply.
model.fit(x=X, y=y)



<keras.callbacks.History at 0x7fd305258bd0>

In [115]:
# Score the held-out test split; the recorded output is a single number
# (the model was compiled with a loss and no extra metrics)
model.evaluate(x=X_test, y=y_test)



0.5254142880439758

In [116]:
# Number of test rows labelled 1 (labels are 0/1, so the sum is the count)
y_test.sum()

1418

In [117]:
# Model outputs for every test row (one sigmoid value per row)
model.predict(x=X_test)



array([[0.22630154],
       [0.22630154],
       [0.22630154],
       ...,
       [0.22630154],
       [0.22630154],
       [0.22630154]], dtype=float32)

In [118]:
# Display the ground-truth loan_status labels of the held-out test rows
y_test

array([0, 1, 0, ..., 0, 0, 1])

In [119]:
# Model outputs for the test rows, shown again for comparison with the labels
model.predict(x=X_test)



array([[0.22630154],
       [0.22630154],
       [0.22630154],
       ...,
       [0.22630154],
       [0.22630154],
       [0.22630154]], dtype=float32)

In [120]:
# Accuracy at a 0.5 cut-off: turn each output into a 0/1 label, compare it
# with the true label, and divide the number of matches by the test-set size.
predicted_labels = (model.predict(X_test)[:, 0] > 0.5).astype(int)
(predicted_labels == y_test).sum() / len(y_test)



0.7813078346699568

In [121]:
# Display the ground-truth test labels again, next to the accuracy computed above
y_test

array([0, 1, 0, ..., 0, 0, 1])

In [164]:
import json

# For every layer of the model, collect its first weight tensor as nested
# Python lists and the name of its activation function.
# NOTE(review): only weights[0] of each layer is taken; any further weight
# tensors a layer holds are not collected — confirm the consumer of these
# lists expects that.
all_w = [layer.weights[0].numpy().tolist() for layer in model.layers]
activations = [layer.activation.__name__ for layer in model.layers]

In [168]:
# Bundle the collected weights and activation names into one JSON document
model_json = {
    'weights': all_w,
    'activation_functions': activations,
}
# Write it to model.json in the working directory. The context manager
# flushes and closes the file even if json.dump raises part-way through.
with open('model.json', 'w') as model_file:
    json.dump(model_json, model_file)

In [170]:

import os

# Aptos endpoints. Each can be overridden through the named environment
# variable and otherwise falls back to the devnet URL given here.
# :!:>section_1
NODE_URL = os.environ.get("APTOS_NODE_URL", "https://fullnode.devnet.aptoslabs.com/v1")
FAUCET_URL = os.environ.get("APTOS_FAUCET_URL", "https://faucet.devnet.aptoslabs.com")  # <:!:section_1

In [172]:
# Copyright © Aptos Foundation
# SPDX-License-Identifier: Apache-2.0

# Demo: create two accounts, fund one from the faucet, and move coins between
# them with two transfers, printing the balances after each step.

from aptos_sdk.account import Account
from aptos_sdk.client import FaucetClient, RestClient

if __name__ == "__main__":
    # :!:>section_1
    rest_client = RestClient(NODE_URL)
    faucet_client = FaucetClient(FAUCET_URL, rest_client)  # <:!:section_1

    # Everything that talks to the network runs inside try/finally so the REST
    # client is closed even when a faucet or transfer call raises.
    try:
        # :!:>section_2
        alice = Account.generate()
        bob = Account.generate()  # <:!:section_2

        print("\n=== Addresses ===")
        print(f"Alice: {alice.address()}")
        print(f"Bob: {bob.address()}")

        # Alice receives 100_000_000 from the faucet; Bob is funded with 0
        # (presumably so his account exists before the first transfer — confirm).
        # :!:>section_3
        faucet_client.fund_account(alice.address(), 100_000_000)
        faucet_client.fund_account(bob.address(), 0)  # <:!:section_3

        print("\n=== Initial Balances ===")
        # :!:>section_4
        print(f"Alice: {rest_client.account_balance(alice.address())}")
        print(f"Bob: {rest_client.account_balance(bob.address())}")  # <:!:section_4

        # Have Alice give Bob 1_000 coins
        # :!:>section_5
        txn_hash = rest_client.transfer(alice, bob.address(), 1_000)  # <:!:section_5
        # Wait for the transaction before reading the balances again
        # :!:>section_6
        rest_client.wait_for_transaction(txn_hash)  # <:!:section_6

        print("\n=== Intermediate Balances ===")
        print(f"Alice: {rest_client.account_balance(alice.address())}")
        print(f"Bob: {rest_client.account_balance(bob.address())}")

        # Have Alice give Bob another 1_000 coins using BCS
        txn_hash = rest_client.bcs_transfer(alice, bob.address(), 1_000)
        rest_client.wait_for_transaction(txn_hash)

        print("\n=== Final Balances ===")
        print(f"Alice: {rest_client.account_balance(alice.address())}")
        print(f"Bob: {rest_client.account_balance(bob.address())}")
    finally:
        rest_client.close()


=== Addresses ===
Alice: 0x45b4bdd6f2f4065dfe0e5ba4ce69ce9b14ced769f7ac6635ceba74eb86ef58a2
Bob: 0x41db77bc17d72e0f63bc888bb36d9431f4f681911b1aca8087d3dc9b77bf71eb

=== Initial Balances ===
Alice: 100000000
Bob: 0

=== Intermediate Balances ===
Alice: 99998400
Bob: 1000

=== Final Balances ===
Alice: 99996800
Bob: 2000
