In [482]:
pip install numpy

Note: you may need to restart the kernel to use updated packages.


In [483]:
# import required libraries
import numpy as np
import pandas as pd

In [484]:
# Reading data from loan approval csv into python lists
data=pd.read_csv("loan_approval_dataset.csv")

# check the columns in csv
data.columns = data.columns.str.strip()
# print(data.columns)

X_original = data[[
    'no_of_dependents',
    'education',
    'self_employed',
    'income_annum',
    'loan_amount',
    'loan_term',
    'cibil_score',
    'residential_assets_value',
    'commercial_assets_value',
    'luxury_assets_value',
    'bank_asset_value',
]].values.tolist()

X = X_original

# code below normalises data
X = []
for row in X_original:
    row[1] = 10 if row[1].strip().lower() == 'graduate' else 0
    row[2] = 10 if row[2].strip().lower() == 'yes' else 0
    row_new = []
    for i in range(0, len(row)):
        if (i < 1 or i > 2):
            row_new.append(row[i] / X_original[0][i] * 10)
        else:
            row_new.append(row[i])
    X.append(row_new)

# reading loan approval status into Y as 1 or 0
Y = data["loan_status"].values.tolist()
Y = list(map(lambda st: 1 if st.strip() == 'Approved' else 0, Y))

In [485]:
# defining sigmoid function

def sigmoid(Z):
    A = 1 / (1 + np.exp(-Z))
    return A

In [486]:
# initialise data

num_iterations=100
learning_rate=0.009

# make X, Y numpy arrays for vectorisation
X = np.array(X)
X = X.reshape(11, X.shape[0])

Y = np.array(Y)
Y = Y.reshape(1, Y.shape[0])

# initialise W with small random values
W = np.random.randn(X.shape[0], 1) * .001
W = W.reshape(W.shape[0], 1)

# b initialised with 0
b = 0.0

# m is size of experimental set
m = X.shape[1]

In [487]:
# iterate fine tuning W and b using derivative

for i in range(1, num_iterations):
    A = sigmoid(np.dot(W.T, X) + b)

    cost = (-1 / m) * np.sum((Y * np.log(A)) + (1 - Y) * np.log(1 - A))
    print(f'Cost after step {i} = {cost}')

    dW = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y).T

    W = W - learning_rate * dW
    b = b - learning_rate * db

Cost after step 1 = 0.6869070562022176
Cost after step 2 = 0.6862195637209966
Cost after step 3 = 0.6786304665011934
Cost after step 4 = 0.6781842126778586
Cost after step 5 = 0.6740951152794937
Cost after step 6 = 0.6737153667906202
Cost after step 7 = 0.671268998900563
Cost after step 8 = 0.6709389194017326
Cost after step 9 = 0.6693826193227992
Cost after step 10 = 0.6691009652612971
Cost after step 11 = 0.6680697842389959
Cost after step 12 = 0.6678338226881018
Cost after step 13 = 0.6671304668193796
Cost after step 14 = 0.6669355570393313
Cost after step 15 = 0.6664452818714511
Cost after step 16 = 0.6662859954913808
Cost after step 17 = 0.6659384592519225
Cost after step 18 = 0.665809362109396
Cost after step 19 = 0.6655596957745554
Cost after step 20 = 0.6654557557564502
Cost after step 21 = 0.6652744399336562
Cost after step 22 = 0.6651912017142164
Cost after step 23 = 0.6650583318590066
Cost after step 24 = 0.6649919625210854
Cost after step 25 = 0.6648938474463562
Cost after 

In [488]:
# predict values using W and b

def predict(values):
    
    v_mapped = []
    for i in range(0, len(values)):
        if (i < 1 or i > 2):
            v_mapped.append(values[i] / X_original[0][i] * 10)
        if (i == 1):
            v_mapped.append(10 if values[1].strip().lower() == 'graduate' else 0)
        if (i == 2):
            v_mapped.append(10 if values[2].strip().lower() == 'yes' else 0)

    print(v_mapped)
    return sigmoid(np.dot(W.T, v_mapped) + b)

In [489]:
print(predict([0, 'Not Gradulate', 'Yes', 4100000, 12200000, 8, 417, 2700000, 2200000, 8800000, 3300000]))
print(predict([8, 'Gradulate', 'No', 8700000, 33000000, 4, 678, 22500000, 14800000, 29200000, 4300000]))

[5.0, 0, 0, 1.0416666666666667e-06, 3.34448160535117e-07, 0.8333333333333333, 0.012853470437017993, 4.166666666666667e-06, 5.681818181818182e-07, 4.4052863436123346e-07, 1.2499999999999999e-06]
[0.50671498]
[0.0, 0, 10, 4.270833333333333, 4.080267558528428, 6.666666666666666, 5.359897172236504, 11.25, 1.25, 3.8766519823788546, 4.125]
[0.56716758]
[40.0, 0, 0, 9.0625, 11.036789297658862, 3.333333333333333, 8.7146529562982, 93.75, 8.40909090909091, 12.863436123348018, 5.375]
[0.72064649]
