Implementation of Batch Gradient Descent.

In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('diabetes.csv')

In [3]:
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
array = data.to_numpy()
array

array([[  6.   , 148.   ,  72.   , ...,   0.627,  50.   ,   1.   ],
       [  1.   ,  85.   ,  66.   , ...,   0.351,  31.   ,   0.   ],
       [  8.   , 183.   ,  64.   , ...,   0.672,  32.   ,   1.   ],
       ...,
       [  5.   , 121.   ,  72.   , ...,   0.245,  30.   ,   0.   ],
       [  1.   , 126.   ,  60.   , ...,   0.349,  47.   ,   1.   ],
       [  1.   ,  93.   ,  70.   , ...,   0.315,  23.   ,   0.   ]])

In [5]:
target = array[:, -1].T
train_target = target[0:614]
test_target = target[614:]
regressors = array[:, 0:8]
weights = np.zeros(8)
bias = 0.

In [6]:
#split to train and test set
train = regressors[0:614, :].T
test = regressors[614:, :]

In [7]:
def gradient(train, target, weights, bias):
    m = 614 # number of training examples
    value = np.dot(weights.T, train) + bias
    error_weight = (value - target)
    error_bias = np.sum(error_weight)
    real_error_weight = np.dot(error_weight.T, train.T)

    error_bias /= m
    real_error_weight /= m
    return real_error_weight, error_bias

In [8]:
def batch_gradient_descent(train, target, weights, bias, iterations, alpha):
    for i in range(iterations):
        error_weight, error_bias = gradient(train, target, weights, bias)
        weights = weights - error_weight*alpha
        bias = bias - error_bias*alpha
    return weights, bias

In [9]:
w, b = batch_gradient_descent(train, train_target, weights, bias, 1000, 1.0e-7)

In [10]:
#testing set values
x = test[: , :]
outputs = np.dot(w.T, x.T) + b

In [11]:
counts = 0
for i in range(0, 154):
    if(int(np.round(outputs[i])) == int(test_target[i])):
        counts = counts + 1
print("Total accurate predictions = ", counts)
print("Accuracy = ", (counts/154)*100)

Total accurate predictions =  104
Accuracy =  67.53246753246754


In [12]:
finals = np.dot(w.T, train) + b
counts = 0
for i in range(0, 614):
    if(int(finals[i]) == int(train_target[i])):
        counts = counts + 1
print("Total accurate predictions = ", counts)
print("Accuracy = ", (counts/614)*100)

Total accurate predictions =  401
Accuracy =  65.30944625407166
