In [280]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import sklearn.model_selection as sk

In [281]:
# Load the diabetes dataset; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv('diabetes2.csv')

In [282]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [283]:
# Separate the feature columns (everything except 'Outcome') from the
# target column ('Outcome').
X = df.drop(columns='Outcome')
Y = df['Outcome']

In [284]:
X.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [285]:
Y.head()

0    1
1    0
2    1
3    0
4    1
Name: Outcome, dtype: int64

In [297]:
# Hold out 30% of the rows as a test set; the fixed random_state makes the
# split repeatable across runs.
x_train, x_test, y_train, y_test = sk.train_test_split(X, Y, test_size= 0.3, random_state = 78)

In [298]:
# Per-column mean of the training features: column sum divided by the
# number of non-null entries in that column.
mean = x_train.sum()/x_train.count()

In [299]:
# Population variance per column: the summed squared deviation from the
# training mean divided by the non-null count (no n-1 correction).
squared_deviation = (x_train - mean) ** 2
variance = squared_deviation.sum() / x_train.count()

# Per-column standard deviation, used below to scale the features.
sd = np.sqrt(variance)

In [300]:
mean

Pregnancies                   3.869646
Glucose                     120.774674
BloodPressure                70.068901
SkinThickness                20.774674
Insulin                      79.638734
BMI                          31.977467
DiabetesPedigreeFunction      0.452737
Age                          33.139665
dtype: float64

In [301]:
sd

Pregnancies                   3.395932
Glucose                      31.078136
BloodPressure                18.612207
SkinThickness                15.723622
Insulin                     118.584104
BMI                           7.900489
DiabetesPedigreeFunction      0.314627
Age                          11.559942
dtype: float64

In [302]:
# Standardize each training feature by subtracting its mean and dividing by
# its standard deviation.
# NOTE: this overwrites x_train, so re-running this cell on its own scales the
# already-scaled data a second time; re-run from the train/test split instead.
x_train = (x_train - mean)/sd

In [303]:
x_train.head(10)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
718,-0.845025,-0.41105,-0.540984,1.604295,0.829464,0.445863,-0.119944,-0.790632
159,3.866495,1.358683,0.103754,1.286302,0.289763,1.129365,1.157761,1.198997
28,2.688615,0.779497,0.641036,-0.112867,0.256032,-1.237578,-0.660266,2.064053
264,0.038385,0.071604,-0.433527,-1.32124,-0.67158,0.002852,-0.720655,0.160929
325,-0.845025,1.165621,0.103754,0.01433,0.745136,-0.807224,-1.048028,-0.790632
96,-0.550555,-0.925882,-0.433527,0.45952,-0.67158,-0.047778,-1.025779,-0.790632
69,0.038385,0.811674,0.802221,0.395922,0.171703,-0.389529,-0.838255,-0.531116
659,-0.256085,-1.312005,0.641036,0.650316,-0.081282,0.281316,2.667487,-0.531116
603,0.921795,0.940382,0.426123,0.523119,0.390957,0.40789,0.760465,1.804536
212,0.921795,1.873514,1.339503,0.650316,-0.67158,0.281316,-0.917714,2.32357


In [304]:
# Scale the test features with the mean and sd computed from the training
# set, so no statistics derived from the test rows are used.
# NOTE: this overwrites x_test; re-running this cell alone scales it twice.
x_test = (x_test - mean)/sd

In [305]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)) of a scalar ``z``.

    The value is computed in a form that never exponentiates a positive
    number, so very negative inputs return a value at or near 0.0 instead of
    raising ``OverflowError`` from ``math.exp``.

    Parameters
    ----------
    z : real scalar (Python or NumPy number).

    Returns
    -------
    float in the closed interval [0, 1].
    """
    if z >= 0:
        # exp(-z) is at most 1 here, so it cannot overflow.
        return 1/(1 + math.exp(-z))
    # For negative z use the algebraically equivalent e^z / (1 + e^z);
    # exp(z) is below 1, so it cannot overflow either.
    exp_z = math.exp(z)
    return exp_z/(1 + exp_z)

In [306]:
def evaluate(TP, FP, FN, TN):
    """Return the accuracy for a set of confusion-matrix counts.

    Accuracy is the fraction of correct predictions:
    (TP + TN) / (TP + TN + FP + FN).

    Only accuracy is computed. Precision and recall are deliberately not
    evaluated here because their denominators (TP + FP, TP + FN) can be zero
    for a classifier that predicts a single class, which would raise even
    though accuracy is well defined.

    Parameters
    ----------
    TP, FP, FN, TN : counts of true positives, false positives,
        false negatives and true negatives.

    Returns
    -------
    float accuracy in [0, 1].

    Raises
    ------
    ZeroDivisionError
        If all four counts are zero (no samples were evaluated).
    """
    correct = TN + TP
    total = TN + TP + FN + FP
    return correct/total

In [307]:
iteration = 400
SEED = 0          # fixed seed so the random weight initialisation is repeatable
LOG_EPS = 1e-15   # probabilities are clipped to [LOG_EPS, 1 - LOG_EPS] before log()


def _with_bias(frame):
    """Return the frame's values as a float array with a trailing column of ones."""
    values = frame.to_numpy(dtype=float)
    return np.hstack([values, np.ones((len(values), 1))])


def _run_pass(features, labels, weights):
    """Score ``weights`` on one data set, one sample at a time.

    Parameters
    ----------
    features : 2-D array with one row per sample and a trailing column of
        ones, so the last weight acts as the bias term.
    labels : 1-D array of 0/1 targets aligned with ``features``.
    weights : 1-D weight vector with one entry per feature column.

    Returns
    -------
    (mean_loss, grad_sum, (TP, FP, FN, TN))
        Mean binary cross-entropy, the un-normalised sum over samples of
        (h - y) * x, and the confusion-matrix counts at a 0.5 threshold.
    """
    loss = 0
    grad_sum = np.zeros_like(weights)
    tp = fp = fn = tn = 0

    for x, y in zip(features, labels):
        h = sigmoid(np.dot(x, weights))

        # Clip only for the loss so log() never receives exactly 0 when the
        # sigmoid saturates; the gradient and threshold use the raw h.
        h_safe = min(max(h, LOG_EPS), 1 - LOG_EPS)
        loss += (- y * math.log(h_safe)) - ((1-y)*math.log(1-h_safe))

        grad_sum += x * (h - y)

        if h >= 0.5:
            if y == 1:
                tp += 1
            else:
                fp += 1
        else:
            if y == 1:
                fn += 1
            else:
                tn += 1

    return loss/len(labels), grad_sum, (tp, fp, fn, tn)


# One weight per feature plus one for the bias column appended by _with_bias.
rng = np.random.default_rng(SEED)
w = rng.standard_normal(len(x_train.columns) + 1)

# Row counts of each split.
m = len(x_train)
m_test = len(x_test)

# Convert the frames once, outside the epoch loop, instead of per sample.
train_features, train_labels = _with_bias(x_train), y_train.to_numpy()
test_features, test_labels = _with_bias(x_test), y_test.to_numpy()

for itr in range(iteration + 1):

    # Step decay: 0.2 for the first 300 iterations, 0.15 from then on
    # (including the final iteration, itr == iteration).
    lr = 0.2 if itr < 300 else 0.15

    # Training metrics and gradient are computed with the weights as they
    # stand BEFORE this iteration's update.
    J_train, dw, (TP_tr, FP_tr, FN_tr, TN_tr) = _run_pass(train_features, train_labels, w)
    Acc_train = evaluate(TP_tr, FP_tr, FN_tr, TN_tr)

    # Batch gradient-descent step on the mean gradient.
    w = w - (lr/m)*dw

    # Test metrics are computed AFTER the update; the test gradient is unused.
    J_test, _test_grad, (TP_test, FP_test, FN_test, TN_test) = _run_pass(test_features, test_labels, w)
    Acc_test = evaluate(TP_test, FP_test, FN_test, TN_test)

    if itr%20 == 0:

        J_train = round(J_train, 3)
        J_test = round(J_test, 3)
        Acc_train = round(Acc_train, 3)
        Acc_test = round(Acc_test, 3)

        p = str(itr) + ' - Tr loss : ' + str(J_train) + ' Tr Acc : ' + str(Acc_train)
        p += ' Test loss : ' + str(J_test) + ' Test Acc : ' + str(Acc_test)

        print(p)

0 - Tr loss : 1.757 Tr Acc : 0.469 Test loss : 1.778 Test Acc : 0.446
20 - Tr loss : 0.78 Tr Acc : 0.654 Test loss : 0.761 Test Acc : 0.641
40 - Tr loss : 0.54 Tr Acc : 0.739 Test loss : 0.553 Test Acc : 0.727
60 - Tr loss : 0.496 Tr Acc : 0.769 Test loss : 0.519 Test Acc : 0.753
80 - Tr loss : 0.482 Tr Acc : 0.773 Test loss : 0.507 Test Acc : 0.745
100 - Tr loss : 0.474 Tr Acc : 0.764 Test loss : 0.501 Test Acc : 0.753
120 - Tr loss : 0.47 Tr Acc : 0.775 Test loss : 0.497 Test Acc : 0.753
140 - Tr loss : 0.468 Tr Acc : 0.767 Test loss : 0.496 Test Acc : 0.758
160 - Tr loss : 0.466 Tr Acc : 0.765 Test loss : 0.495 Test Acc : 0.775
180 - Tr loss : 0.465 Tr Acc : 0.764 Test loss : 0.495 Test Acc : 0.775
200 - Tr loss : 0.465 Tr Acc : 0.764 Test loss : 0.495 Test Acc : 0.775
220 - Tr loss : 0.465 Tr Acc : 0.765 Test loss : 0.495 Test Acc : 0.771
240 - Tr loss : 0.465 Tr Acc : 0.765 Test loss : 0.496 Test Acc : 0.771
260 - Tr loss : 0.465 Tr Acc : 0.767 Test loss : 0.496 Test Acc : 0.771
2