In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Show what is inside the input directory so the dataset path used below can be checked.
print(os.listdir("../input"))

In [None]:
# Load the dataset CSV from the notebook's input directory into a DataFrame.
data = pd.read_csv("../input/pima-indians-diabetes-database/diabetes.csv")

In [None]:
# Preview the first rows to confirm the file was parsed as expected.
data.head()

In [None]:
# Print pandas' concise summary of the frame.
data.info()

In [None]:
# True if at least one cell anywhere in the frame is null.
data.isnull().values.any()

In [None]:
# Descriptive statistics for the columns of the frame.
data.describe()

In [None]:
# Lower-case every column name; later cells access the columns through
# lowercase attribute names (e.g. data.glucose, data.outcome).
data.columns = [name.lower() for name in data.columns]
data.columns

In [None]:
# One distribution plot per feature, laid out on a 4x2 grid.
fig, ax = plt.subplots(4,2, figsize=(16,16))

# Listed in the order the grid is filled: ax[0,0], ax[0,1], ax[1,0], ...
feature_columns = [
    "age", "pregnancies",
    "glucose", "bloodpressure",
    "skinthickness", "insulin",
    "diabetespedigreefunction", "bmi",
]

# NOTE(review): distplot is deprecated in newer seaborn releases; histplot/displot
# are the suggested replacements once the environment's seaborn version is confirmed.
# A loop replaces eight copy-pasted calls and, because the cell no longer ends
# in an expression, no stray Axes repr is printed under the figure.
for column, axis in zip(feature_columns, ax.flat):
    sns.distplot(data[column], bins=20, ax=axis)

In [None]:
# Scatter plot with a fitted regression line: pregnancies (x) against glucose (y).
sns.regplot(x = data.pregnancies, y = data.glucose)

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of `data`.
sns.set(font_scale=1.15)
plt.figure(figsize=(14, 10))

sns.heatmap(
    data.corr(),
    annot=True,          # write the coefficient inside each cell...
    fmt=".1f",           # ...formatted with one decimal place
    cmap="YlGnBu",
    vmax=1,
    square=True,
    linewidths=0.5,
    linecolor="white",
)
plt.title("Correlation of Features");

In [None]:
# Min-max normalization
# Formula: (x - min(x)) / (max(x) - min(x)), applied to each feature column
y = data.outcome.values
x = data.drop(["outcome"], axis = 1)

# Use the DataFrame's own column-wise reductions instead of np.min / np.max.
# The NumPy wrappers forward axis=None to pandas, and newer pandas releases
# treat that as "reduce over the whole frame" (a single scalar), which breaks
# per-feature scaling and makes the former `.values` access fail.
feature_min = x.min()
feature_range = x.max() - feature_min

# A column with a single repeated value has a zero range and becomes NaN here.
x = (x - feature_min) / feature_range

In [None]:
# Train & Test Split
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state pins the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Transposed views of each split; these are the inputs handed to the model code.
features, test_features = x_train.T, x_test.T
labels, test_labels = y_train.T, y_test.T

print("features: ", features.shape)
print("labels: ", labels.shape)
print("test_features: ", test_features.shape)
print("test_labels: ", test_labels.shape)

In [None]:
# Parameter initialization
def initialize_weights_and_bias(dimension):
    """Create the starting parameters for the model.

    Args:
        dimension: Number of rows of the weight column vector.

    Returns:
        A ``(w, b)`` tuple: ``w`` is a ``(dimension, 1)`` float array whose
        entries are all 0.01, and ``b`` is the float 0.0.
    """
    initial_weight = 0.01
    initial_bias = 0.0
    weights = np.full(shape=(dimension, 1), fill_value=initial_weight)
    return weights, initial_bias

In [None]:
# Sigmoid function
# Formula: 1 / (1 + e^(-z))
def sigmoid(z):
    """Apply the logistic sigmoid element-wise.

    Args:
        z: Scalar or NumPy array of pre-activation values.

    Returns:
        ``1 / (1 + exp(-z))`` evaluated for every element of ``z``.
    """
    return 1 / (1 + np.exp(-z))

In [None]:
# Forward & Backward Propagation
def foward_and_backward_propagation(w, b, x_train, y_train):
    """Compute the logistic-regression cost and its gradients for one pass.

    Args:
        w: Weight column vector; ``w.T`` is multiplied with ``x_train``.
        b: Bias added to every pre-activation.
        x_train: Feature matrix with one sample per column
            (``x_train.shape[1]`` is used as the sample count).
        y_train: Binary labels, broadcastable against the row of predictions.

    Returns:
        A ``(cost, gradients)`` tuple. ``cost`` is the cross-entropy loss
        summed over all samples and divided by the sample count.
        ``gradients`` is a dict with the keys ``"derivative_weight"`` and
        ``"derivative_bias"``.
    """
    sample_count = x_train.shape[1]  # divisor that turns sums into means

    # Forward propagation
    z = np.dot(w.T, x_train) + b
    y_head = sigmoid(z)

    # A prediction that saturates to exactly 0 or 1 would make log() return
    # -inf and turn the cost into nan through 0 * inf. Clip for the loss only;
    # the gradients below keep using the unmodified predictions.
    eps = 1e-15
    y_safe = np.clip(y_head, eps, 1 - eps)
    loss = -y_train * np.log(y_safe) - (1 - y_train) * np.log(1 - y_safe)
    cost = np.sum(loss) / sample_count

    # Backward propagation
    error = y_head - y_train
    derivative_weight = np.dot(x_train, error.T) / sample_count
    derivative_bias = np.sum(error) / sample_count
    gradients = {"derivative_weight": derivative_weight, "derivative_bias": derivative_bias}

    return cost, gradients

In [None]:
# Updating Parameters
def update(w, b, x_train, y_train, learning_rate, number_of_iterations):
    """Fit the weights and bias by repeated gradient steps and plot the cost.

    Args:
        w: Initial weights.
        b: Initial bias.
        x_train: Training features, passed through to
            ``foward_and_backward_propagation``.
        y_train: Training labels, passed through likewise.
        learning_rate: Step size applied to both gradients.
        number_of_iterations: Number of gradient steps to take.

    Returns:
        A ``(parameters, gradients, cost_list)`` tuple. ``parameters`` is a
        dict with the keys ``"weight"`` and ``"bias"``. ``gradients`` holds the
        gradients of the last iteration, or ``None`` when no iteration ran.
        ``cost_list`` contains the cost of every iteration.
    """
    cost_list = []            # cost of every iteration (returned)
    sampled_costs = []        # cost of every 10th iteration (plotted)
    sampled_iterations = []   # iteration numbers matching sampled_costs

    # Bound up front so that zero iterations returns cleanly instead of
    # raising UnboundLocalError at the return statement.
    gradients = None

    # Take number_of_iterations gradient steps.
    for i in range(number_of_iterations):
        cost, gradients = foward_and_backward_propagation(w, b, x_train, y_train)
        cost_list.append(cost)

        w = w - learning_rate * gradients["derivative_weight"]
        b = b - learning_rate * gradients["derivative_bias"]

        # Report and record progress only on every 10th iteration.
        if i % 10 == 0:
            sampled_costs.append(cost)
            sampled_iterations.append(i)
            print("Cost after iterations %i: %f" %(i, cost))

    parameters = {"weight": w, "bias": b}

    plt.plot(sampled_iterations, sampled_costs)
    plt.title("Cost-Iteration Relation")
    plt.xticks(sampled_iterations, rotation = "vertical")
    plt.xlabel("Number of iterations")
    plt.ylabel("Cost")
    plt.show()
    return parameters, gradients, cost_list

In [None]:
# Prediction
def predict(w, b, x_test):
    """Classify every column of ``x_test`` as 0 or 1.

    Args:
        w: Weight column vector; ``w.T`` is multiplied with ``x_test``.
        b: Bias added to every pre-activation.
        x_test: Feature matrix with one sample per column.

    Returns:
        A float array of shape ``(1, x_test.shape[1])`` holding 0 where the
        sigmoid activation is <= 0.5 and 1 everywhere else.
    """
    activations = sigmoid(np.dot(w.T, x_test) + b)
    # Only the first row of activations is consulted. The comparison is written
    # as "<= 0.5 -> 0, otherwise 1" so the threshold value itself maps to 0.
    return np.where(activations[:1, :] <= 0.5, 0.0, 1.0)

In [None]:
# Logistic Regression
def logistic_regression(features, labels, test_features, test_labels, learning_rate, num_iterations):
    """Train the model on the training split and print its test accuracy.

    Args:
        features: Training features; ``features.shape[0]`` sets the number of weights.
        labels: Training labels.
        test_features: Features of the held-out split.
        test_labels: Labels of the held-out split.
        learning_rate: Step size forwarded to ``update``.
        num_iterations: Number of gradient steps forwarded to ``update``.

    Returns:
        None. The accuracy is printed, not returned.
    """
    n_weights = features.shape[0]
    initial_w, initial_b = initialize_weights_and_bias(n_weights)

    # Only the learned parameters are needed here; the gradients and the
    # per-iteration costs returned by update() are not used.
    learned, _gradients, _costs = update(
        initial_w, initial_b, features, labels, learning_rate, num_iterations
    )
    test_predictions = predict(learned["weight"], learned["bias"], test_features)

    # Predictions and labels are both 0/1, so the mean absolute difference is
    # the fraction of misclassified samples.
    mismatch_rate = np.mean(np.abs(test_predictions - test_labels))
    print("test accuracy: {} %".format(100 - mismatch_rate * 100))
    
# Train on the training split and print the accuracy on the held-out split.
logistic_regression(features, labels, test_features, test_labels,learning_rate = 1.5, num_iterations = 300)