In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk '/kaggle/input' recursively and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing Required Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.model_selection import train_test_split

# Data Collection

In [None]:
# Read the diabetes CSV from the Kaggle input directory into a DataFrame.
data = pd.read_csv('/kaggle/input/pima-indians-diabetes-database/diabetes.csv')
# Display the first rows as the cell output for a quick look at the columns.
data.head()

# Data Analysis

In [None]:
# analysis of the raw data
# Pairwise relationship plots between the columns of the raw frame,
# with the points coloured by the "Outcome" column.

sns.pairplot(data, hue = "Outcome")

In [None]:
# Work on a copy so the raw `data` frame stays untouched.
dataset = data.copy()
# Divide each of the first eight columns by the absolute value of its maximum.
# NOTE(review): the maxima are taken over the full frame, i.e. before the
# train/test split, so test rows influence the scaling — confirm this is acceptable.
# NOTE(review): presumably columns 0-7 are the features and the remaining column is
# the "Outcome" label — verify against the CSV schema.
for col in dataset.columns[0:8]:
    dataset[col] = dataset[col]/abs(dataset[col].max())
# Display the first rows of the scaled copy.
dataset.head()

In [None]:
# analysis of the normalized data
# Same pairwise plot as for the raw frame, now drawn from the scaled copy.

sns.pairplot(dataset, hue="Outcome")

In [None]:
# Hold out 30% of the rows as the test set; the fixed random_state makes the
# shuffled split reproducible between runs.
train, test = train_test_split(dataset, test_size=0.3, random_state=42, shuffle=True)

# Defining Important Functions

In [None]:
def makeinput(df):
    """Split a DataFrame into a design matrix and a label column vector.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose last column holds the labels and whose remaining
        columns hold the features.

    Returns
    -------
    x : numpy.ndarray, shape (rows, features + 1)
        Feature values preceded by a column of ones (the intercept term).
    y : numpy.ndarray, shape (rows, 1)
        Labels as a column vector.
    """
    feature_values = df.iloc[:, :-1].values
    n_rows = feature_values.shape[0]
    # Column vector of labels so it lines up with the (rows, 1) model output.
    labels = df.iloc[:, -1].values.reshape(n_rows, 1)
    # Leading column of ones lets the first coefficient act as the intercept.
    intercept = np.ones((n_rows, 1))
    design = np.concatenate((intercept, feature_values), axis=1)
    return design, labels

In [None]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated element-wise.

    Parameters
    ----------
    x : scalar or array-like of numbers
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Values in [0, 1] with the same shape as `x`.

    Notes
    -----
    Computed as exp(-log(1 + exp(-x))) with the inner logarithm taken by
    `np.logaddexp`. Unlike the direct formula, this never evaluates exp(-x)
    on its own, so large negative inputs saturate to 0.0 without an overflow
    warning.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, np.negative(x)))

# Visual check of the sigmoid: evaluate it on 200 evenly spaced points
# between -5 and 5 and plot the resulting curve.
a = np.linspace(-5, 5, 200)
b = sigmoid(a)
plt.plot(a, b)
plt.xlabel("x")
plt.ylabel("Sigmoid Function")
plt.show()

In [None]:
def hyp(theta, x):
    """Linear part of the model: the matrix product of `x` and `theta`.

    Parameters
    ----------
    theta : array-like
        Coefficients (right-hand operand of the product).
    x : array-like
        Design matrix (left-hand operand of the product).

    Returns
    -------
    numpy.ndarray
        Result of ``np.matmul(x, theta)``.
    """
    linear_scores = np.matmul(x, theta)
    return linear_scores

In [None]:
def cost(theta, x, y):
    """Mean binary cross-entropy of the logistic model on (x, y).

    Parameters
    ----------
    theta : numpy.ndarray
        Coefficients passed to `hyp`.
    x : numpy.ndarray
        Design matrix; its first dimension is the number of samples.
    y : numpy.ndarray
        Labels. They are combined element-wise with the output of `hyp`, so
        they must have the same shape as that output (a mismatch would
        broadcast silently).

    Returns
    -------
    numpy.float64
        The loss averaged over the samples.

    Notes
    -----
    The two log terms are computed directly from the linear score `h`:
        log(sigmoid(h))     == -logaddexp(0, -h)
        log(1 - sigmoid(h)) == -logaddexp(0,  h)
    This stays finite when the sigmoid saturates to exactly 0 or 1, where
    taking the logarithm of the probability itself would give -inf or nan.
    """
    m = x.shape[0]
    h = hyp(theta, x)
    neg_log_p = np.logaddexp(0, -h)          # -log(sigmoid(h))
    neg_log_not_p = np.logaddexp(0, h)       # -log(1 - sigmoid(h))
    # The minus signs are already folded into the two terms above.
    J = 1/(m) * np.sum(y*neg_log_p + (1-y)*neg_log_not_p)
    return J

In [None]:
def grad(theta, x, y, alpha, epochs):
    """Run batch gradient descent on the logistic-regression cost.

    Parameters
    ----------
    theta : numpy.ndarray
        Starting coefficients; the caller's array is not modified.
    x : numpy.ndarray
        Design matrix; its first dimension is the number of samples.
    y : numpy.ndarray
        Labels, subtracted element-wise from the predicted probabilities.
    alpha : float
        Learning rate.
    epochs : int
        Number of update steps.

    Returns
    -------
    theta : numpy.ndarray
        Coefficients after the last update.
    j : numpy.ndarray, shape (epochs,)
        Cost evaluated after each update.
    """
    n_samples = x.shape[0]
    step = alpha/n_samples
    x_transposed = np.transpose(x)
    history = np.zeros(epochs)
    for epoch in range(epochs):
        # Difference between predicted probability and label, per sample.
        residual = sigmoid(hyp(theta, x)) - y
        theta = theta - step*np.matmul(x_transposed, residual)
        # Record the cost reached with the freshly updated coefficients.
        history[epoch] = cost(theta, x, y)
    return theta, history

In [None]:
def pred(theta, x):
    """Predicted probabilities: `sigmoid` applied to the linear score `hyp(theta, x)`.

    Parameters
    ----------
    theta : numpy.ndarray
        Coefficients.
    x : numpy.ndarray
        Design matrix.

    Returns
    -------
    numpy.ndarray
        Sigmoid of the linear score, one value per row of `x`.
    """
    return sigmoid(hyp(theta, x))

# Training

In [None]:
# Initialization of coefficient

# Build the design matrix (with its leading column of ones) and the label
# column vector from the training split.
x_train, y_train = makeinput(train)
# m = number of training rows, n = number of columns of x_train.
m, n = x_train.shape
# Start from an all-zero coefficient column vector, one entry per column of x_train.
theta = np.zeros((n, 1))

In [None]:
# Loss without optimization

# Cost of the current `theta` on the training data, as a baseline for the
# value obtained after gradient descent.
loss = cost(theta, x_train, y_train)
print("Loss without optimization is " + str(loss))

In [None]:
# Optimization with GD

alpha = 0.01        # learning rate
epochs = 75000     # no of iterations to run the loop
# Start from an all-zero vector shaped like `theta` rather than from `theta`
# itself: the result is assigned back to `theta`, so feeding it in directly
# would make a second run of this cell continue from the trained weights
# (and `j` would no longer describe the first `epochs` steps).
theta, j = grad(np.zeros_like(theta), x_train, y_train, alpha, epochs)
# Cost on the training data with the optimized coefficients.
loss_gd = cost(theta, x_train, y_train)

print("Loss after optimization is " + str(loss_gd))


In [None]:
# Loss recorded after each gradient-descent step. The x-axis is sized from the
# recorded history itself so it always matches `j`, even if `epochs` was
# edited without re-running the training cell.
iteration = np.arange(0, len(j), 1)
plt.plot(iteration, j)
plt.xlabel("Iteration")
plt.ylabel("Training loss")
plt.title("Gradient descent convergence")
plt.show()
    

# Model Evaluation

In [None]:
# Build the design matrix and label column vector for the held-out split.
x_test, y_test = makeinput(test)
# Show ten of the true labels (rows 80-89) for comparison with the predictions.
y_test[80:90]

In [None]:

# Turn predicted probabilities into class labels by rounding to the nearest
# integer, i.e. a 0.5 decision threshold (np.round sends an exact 0.5 to 0).
predictions = np.round(pred(theta, x_test))
# Show the predictions for the same ten rows (80-89) as the true labels.
predictions[80:90]

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# Confusion matrix on the held-out split: true labels first, predictions second.
confusion_matrix(y_test, predictions)

In [None]:
# Text summary of the classification metrics on the held-out split;
# print() is needed because the report is a pre-formatted multi-line string.
print(classification_report(y_test, predictions))