*In this notebook I implement logistic regression from scratch with NumPy on the Framingham dataset.*

# **Introduction**
* [Loading and exploring data](#1)
* [Logistic regression analysis](#2) 

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

import os
# Print the full path of every file available under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)

<a id = 1></a>
## **Loading and Exploring Data**

In [None]:
# Load the Framingham CSV from the Kaggle input directory into a DataFrame.
data = pd.read_csv("../input/heart-disease-prediction-using-logistic-regression/framingham.csv")

In [None]:
# Summarise the columns: dtypes and non-null counts.
data.info()

In [None]:
# Preview the first 10 rows.
data.head(10)

In [None]:
# Count the missing values in each column.
data.isna().sum()

In [None]:
# Drop the columns that are not used as features.
# Note: this removes `TenYearCHD` as well; the model further down uses `male` as its target.
unused_columns = [
    "currentSmoker",
    "prevalentStroke",
    "prevalentHyp",
    "diabetes",
    "TenYearCHD",
    "BPMeds",
    "education",
]
data.drop(columns=unused_columns, inplace=True)

In [None]:
# Fill the gaps in each listed column with that column's own mean
# (missing entries are skipped when the mean is computed).
for column in ["cigsPerDay", "totChol", "BMI", "heartRate", "glucose"]:
    data[column] = data[column].fillna(data[column].mean())

In [None]:
# Re-count missing values per column to check the result of the imputation above.
data.isna().sum()

In [None]:
# Descriptive statistics of the remaining columns.
data.describe()

<a id=2></a>
## Logistic Regression Analysis


In [None]:
# Target: the `male` column as a NumPy array. Features: every other column.
y = data["male"].values
x_data = data.drop(columns=["male"])

In [None]:
# Min-max scale every feature column to [0, 1].
# The per-column reductions are requested explicitly (axis=0): np.min(df) / np.max(df)
# forward axis=None to pandas, which newer pandas versions reduce over BOTH axes,
# returning a single scalar instead of one value per column.
feature_min = x_data.min(axis=0)
feature_range = x_data.max(axis=0) - feature_min
# A constant column has range 0; divide by 1 instead so it scales to 0 rather than NaN.
feature_range = feature_range.replace(0, 1)
x = (x_data - feature_min) / feature_range

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Transpose so that features run along rows and samples along columns,
# which is the layout the functions below index (x_train.shape[1] is used as the sample count).
x_train, x_test = x_train.T, x_test.T
y_train, y_test = y_train.T, y_test.T

In [None]:
#parameter initialize and sigmoid function
def init_weightsandbias(dimension):
    """Return the starting parameters for training.

    Args:
        dimension: number of weights to create.

    Returns:
        (w, b): `w` is a (dimension, 1) array filled with 0.01, `b` is 0.0.
    """
    initial_weights = np.full(shape=(dimension, 1), fill_value=0.01)
    initial_bias = 0.0
    return initial_weights, initial_bias
    
def sigmoid_func(z):
    """Apply the logistic sigmoid 1 / (1 + e^(-z)) element-wise and return the result."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [None]:
#Forward - Backward Propagation    
def forward_backward_propagation(w,b,x_train,y_train):
    """Run one forward and one backward pass of logistic regression.

    Args:
        w: weight array; `w.T` is multiplied with `x_train`.
        b: bias added to every linear output.
        x_train: feature matrix whose columns are samples
            (`x_train.shape[1]` is used as the sample count).
        y_train: labels, broadcast against the predictions.

    Returns:
        (cost, gradients): `cost` is the cross-entropy loss averaged over the
        samples; `gradients` is a dict with keys "derivative_weight" and
        "derivative_bias".
    """
    sample_count = x_train.shape[1]

    #forward propagation
    z = np.dot(w.T,x_train) + b
    y_head = sigmoid_func(z)
    # The sigmoid can saturate to exactly 0 or 1, and log(0) would turn the cost
    # into inf/NaN. Clip only the values fed to the logarithms; the gradients
    # below keep using the unclipped activations.
    epsilon = 1e-15
    y_head_clipped = np.clip(y_head, epsilon, 1 - epsilon)
    loss = -y_train*np.log(y_head_clipped)-(1-y_train)*np.log(1-y_head_clipped)
    cost = (np.sum(loss))/sample_count

    #backward propagation
    error = y_head - y_train
    derivative_weight = (np.dot(x_train,error.T))/sample_count
    derivative_bias = np.sum(error)/sample_count

    gradients = {"derivative_weight": derivative_weight, "derivative_bias": derivative_bias}
    return cost,gradients

In [None]:
#updating parameters
def update(w, b, x_train, y_train, learning_rate, number_of_iteration):
    """Train the weights and bias with gradient descent and plot the cost curve.

    Args:
        w: initial weights.
        b: initial bias.
        x_train: training features, passed to forward_backward_propagation.
        y_train: training labels, passed to forward_backward_propagation.
        learning_rate: step size applied to each gradient.
        number_of_iteration: number of gradient-descent steps to run.

    Returns:
        (parameters, gradients, cost_list): `parameters` is a dict with keys
        "weight" and "bias" holding the learned values; `gradients` is the dict
        from the last iteration (empty when no iteration ran); `cost_list`
        holds the cost of every iteration.
    """
    cost_list = [] #to store all costs
    cost_list2 = [] #costs sampled every 20 iterations, for the plot
    index = []
    # Defined up front so the return below still works when number_of_iteration is 0.
    gradients = {}
    #updating(learning) parameters
    for i in range(number_of_iteration):
        # The cost is measured before this iteration's parameter update.
        cost,gradients = forward_backward_propagation(w,b,x_train,y_train)
        cost_list.append(cost)
        w = w - learning_rate * gradients["derivative_weight"]
        b = b - learning_rate * gradients["derivative_bias"]
        if i % 20 == 0:
            cost_list2.append(cost)
            index.append(i)
            print ("Cost after iteration %i: %f" %(i, cost))

    #learned weights and bias
    parameters = {"weight": w,"bias": b}
    plt.plot(index,cost_list2)
    plt.xticks(index,rotation='vertical')
    plt.xlabel("Number of Iteration")
    plt.ylabel("Cost")
    plt.show()
    return parameters, gradients, cost_list

In [None]:
#implementing prediction
def predict(w,b,x_test):
    """Predict a 0/1 label for every column of `x_test`.

    Args:
        w: weights; `w.T` is multiplied with `x_test`.
        b: bias.
        x_test: feature matrix whose columns are the samples to classify.

    Returns:
        Array of shape (1, x_test.shape[1]) holding 0.0 or 1.0 per sample.
    """
    probabilities = sigmoid_func(np.dot(w.T,x_test) + b)
    Y_prediction = np.zeros((1,x_test.shape[1]))
    # A probability of at most 0.5 is labelled 0; anything else is labelled 1.
    Y_prediction[0, :] = np.where(probabilities[0] <= 0.5, 0, 1)
    return Y_prediction

In [None]:
#Logistic regression implementation
def logistic_regression(x_train, y_train, x_test, y_test, learning_rate, num_iterations):
    """Train the model, then print its accuracy on the train and test sets.

    Args:
        x_train, y_train: training features and labels.
        x_test, y_test: held-out features and labels.
        learning_rate: gradient-descent step size, forwarded to `update`.
        num_iterations: number of gradient-descent steps, forwarded to `update`.
    """
    # One weight per row of x_train.
    feature_count = x_train.shape[0]
    w, b = init_weightsandbias(feature_count)
    parameters, gradients, cost_list = update(w, b, x_train, y_train, learning_rate, num_iterations)

    learned_weight = parameters["weight"]
    learned_bias = parameters["bias"]
    y_prediction_test = predict(learned_weight, learned_bias, x_test)
    y_prediction_train = predict(learned_weight, learned_bias, x_train)

    # Accuracy = 100 minus the mean absolute error (in percent) between predictions and labels.
    # The train figure is reported so it can be compared with the test figure to spot overfitting.
    train_accuracy = 100 - np.mean(np.abs(y_prediction_train - y_train)) * 100
    print("train accuracy: {} %".format(train_accuracy))
    test_accuracy = 100 - np.mean(np.abs(y_prediction_test - y_test)) * 100
    print("test accuracy: {} %".format(test_accuracy))
    
# Train and evaluate with a learning rate of 0.2 for 500 iterations.
logistic_regression(
    x_train,
    y_train,
    x_test,
    y_test,
    learning_rate=0.2,
    num_iterations=500,
)

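Here is the result:

* train accuracy is 66.9616%
* test accuracy is 64.5047%

These figures measure how well the model predicts the `male` column, which is the target it was trained on. Changing the `learning_rate` and `num_iterations` values will change the result.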

Thanks for checking my notebook. 