In [None]:
from collections import deque

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Path to the framingham.csv input file, relative to the notebook.
filepath = "../input/heart-disease-prediction-using-logistic-regression/framingham.csv"

# Raw, untouched frame; later cells derive cleaned copies from it.
df_org = pd.read_csv(filepath)

In [None]:
# Data clean-up
# Fixed seed so the undersampling and the shuffle below pick the same rows in
# the same order on every run (keeps Restart & Run All reproducible).
SEED = 42

# Drop every row that contains at least one missing value.
df = df_org.dropna()

# Undersample the TenYearCHD == 0 rows: a random 80% of them is removed.
negatives = df[df['TenYearCHD'] == 0]
new_df = df.drop(negatives.sample(frac=0.8, random_state=SEED).index)

# permutation() returns a row-shuffled *copy*, so the array handed back by
# to_numpy() is never written to (it may be a read-only view of the frame).
dataset = np.random.default_rng(SEED).permutation(new_df.to_numpy())

# The label is read from the last column; every column before it is a feature.
datasetX = dataset[:, 0:-1]
datasetY = dataset[:, -1]

# Min-max normalise each feature column to [0, 1].
# NOTE(review): min/max are computed over all rows, i.e. before the train/test
# split, so held-out rows influence the scaling -- consider fitting the scaler
# on the training rows only.
col_min = np.min(datasetX, axis=0)
col_span = np.max(datasetX, axis=0) - col_min
# A column holding a single distinct value has span 0; divide by 1 instead so
# it becomes all zeros rather than NaN.
col_span = np.where(col_span == 0, 1, col_span)
datasetX = (datasetX - col_min) / col_span

In [None]:
# Histogram of the label vector, to eyeball how the label values are distributed.
print("Output ratio")
hist = plt.hist(datasetY)

In [None]:
# Flatten dataset and divide it into sets

# Number of leading rows of the dataset held out for evaluation.
TEST_SIZE = 100

# Flatten each sample to a vector and put samples in columns:
# shape (n_features, n_samples), the layout train() and test() index by column.
flat_dataset_X = datasetX.reshape((datasetX.shape[0], -1)).T

n_samples = flat_dataset_X.shape[1]
assert n_samples == datasetY.shape[0], "features and labels disagree on sample count"
assert n_samples > TEST_SIZE, "not enough samples left to train on after holding out the test set"

testX, trainX = flat_dataset_X[:, :TEST_SIZE], flat_dataset_X[:, TEST_SIZE:]

# Labels as row vectors of shape (1, n) so they line up with the columns of X.
testY = datasetY[:TEST_SIZE][np.newaxis, :]
trainY = datasetY[TEST_SIZE:][np.newaxis, :]

assert trainX.shape[1] + testX.shape[1] == n_samples

In [None]:
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    Evaluated in a numerically stable form: only ``exp(-|x|)`` is ever
    computed, so inputs of large magnitude cannot overflow ``np.exp``
    (the naive formula emits a RuntimeWarning for very negative ``x``).

    Parameters
    ----------
    x : scalar or array-like of numbers

    Returns
    -------
    numpy scalar or numpy.ndarray
        Values in [0, 1]; an array of the same shape as ``x`` for array
        input, a scalar for scalar input.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # exponent is never positive, so this never overflows
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically the same value.
    out = np.where(x >= 0, 1.0, decay) / (1.0 + decay)
    return out[()]  # unwraps a 0-d result to a scalar; leaves real arrays as arrays

In [None]:
def calc_cost(A, Y, m, eps=1e-15):
    """Binary cross-entropy between predictions ``A`` and labels ``Y``.

    Computes ``-sum(Y*log(A) + (1-Y)*log(1-A)) / m``.

    Parameters
    ----------
    A : array-like
        Predicted probabilities.
    Y : array-like
        Target labels, broadcastable against ``A``.
    m : number
        Divisor applied to the summed loss (the caller passes the sample count).
    eps : float, optional
        Probabilities are clipped to ``[eps, 1 - eps]`` before taking logs so
        that a saturated prediction (exactly 0 or 1) yields a large finite
        loss instead of ``inf`` or ``nan`` (``0 * log(0)``).

    Returns
    -------
    float
        The cross-entropy summed over all elements, divided by ``m``.
    """
    # float64 so that 1 - eps stays strictly below 1 after clipping.
    A = np.clip(np.asarray(A, dtype=float), eps, 1 - eps)
    return -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / m

In [None]:
def train(X,Y,alpha,itercount,print_cost,print_at=100,stored_cost_len=5000):
    """Fit logistic-regression parameters with batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_features, m)
        Training samples, one per column.
    Y : numpy.ndarray, shape (1, m)
        Target labels, one per column of ``X``.
    alpha : float
        Learning rate.
    itercount : int
        Number of gradient steps to take.
    print_cost : bool
        When true, print the cost every ``print_at`` iterations.
    print_at : int, optional
        Reporting interval used when ``print_cost`` is true.
    stored_cost_len : int, optional
        How many of the most recent cost values to return. ``0`` returns
        none; ``None`` or a negative value keeps every cost.

    Returns
    -------
    w : numpy.ndarray, shape (n_features, 1)
        Learned weights.
    b : float
        Learned bias.
    costs : list
        The last ``stored_cost_len`` costs, oldest first. Each cost is
        measured on the parameters as they were *before* that iteration's
        update.

    Raises
    ------
    AssertionError
        If ``X`` and ``Y`` do not have the same number of columns.
    """
    assert X.shape[1] == Y.shape[1], "X and Y must have the same number of columns (samples)"
    m = X.shape[1]
    w = np.zeros((X.shape[0], 1))
    b = 0.0
    cost = None  # stays None if itercount == 0, so the summary print below still works

    # A bounded deque drops the oldest entry in O(1) once it is full.
    keep_all = stored_cost_len is None or stored_cost_len < 0
    recent_costs = deque(maxlen=None if keep_all else stored_cost_len)

    print("training...")
    for iteration in range(itercount):
        # Forward pass.
        Z = np.dot(w.T, X) + b
        A = sigmoid(Z)

        # Gradients of the mean cross-entropy, then one descent step.
        dZ = (A - Y) / m
        dW = np.dot(X, dZ.T)
        db = np.sum(dZ)
        w = w - alpha * dW
        b = b - alpha * db

        # A was computed before the step above, so this is the pre-update cost.
        cost = calc_cost(A, Y, m)
        recent_costs.append(cost)

        if print_cost and iteration % print_at == 0:
            print(f"Cost at iteration {iteration} = {cost}")

    print("finished...")
    print("Cost=", cost)
    return w, b, list(recent_costs)

In [None]:
# Fit on the training split: learning rate 0.1 for 50000 iterations, printing
# the cost every 5000 iterations and keeping the 10000 most recent cost values.
w, b, costs = train(
    trainX,
    trainY,
    alpha=0.1,
    itercount=50000,
    print_cost=True,
    print_at=5000,
    stored_cost_len=10000,
)

In [None]:
# Line plot of the cost values returned by train().
plot = plt.plot(costs)
plt.title("last few costs")

In [None]:
#TEST
def test(X, Y, w, b):
    """Return the accuracy, in percent, of parameters (w, b) on X against Y.

    The sigmoid of ``w.T @ X + b`` is rounded to the nearest integer with
    ``np.rint`` to obtain hard 0/1 predictions (``np.rint`` rounds halves to
    even, so an output of exactly 0.5 becomes 0). The number of predictions
    equal to ``Y`` is divided by the number of columns of the prediction
    array and scaled by 100.
    """
    probabilities = sigmoid(np.dot(w.T, X) + b)
    predictions = np.rint(probabilities)
    n_correct = np.sum(predictions == Y)
    return 100 * (n_correct / predictions.shape[1])

In [None]:
# Score the learned parameters on the held-out split first, then on the training split.
test_accuracy = test(testX, testY, w, b)
train_accuracy = test(trainX, trainY, w, b)

print("Accuracy on test set=", test_accuracy, "%")
print("Accuracy on train set=", train_accuracy, "%")