<a href="https://colab.research.google.com/github/sushruthsssss/codechef_srm/blob/master/Heart_disease_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# New Section

In [2]:
import numpy as np
import pandas as pd

import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')


In [3]:
input="/content/heart_failure_clinical_records_dataset.csv"
data=pd.read_csv(input)
df=data.copy()
data.head(10)

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1
5,90.0,1,47,0,40,1,204000.0,2.1,132,1,1,8,1
6,75.0,1,246,0,15,0,127000.0,1.2,137,1,0,10,1
7,60.0,1,315,1,60,0,454000.0,1.1,131,1,1,10,1
8,65.0,0,157,0,65,0,263358.03,1.5,138,0,0,10,1
9,80.0,1,123,0,35,1,388000.0,9.4,133,1,1,10,1


In [11]:
df = df[df['ejection_fraction']<70]

inp_data = df.drop(data[['DEATH_EVENT']], axis=1)
out_data = df[['DEATH_EVENT']]

scaler = StandardScaler()
inp_data = scaler.fit_transform(inp_data)

X_train, X_test, y_train, y_test = train_test_split(inp_data, out_data, test_size=0.2, random_state=42)

In [12]:
y_train=y_train.to_numpy()
y_test=y_test.to_numpy()

In [15]:
print("X_shape:",X_train.shape)
print("X_shape:",X_test.shape)
print("X_shape:",y_train.shape)
print("X_shape:",y_test.shape)

X_shape: (237, 12)
X_shape: (60, 12)
X_shape: (237, 1)
X_shape: (60, 1)


In [17]:
def weigth(n_features):
  w=np.zeros((1,n_features))
  b=0
  return w,b

In [18]:
def sigmoid(result):
  final=1/(1+np.exp(-result))
  return final

In [40]:
def model_optimize(w,b,X,Y):
    m = X.shape[0]
    
    # prediction
    final_result = sigmoid(m)
    Y_T = Y.T
    cost = (-1/m)*(np.sum((Y_T*np.log(final_result)) + ((1-Y_T)*(np.log(1-final_result)))))
    
    # gradient calculation
    dw = (1/m)*(np.dot(X.T, (final_result-Y.T).T))
    db = (1/m)*(np.sum(final_result-Y.T))
    
    grads = {
        "dw": dw,
        "db": db
    }
    return grads, cost

In [48]:
def model_predict(w, b, X, Y, learning_rate, no_iterations):
    costs = []
    for i in range(no_iterations):
        grads, cost = model_optimize(w,b,X,Y)
        dw = grads["dw"]
        db = grads["db"]
        #weight update
        w = w - (learning_rate * (dw.T))
        b = b - (learning_rate * db)
        
        if (i % 100 == 0):
            costs.append(cost)
    
    #final parameters
    coeff = {"w": w, "b": b}
    gradient = {"dw": dw, "db": db}
    
    return coeff, gradient, cost

In [49]:
def predict(final_pred,m):
  y_pred=np.zeros((1,m))
  for i in range(final_pred.shape[1]):
    if final_pred[0][i]>0.5:
      y_pred[0][i]=1
  return y_pred

In [51]:
#Get number of features
n_features = X_train.shape[1]
print('Number of Features', n_features)
w, b = weigth(n_features)
#Gradient Descent
coeff, gradient, costs = model_predict(w, b, X_train, y_train, learning_rate=0.0001,no_iterations=4500)
#Final prediction
w = coeff["w"]
b = coeff["b"]
print('Optimized weights', w)
print('Optimized intercept',b)
#
final_train_pred = sigmoid(np.dot(w,X_train.T)+b)
final_test_pred = sigmoid(np.dot(w,X_test.T)+b)
#
m_tr =  X_train.shape[0]
m_ts =  X_test.shape[0]
#
y_tr_pred = predict(final_train_pred, m_tr)
print('Training Accuracy',accuracy_score(y_tr_pred.T, y_train))
#
y_ts_pred = predict(final_test_pred, m_ts)
print('Test Accuracy',accuracy_score(y_ts_pred.T, y_test))



Number of Features 12
Optimized weights [[ 0.06857494  0.01080541  0.00912437  0.01223705 -0.05384381  0.01483599
   0.01684062  0.06253752 -0.03054052 -0.00072533  0.00147623 -0.11612092]]
Optimized intercept -0.307594936708856
Training Accuracy 0.7341772151898734
Test Accuracy 0.7333333333333333


In [52]:
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()

clf.fit(X_train, y_train)

print (clf.intercept_, clf.coef_)

pred = clf.predict(X_test)

print ('Accuracy from sk-learn: {0}'.format(clf.score(X_test, y_test)))

[-1.35974827] [[ 0.69029406  0.0761576   0.18316243  0.20164252 -0.96431745 -0.05342474
  -0.08326401  0.44276159 -0.23266348 -0.32211913 -0.06331043 -1.56330289]]
Accuracy from sk-learn: 0.7833333333333333
