# heard disease prediction

In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn import metrics
import numpy as np
import seaborn as sns

In [2]:
heart_df = pd.read_csv("heart.csv")
heart_df.head()


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


drop the fbs for the really low absolute correlation with target

In [3]:
heart_Df = heart_df.drop(columns="fbs")
heart_Df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,106,0,1.9,1,3,2,0


split the data set into train one and test one

In [5]:
Y_df = heart_Df["target"].values.copy()
X_df = heart_Df.iloc[:, 0:12].values.copy()
print(X_df.shape)
print(Y_df.shape)

(1025, 12)
(1025,)


In [103]:
X_train, X_test, Y_train, Y_test = train_test_split(X_df, Y_df, test_size=0.20, random_state=1)
Y_train = Y_train.reshape(Y_train.shape[0], 1)
Y_test = Y_test.reshape(Y_test.shape[0], 1)
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(820, 12)
(205, 12)
(820, 1)
(205, 1)


### multiple linear regression

$$Y = w X + w_0$$
$$ Y =  \begin{bmatrix}
X & 1 
\end{bmatrix} \begin{bmatrix}
w \\
w_0 
\end{bmatrix} = \hat{X}W$$

our purpose is to calculate
$$W = argmin ||Y - \hat{X}W||_2 $$

$$l = ||Y - \hat{X}W||^2_2$$
$$l = \sum_i (Y_i - (\hat{X}W)_i)^2$$
$$l = \sum_i (Y_i - \sum_k \hat{X}_{ik} W_k)^2$$
$$  \frac{\partial l}{\partial W_u} = 2 \sum_i (Y_i - \sum_k \hat{X}_{ik}W_k)(-\hat{X}_{iu})\\
= 2 \sum_i( - \hat{X^T}_{ui} Y_i + \sum_k \hat{X^T}_{ui} X_{ik} W_k)$$
$$ \nabla _wl = -2 \hat{X}^T Y + 2 {\hat{X}}^T \hat{X} W$$

In [18]:
# produce original W randomly
seed = 123

one = np.ones(X_train.shape[0]).reshape(X_train.shape[0], 1)
one.shape


(820, 1)

In [20]:
X_train_1 = np.hstack((X_train, one))
X_train_1.shape

(820, 13)

In [24]:
X_test_1 = np.hstack((X_test, np.ones(X_test.shape[0]).reshape(X_test.shape[0], 1)))
X_test_1.shape

(205, 13)

In [197]:
def optimize(X, Y, lr=0.0000001, maxiter=100, random_state=seed):
    np.random.seed(random_state)
    W =  np.random.rand(X.shape[1], 1).reshape(X.shape[1], 1)
    for i in range(maxiter):
        frac = np.dot(X.T, np.dot(X, W)) - np.dot(X.T, Y) 
        W = W - 2 * lr * frac

    error = 0
    for i in range(Y.shape[0]):
        for j in range(W.shape[0]):
            error = error + (Y[i, 0] - X[i, j] * W[j, 0])**2
    error = np.sqrt(error)
    return W, error

In [149]:
np.random.seed(seed)
W =  np.random.rand(X_train_1.shape[1], 1)
np.dot(W.T, X_train_1.T).shape

(1, 820)

In [198]:
W_oped, err = optimize(X_train_1, Y_train)
err

9.758375318335654e+124

In [213]:
I = np.identity(X_train_1.T.shape[0])
W = np.dot(np.linalg.inv(np.dot(X_train_1.T, X_train_1) + 7 * I), np.dot(X_train_1.T, Y_train))
W

array([[ 7.54843272e-04],
       [-1.76800713e-01],
       [ 1.14100064e-01],
       [-6.49905245e-04],
       [-2.57827354e-04],
       [ 7.59510215e-02],
       [ 4.28351355e-03],
       [-1.35468326e-01],
       [-6.04629283e-02],
       [ 7.16466134e-02],
       [-9.31408637e-02],
       [-1.06025948e-01],
       [ 2.90676789e-01]])

In [214]:
error = 0
for i in range(Y_test.shape[0]):
    for j in range(W.shape[0]):
        error = error + (Y_test[i, 0] - X_test_1[i, j] * W[j, 0])**2
error = np.sqrt(error)
error

35.221490917208676

In [199]:

error = 0
for i in range(Y_test.shape[0]):
    for j in range(W.shape[0]):
        error = error + (Y_test[i, 0] - X_test_1[i, j] * W_oped[j, 0])**2
error = np.sqrt(error)
error

4.837512310883971e+124

In [201]:
W_oped

array([[2.66418310e+120],
       [3.26392661e+118],
       [4.51958746e+118],
       [6.45529383e+120],
       [1.23309102e+121],
       [2.51370139e+118],
       [7.29475075e+120],
       [1.61281828e+118],
       [5.07044326e+118],
       [6.76883091e+118],
       [3.62385836e+118],
       [1.13340101e+119],
       [4.87112634e+118]])

### ML model  MLP
reference: https://drive.google.com/file/d/1-WTwGeo-6o2a6-GuwncP7AIbVKVBT88o/view (come from the website of MIDILAB)

In [7]:
from sklearn.neural_network import MLPClassifier

MLPmodel = MLPClassifier(random_state=1, max_iter=1000, hidden_layer_sizes=(90, ), learning_rate_init=0.0001)
MLPmodel.fit(X_train, Y_train)
pred_fit = MLPmodel.predict(X_test)
confusion_matrix = metrics.confusion_matrix(Y_test, pred_fit, labels=np.unique(Y_test))
confusion_matrix

array([[78, 31],
       [ 8, 88]])

MLPClassifier trains iteratively since at each time step the partial derivatives of the loss function with respect to the model parameters are computed to update the parameters. It can also have a regularization term added to the loss function that shrinks model parameters to prevent overfitting. This implementation works with data represented as dense numpy arrays or sparse scipy arrays of floating point values.

In [8]:
TP, FN, FP, TN = confusion_matrix.ravel()
accuracy = (TP + TN) / (TP + FP + FN + TN)
accuracy

0.8097560975609757

In [9]:
precision = TP / (TP + FP)
recall = TP / (TP + FN)
print(precision)
print(recall)

0.9069767441860465
0.7155963302752294


In [10]:
f1 = 2 * (precision * recall)/(precision + recall)
f1

0.8

# 