In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [18]:
# Let's load required libraries

import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics



In [4]:
# Load data from csv file

df = pd.read_csv('/content/drive/MyDrive/heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [5]:
df.mean()

age          54.366337
sex           0.683168
cp            0.966997
trtbps      131.623762
chol        246.264026
fbs           0.148515
restecg       0.528053
thalachh    149.646865
exng          0.326733
oldpeak       1.039604
slp           1.399340
caa           0.729373
thall         2.313531
output        0.544554
dtype: float64

In [6]:
df.std()

age          9.082101
sex          0.466011
cp           1.032052
trtbps      17.538143
chol        51.830751
fbs          0.356198
restecg      0.525860
thalachh    22.905161
exng         0.469794
oldpeak      1.161075
slp          0.616226
caa          1.022606
thall        0.612277
output       0.498835
dtype: float64

In [7]:
# Feature set

# Let's define feature sets,X:

df.columns


Index(['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh',
       'exng', 'oldpeak', 'slp', 'caa', 'thall', 'output'],
      dtype='object')

In [8]:
# we have to convert the pandas dataframe to a numpy array

X = df[['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh',
        'exng', 'oldpeak', 'slp', 'caa', 'thall']].values
X [0:5]

array([[ 63. ,   1. ,   3. , 145. , 233. ,   1. ,   0. , 150. ,   0. ,
          2.3,   0. ,   0. ,   1. ],
       [ 37. ,   1. ,   2. , 130. , 250. ,   0. ,   1. , 187. ,   0. ,
          3.5,   0. ,   0. ,   2. ],
       [ 41. ,   0. ,   1. , 130. , 204. ,   0. ,   0. , 172. ,   0. ,
          1.4,   2. ,   0. ,   2. ],
       [ 56. ,   1. ,   1. , 120. , 236. ,   0. ,   1. , 178. ,   0. ,
          0.8,   2. ,   0. ,   2. ],
       [ 57. ,   0. ,   0. , 120. , 354. ,   0. ,   1. , 163. ,   1. ,
          0.6,   2. ,   0. ,   2. ]])

In [9]:
# what are our labels?

y = df['output'].values
y[0:5]

array([1, 1, 1, 1, 1])

In [10]:
# normalize data

scaler = preprocessing.StandardScaler()
type(scaler)

In [11]:
X = scaler.fit(X).transform(X.astype(float))
X[0:5]



array([[ 0.9521966 ,  0.68100522,  1.97312292,  0.76395577, -0.25633371,
         2.394438  , -1.00583187,  0.01544279, -0.69663055,  1.08733806,
        -2.27457861, -0.71442887, -2.14887271],
       [-1.91531289,  0.68100522,  1.00257707, -0.09273778,  0.07219949,
        -0.41763453,  0.89896224,  1.63347147, -0.69663055,  2.12257273,
        -2.27457861, -0.71442887, -0.51292188],
       [-1.47415758, -1.46841752,  0.03203122, -0.09273778, -0.81677269,
        -0.41763453, -1.00583187,  0.97751389, -0.69663055,  0.31091206,
         0.97635214, -0.71442887, -0.51292188],
       [ 0.18017482,  0.68100522,  0.03203122, -0.66386682, -0.19835726,
        -0.41763453,  0.89896224,  1.23989692, -0.69663055, -0.20670527,
         0.97635214, -0.71442887, -0.51292188],
       [ 0.29046364, -1.46841752, -0.93851463, -0.66386682,  2.08204965,
        -0.41763453,  0.89896224,  0.58393935,  1.43548113, -0.37924438,
         0.97635214, -0.71442887, -0.51292188]])

In [12]:
X[0].mean()

0.059294080594880366

In [13]:
X[0].std()

1.3751063955134064

In [14]:
# train , test split

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)
print('Train set:', X_train.shape, y_train.shape)
print ('Train set:', X_test.shape, y_test.shape)


Train set: (242, 13) (242,)
Train set: (61, 13) (61,)


In [35]:
# K nearest neighbor(KNN)

k = 10
neigh = KNeighborsClassifier(n_neighbors=k).fit(X_train,y_train)

In [36]:
# predicting
# we can use the model to make predictions on the test set

yhat = neigh.predict(X_test)

In [37]:
print('org:' ,y_test[:20])
print('predict:' ,yhat[:20])

org: [1 0 1 1 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 0]
predict: [0 0 1 1 0 0 0 1 0 1 0 1 1 0 1 0 1 1 0 1]


In [38]:
# accuracy evaluation

metrics.accuracy_score(y_test, yhat)



0.8524590163934426