# Heart Disease Prediction with KNN, Combining GridSearchCV with a Pipeline

# Import Packages

In [3]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
import pandas as pd
import numpy as np

In [4]:
# Read the dataset; 'heart.csv' is a relative path, so it is resolved against
# the kernel's current working directory.
df = pd.read_csv(filepath_or_buffer='heart.csv')
# Preview the first five rows (head() defaults to n=5).
df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


## Data Preprocessing

In [5]:
# Separate the label column ('target') from the feature columns.
y = df['target']
X = df.drop(columns='target')

In [6]:
# Hold out 20% of the rows for testing. `stratify=y` keeps the class
# proportions of the target in both splits; the fixed `random_state` makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=48,
    stratify=y,
)

In [7]:
# Sanity check: show the first rows of the training features (the row labels
# are the original DataFrame index, shuffled by the split).
X_train.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
250,51,1,0,140,298,0,1,122,1,4.2,1,3,3
258,62,0,0,150,244,0,1,154,1,1.4,1,0,2
24,40,1,3,140,199,0,1,178,1,1.4,2,0,3
49,53,0,0,138,234,0,0,160,0,0.0,2,0,2
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3


In [8]:
# Stand-alone KNN classifier with default hyperparameters.
# NOTE(review): the pipeline defined below builds its own KNeighborsClassifier()
# and does not reference `knn`; confirm whether this variable is still needed.
knn = KNeighborsClassifier()

## Use a Pipeline to Preprocess the Data

In [9]:
# Sub-pipeline for the numeric columns: a single step, named "scale", that
# rescales the features with MinMaxScaler.
numerical_pipeline = Pipeline(steps=[("scale", MinMaxScaler())])

In [10]:
# Feature columns routed through `numerical_pipeline`. ColumnTransformer's
# default `remainder='drop'` discards any column that is not listed here.
numeric_columns = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal',
]
preprocessor = ColumnTransformer(
    transformers=[("numeric", numerical_pipeline, numeric_columns)]
)

In [11]:
# Full model: column preprocessing ("prep") followed by a KNN classifier
# ("algo"). The step names are what the `algo__...` grid-search keys refer to.
pipeline = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("algo", KNeighborsClassifier()),
    ]
)

In [12]:
# Fit the preprocessing steps and the KNN classifier on the training split only.
pipeline.fit(X_train, y_train)

In [13]:
# Baseline accuracy of the default-parameter pipeline.
# Note the display order: (test score, train score).
test_accuracy = pipeline.score(X_test, y_test)
train_accuracy = pipeline.score(X_train, y_train)
test_accuracy, train_accuracy

(0.8688524590163934, 0.8636363636363636)

## Use GridSearchCV

In [18]:
# Estimator to tune. GridSearchCV fits clones of it, so the already-fitted
# `pipeline` object is not modified by the search.
model_pipeline = pipeline

# Hyperparameter grid for the KNN step; the `algo__` prefix routes each key to
# the pipeline step named "algo".
parameters = {
    'algo__n_neighbors': np.arange(1, 50),
    'algo__weights': ['uniform', 'distance'],
    'algo__algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
}

# 49 * 2 * 4 = 392 candidates x 5 folds = 1,960 fits. `n_jobs=-1` spreads them
# over all available cores; it only parallelizes the work, so the candidates,
# the CV splits and the resulting scores are the same as in a serial run.
search = GridSearchCV(model_pipeline, parameters, cv=5, n_jobs=-1)

In [19]:
# Display the configured GridSearchCV object.
search

In [24]:
# Run the cross-validated grid search on the training split. fit() returns the
# GridSearchCV object itself, so `search_fit_train` is the same object as `search`.
search_fit_train = search.fit(X=X_train, y=y_train)

# Evaluate on the held-out test split: predicted labels and the search's score.
search_predict = search.predict(X=X_test)
search_score = search.score(X=X_test, y=y_test)

In [25]:
# Test-set score of the tuned model found by the grid search.
search_score

0.8852459016393442

In [29]:
# Hyperparameter combination selected by the grid search.
search.best_params_

{'algo__algorithm': 'auto', 'algo__n_neighbors': 6, 'algo__weights': 'uniform'}

In [26]:
# Report the final test-set score of the tuned pipeline.
print(f"Score combination GridSearchCV and Pipeline is {search_score}")

Score combination GridSearchCV and Pipeline is 0.8852459016393442
