In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, files in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in files):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the heart-disease table into a DataFrame and preview the first rows.
df = pd.read_csv(
    '/kaggle/input/heart-disease-uci/heart.csv'
)
df.head()

In [None]:
# Summary statistics for every numeric column; displayed as the cell's output
# so value ranges can be inspected before the features are scaled.
df.describe()

In [None]:
# Count missing entries per column (a column of zeros means nothing to impute)
df.isna().sum()

## ML Classification Model (K-Nearest-Neighbour) 

In [None]:
# Feature matrix: every column except the label column "target"
X = df.drop(columns="target").to_numpy()

# Label vector: the "target" column on its own
y = df["target"].to_numpy()

In [None]:
# Create Scaler for Data Normalization
from sklearn.preprocessing import MinMaxScaler

# Always fit on the raw feature columns taken fresh from `df`, never on the
# current value of `X`. This cell overwrites `X` with the scaled matrix, so
# fitting on `X` would, on a second run of the cell, fit the scaler to data that
# is already in [0, 1]; the scaler exported later would then leave raw user
# input effectively unscaled.
raw_features = df.drop("target", axis=1).values.astype(float)

# NOTE(review): the scaler is fitted on all rows, before the train/test split
# performed in the next cell, so the test rows influence the min/max used for
# scaling. Confirm whether that is acceptable for the reported accuracy.
scaler = MinMaxScaler()
X = scaler.fit_transform(raw_features)
print(X[:3])

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical between runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=4,
)
print(f"Train set: {X_train.shape} {y_train.shape}")
print(f"Test set: {X_test.shape} {y_test.shape}")

In [None]:
# Fit one K-Nearest-Neighbour classifier per candidate K and record how
# accurately each one predicts the held-out test rows.
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

Ks = 30  # exclusive upper bound: K takes the values 1 .. Ks-1
mean_acc = np.zeros(Ks - 1)  # mean_acc[i] holds the accuracy for K = i + 1

# NOTE(review): accuracy is measured on the test split, so choosing K from
# these numbers tunes the model against the test data.
for n in range(1, Ks):
    clm = KNeighborsClassifier(n_neighbors=n)
    clm.fit(X_train, y_train)
    yhat = clm.predict(X_test)
    mean_acc[n - 1] = metrics.accuracy_score(y_test, yhat)

print('Maximum Accuracy achieved:', mean_acc.max())

In [None]:
# Plot accuracy per K
import matplotlib.pyplot as plt

# Attach the label to the line and let legend() pick it up. Passing a bare
# string to legend() is treated as a sequence of labels, one per character,
# so the single line would be labelled "A" instead of "Accuracy".
plt.plot(range(1, Ks), mean_acc, 'g', label='Accuracy')
plt.legend()
plt.ylabel('Accuracy')
plt.xlabel('Number of Neighbors (K)')
plt.title('Accuracy of Model for each K')
plt.tight_layout()
plt.show()

# mean_acc[i] belongs to K = i + 1, hence the +1 on the argmax index.
print("Best accuracy with", mean_acc.max(), "with k =", mean_acc.argmax()+1)

In [None]:
# Create and Export final Model + Scaler
import pickle as pkl

# NOTE(review): K is fixed by hand here rather than taken from the accuracy
# sweep above; confirm it still matches the K you intend to ship.
K = 20
clm = KNeighborsClassifier(n_neighbors=K).fit(X_train,y_train)
yhat = clm.predict(X_test)
print('Accuracy-Score:', metrics.accuracy_score(y_test, yhat))

# Use context managers so both files are flushed and closed before any later
# cell (or another process) tries to read them back.
with open('final_model.p', 'wb') as model_file:
    pkl.dump(clm, model_file)
with open('scaler.sav', 'wb') as scaler_file:
    pkl.dump(scaler, scaler_file)

## Create simple Command Line App to use the model

In [None]:
# Take the second row of the dataset as a one-column frame; its values can be
# typed into the prompts below to try out the command line app.
user_input = pd.DataFrame(df.iloc[1])
user_input

In [None]:
# Load model and scaler
# NOTE: unpickling can execute arbitrary code; only load files that this
# notebook wrote itself in the export cell.
with open('./scaler.sav', 'rb') as scaler_file:
    scaler = pkl.load(scaler_file)
with open('./final_model.p', 'rb') as model_file:
    clm = pkl.load(model_file)

# Ask user: one prompt per feature. The order of this list is the order in
# which the values are handed to the scaler and the model.
# NOTE(review): presumably this matches the column order of the training data
# (all columns except "target") -- verify against the CSV header.
# NOTE(review): the last prompt lists the codes 3/6/7; confirm these are the
# codes actually present in the training data's thal column.
prompts = [
    "Age: ",
    "Sex (0, 1): ",
    "Chest Pain Type (0-3): ",
    "Resting Blood Pressure: ",
    "serum cholestoral in mg/dl: ",
    # The original prompt had ';' where the comparison sign '>' belongs.
    "Fasting blood sugar > 120 mg/dl (1 = true; 0 = false): ",
    "Resting Electrocardiographic Results (0, 1, 2): ",
    "Maximum heart rate achieved: ",
    "Exercise induced angina (1 = yes; 0 = no): ",
    "ST depression induced by exercise relative to rest: ",
    "the slope of the peak exercise ST segment: ",
    "number of major vessels (0-3) colored by flourosopy: ",
    "3 = normal; 6 = fixed defect; 7 = reversable defect: ",
]

answers = []
for prompt in prompts:
    print(prompt)
    answers.append(input(""))

# Keep the individual answers available under their feature names.
(age, sex, cp, trestbps, chol, fbs, restecg, thalach,
 exang, oldpeak, slope, ca, thal) = answers

# Build a single-row feature matrix; the float conversion raises ValueError if
# any answer is not a number.
Xi = np.array(answers).reshape(1, -1).astype(float)
# Apply the same scaling the model was trained with before predicting.
Xi = scaler.transform(Xi)
prediction = clm.predict(Xi)

if prediction[0] == 0:
    print('No Risk of Heart Disease')
else:
    print('Risk of Heart Disease')