# Importing and cleaning

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Read the heart-failure dataset from the (relative) Kaggle input directory
# and preview its first five rows.
csv_path = '../input/heart-failure-prediction/heart.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# One-hot encode the categorical columns into 0/1 indicator columns.
# dtype=int is passed explicitly: pandas >= 2.0 produces boolean dummies by
# default, and a frame mixing bool with int/float columns becomes an
# object-dtype array when it is converted with to_numpy().
categorical_cols = ['ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope', 'Sex']
df1 = pd.get_dummies(df, columns=categorical_cols, dtype=int)
df1.head(5)

In [None]:
# Remove one indicator column per encoded categorical so that each variable
# keeps a single reference level instead of a fully redundant set of dummies.
reference_levels = [
    'ChestPainType_TA',
    'RestingECG_ST',
    'ExerciseAngina_N',
    'ST_Slope_Down',
    'Sex_M',
]
df1 = df1.drop(columns=reference_levels)
df1.head()

In [None]:
# Feature matrix: every column except the target, as a float array.
# The target is removed by name, so no positional slice is needed; the former
# trailing `.iloc[:, 0:-1]` cut off the last remaining predictor (the Sex_F
# indicator, given the encoding order used when the dummies were created).
features = df1.drop(columns=['HeartDisease']).to_numpy(dtype=float)

# Target vector: the HeartDisease column.
pred = df1.HeartDisease.to_numpy()
pred.shape

# Modeling

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import sklearn.metrics
import sklearn.model_selection
from sklearn.linear_model import LogisticRegression

In [None]:
# Hold out 33% of the samples for evaluation; the fixed random_state keeps the
# split reproducible across runs.
xtrain, xtest, ytrain, ytest = sklearn.model_selection.train_test_split(
    features,
    pred,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Baseline classifier: plain logistic regression fitted on the training split.
# max_iter is raised above scikit-learn's default of 100 -- presumably so that
# lbfgs converges on these unscaled features.
base_model = LogisticRegression(solver='lbfgs', max_iter=1000)
base_model.fit(xtrain, ytrain)

# predict() already returns class labels, so the predictions are passed to the
# report as-is (no rounding required).
print(sklearn.metrics.classification_report(ytest, base_model.predict(xtest)))

In [None]:
# Confusion matrix for the model's default decision rule, expressed as a
# percentage of all test samples. scikit-learn lays the matrix out with true
# classes on the rows and predicted classes on the columns, so the bottom-left
# cell is the false-negative share and the top-right cell the false-positive share.
ypred = base_model.predict(xtest)
conf = sklearn.metrics.confusion_matrix(ytest, ypred)
conf = conf/ypred.shape[0]*100

fig, ax = plt.subplots()
sns.heatmap(conf, annot=True, fmt='.1f', vmin=0, vmax=100, ax=ax)
ax.set(
    xlabel='Predicted label',
    ylabel='True label',
    title='Confusion matrix, default threshold (% of test set)',
);

### We can see that false negatives account for 9.6% of the test predictions. This is rather high, and we should try to reduce the false negatives as much as possible.

In [None]:
# ROC curve of the baseline model on the test split, built from the predicted
# probability of the positive class (column 1 of predict_proba).
fpr, tpr, thresholds = sklearn.metrics.roc_curve(ytest, base_model.predict_proba(xtest)[:,1])

fig, ax = plt.subplots()
# Diagonal reference line: the ROC of a classifier that guesses at random.
ax.plot([0, 1], [0, 1], linestyle='--', label='Chance')
ax.plot(fpr, tpr, marker='.', label='Logistic regression')
ax.set(
    xlabel='False positive rate',
    ylabel='True positive rate',
    title='ROC curve on the test set',
)
ax.legend();

In [None]:
# roc_curve returns its thresholds in decreasing order, so the first position
# whose true-positive rate reaches 0.96 corresponds to the highest (strictest)
# probability cut-off that still achieves that recall.
# NOTE(review): the cut-off is chosen on the same test split it is later
# evaluated on, so the resulting metrics are optimistic -- consider a
# separate validation split.
first_hit = np.flatnonzero(tpr >= 0.96)[0]
new_thresh = thresholds[first_hit]

In [None]:
# Re-classify the test set with the tuned probability cut-off. The predictions
# are cast to int so they share the 0/1 label type of ytest instead of mixing
# boolean predictions with integer ground truth.
ypred = (base_model.predict_proba(xtest)[:,1] >= new_thresh).astype(int)

# Confusion matrix as a percentage of all test samples (rows: true class,
# columns: predicted class).
conf = sklearn.metrics.confusion_matrix(ytest, ypred)
conf = conf/ypred.shape[0]*100

fig, ax = plt.subplots()
sns.heatmap(conf, annot=True, fmt='.1f', vmin=0, vmax=100, ax=ax)
ax.set(
    xlabel='Predicted label',
    ylabel='True label',
    title='Confusion matrix, tuned threshold (% of test set)',
);

### The new threshold gives a much better result, as it yields only around 2.3% false negatives.

## Given below is a sample case of how the model could be used :

In [None]:
# Score a single held-out patient. predict_proba expects a 2-D array, hence
# the reshape to one row.
patient = xtest[5].reshape(1,-1)

# Evaluate the model once and keep the positive-class probability as a plain
# scalar, so it prints as a number (not a one-element array) and can be
# compared directly against the tuned threshold.
risk = base_model.predict_proba(patient)[0, 1]
print(f"The patient is at a risk of having a heart disease with a chance of {risk * 100:.2f} %")

if risk >= new_thresh:
    print("The patient needs treatment.")
else:
    print("The patient does not need treatment.")