## Importing required libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Suppress library warnings so they do not clutter the notebook output.
warnings.filterwarnings(action='ignore')
# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever name this
# installation provides before layering the dark background on top.
bright_style = ('seaborn-v0_8-bright'
                if 'seaborn-v0_8-bright' in plt.style.available
                else 'seaborn-bright')
plt.style.use([bright_style, 'dark_background'])

## Importing the heart failure clinical records dataset downloaded from Kaggle.

In [None]:
# Location of the heart-failure CSV, relative to the notebook's working directory.
csv_path = '../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
data = pd.read_csv(csv_path)
# Preview the first rows to confirm the file was parsed as expected.
data.head()

#### In this dataset, DEATH_EVENT is the dependent feature. It contains binary values, so this is a classification problem and DEATH_EVENT is our target variable. All other features are independent variables.

### Checking whether any null values are present in our dataset.

In [None]:
# Count the missing entries in each column (isna is the pandas alias of isnull).
data.isna().sum()

### Checking the data type of every feature present in our dataset

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
data.info()

### Finding the number of unique values present in each feature.

In [None]:
# Number of distinct values per column; a count of 2 indicates a binary feature.
data.nunique()

#### In our dataset, the features anaemia, diabetes, high_blood_pressure, sex and smoking are binary, because each of them has only two unique values.

#### Visualization of correlation between independent features and target variable.

In [None]:
# Pairwise Pearson correlation between all columns (pandas' default method,
# spelled out here for clarity), then display the resulting matrix.
correlation = data.corr(method='pearson')
correlation

### For better visualization, plotting a heatmap with the seaborn library.

In [None]:
# Draw the correlation matrix as a colour-coded grid on an 8x8 figure.
heatmap_size = (8, 8)
sns.set(rc={'figure.figsize': heatmap_size})
sns.heatmap(data=correlation, cmap="PiYG")
plt.title("Heatmap")
plt.show()

### It seems that age and serum creatinine have a slight positive correlation with the death event, and ejection fraction has a slight negative correlation with it. Time, on the other hand, has a stronger negative correlation with the target. The other features do not have much impact.

#### Dividing our data into two parts: the independent variables and the target variable.

In [None]:
# DEATH_EVENT is the label to predict; every remaining column is a predictor.
target_column = 'DEATH_EVENT'
Y = data[target_column]
X = data.drop(columns=[target_column])

#### Scaling the X dataset

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise every predictor and keep the result as a DataFrame carrying the
# original column labels.
column = X.columns
scale = StandardScaler()
scaled_X = pd.DataFrame(scale.fit_transform(X), columns=column)
# NOTE(review): this scaler is fitted on every row of X, so its statistics
# include rows that may later be held out for testing. For model evaluation the
# scaler should be fitted on the training rows only.
scaled_X.head()

#### Splitting the data into train and test sets.

In [None]:
from sklearn.model_selection import train_test_split
# Split the *unscaled* predictors first so the held-out rows never contribute
# to the scaling statistics. Fitting the scaler on all rows before splitting
# leaks test-set mean/variance into training. The test size and seed are
# unchanged, so the same rows land in each split as before.
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=101
)

# Fit the scaler on the training rows only, then apply that single fitted
# transform to both splits. Column labels and row indices are preserved.
split_scaler = StandardScaler().fit(X_train_raw)
X_train = pd.DataFrame(
    split_scaler.transform(X_train_raw),
    columns=X_train_raw.columns,
    index=X_train_raw.index,
)
X_test = pd.DataFrame(
    split_scaler.transform(X_test_raw),
    columns=X_test_raw.columns,
    index=X_test_raw.index,
)

#### Importing different classification models to compare their scores with each other.

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble  import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Candidate classifiers as (display name, estimator) pairs.
# The tree-based estimators are randomised, so they get a fixed seed (the same
# value used for the train/test split) to make the comparison reproducible
# across kernel restarts.
models = [
    ("LogisticRegression", LogisticRegression()),
    ("DescisionTree", DecisionTreeClassifier(random_state=101)),
    ("RandomForest", RandomForestClassifier(random_state=101)),
    ("SupportVector", SVC()),
    ("KNeighbors", KNeighborsClassifier()),
]

In [None]:
# Fit each candidate on the training split, then report its score on the
# training split followed by the test split.
for label, estimator in models:
    estimator.fit(X_train, Y_train)
    split_scores = (
        ("train", estimator.score(X_train, Y_train)),
        ("test", estimator.score(X_test, Y_test)),
    )
    for split_name, split_score in split_scores:
        print(label, split_name + " score =", split_score)

#### It looks like Random Forest leads the score comparison on our dataset against logistic regression, decision tree, SVM and KNN.

#### Checking the score of our Random Forest model on our test dataset for different values of max_depth.

In [None]:
# Sweep the tree depth limit from 1 to 19 and print the test-split score for
# each setting. A fixed random_state (the same seed as the train/test split)
# keeps the sweep reproducible; without it, the randomness of the forest can
# make a different depth look best on every run.
# NOTE(review): the depth is being chosen by its score on the test split, so
# the winning score is an optimistic estimate. Consider selecting the depth
# with cross-validation on the training data instead.
for i in range(1, 20):
    model = RandomForestClassifier(max_depth=i, random_state=101)
    model.fit(X_train, Y_train)
    score = model.score(X_test, Y_test)
    print("for max depth ", i, "score =", score)

#### For max depth 3 it gives maximum score

In [None]:
# Train the final forest with the chosen depth limit. The seed is fixed (same
# value as the train/test split) so the reported score, the confusion matrix
# and the curves derived from these outputs are reproducible.
model = RandomForestClassifier(max_depth=3, random_state=101)
model.fit(X_train, Y_train)

# Hard class labels and per-class probabilities for the test rows; both are
# reused by the evaluation cells that follow.
prediction = model.predict(X_test)
probablities = model.predict_proba(X_test)

# Display the score on the test split as the cell output.
model.score(X_test, Y_test)

### We have created a model using RandomForestClassifier with an accuracy score of 0.934, i.e. 93.4%

#### Creating confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix
# Cross-tabulate the true test labels against the predicted labels and print the table.
test_confusion = confusion_matrix(Y_test, prediction)
print(test_confusion)

#### Creating classification report

In [None]:
from sklearn.metrics import classification_report
# Build the text report comparing the true test labels with the predictions,
# then print it.
report_text = classification_report(Y_test, prediction)
print(report_text)

## Plotting the Precision-Recall Curve

In [None]:
from sklearn.metrics import precision_recall_curve
# Precision/recall pairs over the decision thresholds, computed from the
# second probability column (index 1) of the predicted probabilities.
precision_points, recall_points, threshold_points = precision_recall_curve(
    Y_test, probablities[:, 1]
)
# Display the shapes of the three arrays as the cell output.
tuple(points.shape for points in (precision_points, recall_points, threshold_points))

In [None]:
# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever name is available.
dark_style = ('seaborn-v0_8-dark'
              if 'seaborn-v0_8-dark' in plt.style.available
              else 'seaborn-dark')
plt.style.use([dark_style, 'dark_background'])
plt.figure(dpi=100, figsize=(6, 6))
# The last precision/recall entry is dropped with [:-1] so both series line up
# one-to-one with threshold_points on the x-axis.
plt.plot(threshold_points, precision_points[:-1], color='r', label='Precision')
plt.plot(threshold_points, recall_points[:-1], color='b', label='Recall')
plt.xlabel('Threshold')
# The y-axis shows precision and recall values, not a frequency.
plt.ylabel('Precision / Recall')
plt.title('Precision-Recall Curve')
plt.legend()
plt.show()

## Plotting the AUC-ROC curve

In [None]:
from sklearn.metrics import roc_curve,roc_auc_score
# False-positive and true-positive rates over the decision thresholds, computed
# from the second probability column (index 1) of the predicted probabilities.
fpr, tpr, threshold = roc_curve(y_true=Y_test, y_score=probablities[:, 1])

In [None]:
# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever name is available.
dark_style = ('seaborn-v0_8-dark'
              if 'seaborn-v0_8-dark' in plt.style.available
              else 'seaborn-dark')
plt.style.use([dark_style, 'dark_background'])

# Area under the ROC curve, computed from the same scores used for the curve,
# so the figure reports the AUC its title promises.
auc_value = roc_auc_score(Y_test, probablities[:, 1])

plt.figure(dpi=100, figsize=(8, 6))
plt.plot(fpr, tpr, color='r', label='ROC (AUC = %.3f)' % auc_value)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='g', label='Baseline')
plt.title('AUC-ROC Curve')
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.legend()
plt.show()