In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objs as go

In [None]:
# Load the heart-failure clinical records CSV into a DataFrame.
# The path is relative to the notebook's working directory.
df = pd.read_csv('../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv')


In [None]:
# Preview the first rows to sanity-check the load.
df.head()

In [None]:
# List the column names available in the dataset.
df.columns


In [None]:
# (number of rows, number of columns)
df.shape


In [None]:
# Summary statistics per column.
df.describe()


In [None]:
# Storage dtype of each column.
df.dtypes


In [None]:
def missing_value_of_data(data):
    """Summarise missing values per column of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to inspect.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``data``, sorted by missing count (descending),
        with two columns: ``Total`` (number of nulls) and ``Percentage``
        (nulls as a percentage of the row count, rounded to 2 decimals).
    """
    # Use the argument, not the notebook-global `df`, so the helper reports on
    # whichever frame the caller passes in.
    total = data.isnull().sum().sort_values(ascending=False)
    percentage = (total / data.shape[0] * 100).round(2)
    return pd.concat([total, percentage], axis=1, keys=['Total', 'Percentage'])
# Show the per-column missing-value summary for the loaded dataset.
missing_value_of_data(df)

In [None]:
# Pairwise correlation between the columns.
df.corr()


In [None]:
# How many rows fall into each DEATH_EVENT value (class balance of the target).
df['DEATH_EVENT'].value_counts()


In [None]:
# Share of rows per DEATH_EVENT value.
fig = px.pie(
    data_frame=df,
    names="DEATH_EVENT",
    title="DEATH_EVENT",
    color_discrete_sequence=px.colors.qualitative.G10,
)
fig.show()

In [None]:
# Share of rows per anaemia value.
fig = px.pie(
    data_frame=df,
    names="anaemia",
    title="anaemia",
    color_discrete_sequence=px.colors.qualitative.G10,
)
fig.show()

In [None]:
# Share of rows per diabetes value.
fig = px.pie(
    data_frame=df,
    names="diabetes",
    title="diabetes",
    color_discrete_sequence=px.colors.qualitative.G10,
)
fig.show()

In [None]:
# Age distribution: density-normalised histogram with a KDE curve on top.
# `sns.distplot` is deprecated; `histplot(kde=True, stat="density")` is the
# replacement recommended by seaborn and produces the same kind of figure.
plt.figure(figsize=(10, 6))
sns.histplot(df['age'], kde=True, stat='density', color='r')
plt.show()

In [None]:
# Histogram of creatinine_phosphokinase, requesting up to 80 bins.
fig = px.histogram(
    data_frame=df,
    x="creatinine_phosphokinase",
    title='creatinine_phosphokinase',
    nbins=80,
    width=800, height=500,
)
fig.show()

In [None]:
# Histogram of platelets, requesting up to 80 bins.
fig = px.histogram(
    data_frame=df,
    x="platelets",
    title='platelets ',
    nbins=80,
    width=800, height=500,
)
fig.show()

In [None]:
# Share of rows per smoking value.
fig = px.pie(
    data_frame=df,
    names="smoking",
    title="smoking",
    color_discrete_sequence=px.colors.qualitative.Set1,
)
fig.show()

In [None]:
# One box trace per column; all four share a single y-axis in the figure.
trace0 = go.Box(y=df["serum_creatinine"], name="serum_creatinine")
trace1 = go.Box(y=df["serum_sodium"], name="serum_sodium")
trace2 = go.Box(y=df["ejection_fraction"], name="ejection_fraction")
trace3 = go.Box(y=df["high_blood_pressure"], name="high_blood_pressure")

data = [trace0, trace1, trace2, trace3]
plotly.offline.iplot(data)

In [None]:
# Pairwise scatter plots across three measurements and the target column.
fig = px.scatter_matrix(
    data_frame=df,
    dimensions=['platelets', 'age', 'serum_creatinine', 'DEATH_EVENT'],
)
fig.show()

In [None]:
# Distribution of creatinine_phosphokinase, one box per DEATH_EVENT value.
fig = px.box(
    data_frame=df,
    x="DEATH_EVENT", y="creatinine_phosphokinase",
    title='creatinine_phosphokinase & DEATH_EVENT box plot',
    width=500, height=700,
)
fig.show()

In [None]:
# Bar chart of DEATH_EVENT against serum_sodium.
fig = px.bar(
    data_frame=df,
    x='serum_sodium',
    y='DEATH_EVENT',
    barmode='group',
    height=600,
)
fig.show()

In [None]:
# Row counts per sex value, split by DEATH_EVENT.
plt.subplots(figsize=(15, 5))
sns.countplot(
    data=df,
    x='sex',
    hue='DEATH_EVENT',
    palette='dark',
)
plt.show()

In [None]:
# Row counts per ejection_fraction value, split by DEATH_EVENT.
plt.subplots(figsize=(20, 10))
sns.countplot(
    data=df,
    x='ejection_fraction',
    hue='DEATH_EVENT',
    palette='dark',
)
plt.show()

In [None]:


# Row counts per anaemia value, split by DEATH_EVENT.
plt.subplots(figsize=(20, 10))
sns.countplot(
    data=df,
    x='anaemia',
    hue='DEATH_EVENT',
    palette='dark',
)
plt.show()

In [None]:
# Row counts per smoking value, split by DEATH_EVENT.
plt.subplots(figsize=(20, 10))
sns.countplot(
    data=df,
    x='smoking',
    hue='DEATH_EVENT',
    palette='dark',
)
plt.show()

In [None]:
# Row counts per high_blood_pressure value, split by DEATH_EVENT.
plt.subplots(figsize=(20, 10))
sns.countplot(
    data=df,
    x='high_blood_pressure',
    hue='DEATH_EVENT',
    palette='dark',
)
plt.show()

In [None]:
# Row counts per diabetes value, split by DEATH_EVENT.
plt.subplots(figsize=(20, 10))
sns.countplot(
    data=df,
    x='diabetes',
    hue='DEATH_EVENT',
    palette='dark',
)
plt.show()

In [None]:
# Correlation heatmap: every cell is annotated with the coefficient (2 decimals).
f, ax = plt.subplots(figsize=(30, 30))
sns.heatmap(
    df.corr(),
    annot=True,
    fmt=".2f",
    linewidths=0.5,  # numeric width of the lines separating cells
    cmap='cividis',
    ax=ax,           # draw on the axes created above rather than the implicit current axes
)
plt.show()

In [None]:
# Rescale each continuous column by its own maximum (value / column max).
# NOTE(review): this overwrites `df` in place, so cells above will show scaled
# values if re-run afterwards, and the maxima are taken over the full dataset
# before the train/test split below — confirm that is intended.
scaled_columns = [
    'age',
    'creatinine_phosphokinase',
    'ejection_fraction',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'time',
]
for column in scaled_columns:
    df[column] = df[column] / df[column].max()


In [None]:
# Preview the first rows again after the rescaling above.
df.head()

In [None]:
# Target is DEATH_EVENT; every other column is used as a feature.
y = df["DEATH_EVENT"]
X = df.drop("DEATH_EVENT", axis=1)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation with a fixed seed for reproducibility.
# Stratifying on `y` keeps the DEATH_EVENT class ratio the same in the train and
# test partitions, so the test score is not skewed by an unlucky draw of the
# minority class.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=44,
    shuffle=True,
    stratify=y,
)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a shallow random forest: 100 trees, depth capped at 2, Gini impurity
# ('entropy' is the alternative criterion), fixed seed.
RandomForestClassifierModel = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_depth=2,
    random_state=33,
)
RandomForestClassifierModel.fit(X_train, y_train)

# Mean accuracy on the training and held-out partitions.
print('RandomForestClassifierModel Train Score is : ' , RandomForestClassifierModel.score(X_train, y_train))
print('RandomForestClassifierModel Test Score is : ' , RandomForestClassifierModel.score(X_test, y_test))

# Hard labels and per-class probabilities for the held-out rows.
y_pred_prob = RandomForestClassifierModel.predict_proba(X_test)
y_pred = RandomForestClassifierModel.predict(X_test)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Confusion matrix on the held-out set: rows are true labels, columns are predictions.
CM = confusion_matrix(y_test, y_pred)
print('Confusion Matrix is : \n', CM)

# Draw the matrix with the count written in each cell.
# `center` expects a numeric colormap midpoint, so the previous `center=True`
# was silently treated as 1; `annot=True` is what labels the cells.
ax = sns.heatmap(CM, annot=True, fmt='d', cmap='YlGnBu')
ax.set(xlabel='Predicted label', ylabel='True label')
plt.show()