In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go

from plotly.subplots import make_subplots

In [None]:
import os

# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))


In [None]:

# Seaborn theme used by the matplotlib-based figures below.
sns.set_style(style="whitegrid")

# Rainbow-ordered hex colours; individual entries are picked by index further down.
palette_ro = [
    "#ee2f35",
    "#fa7211",
    "#fbd600",
    "#75c731",
    "#1fb86e",
    "#0488cf",
    "#7b44ab",
]


In [None]:
# Load the heart-failure clinical records into the frame every later cell reads from.
dfData = pd.read_csv(
    '/kaggle/input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
)

In [None]:
# Preview the first five rows.
dfData.head(n=5)

In [None]:
# Summary statistics for the columns of the dataset.
dfData.describe()

In [None]:
# List the column names available for analysis.
dfData.columns

In [None]:
# Count missing values per column.
dfData.isna().sum()

In [None]:
# Pairwise correlation between all columns, drawn as an annotated heatmap.
correlation = dfData.corr()

plt.figure(figsize=(16, 12))
ax = sns.heatmap(
    data=correlation,
    annot=True,
    fmt='.2f',
    square=True,
    linecolor='white',
)
ax.set_title('Correlation Heatmap of Death event')

# Rotate the tick labels so the long column names stay readable.
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
ax.set_yticklabels(ax.get_yticklabels(), rotation=30)
plt.show()

# Gender

In [None]:


# Split the patients into the four sex x outcome groups.
# sex: 1 = male, 0 = female; DEATH_EVENT: 1 = died, 0 = survived.
dMaleSurvived = dfData[(dfData["DEATH_EVENT"]==0) & (dfData["sex"]==1)]
dMaleDied = dfData[(dfData["DEATH_EVENT"]==1) & (dfData["sex"]==1)]
dFemaleSurvived = dfData[(dfData["DEATH_EVENT"]==0) & (dfData["sex"]==0)]
dFemaleDied = dfData[(dfData["DEATH_EVENT"]==1) & (dfData["sex"]==0)]

# Left donut: patients per sex. Right donut: patients per sex and outcome.
label1 = ["Male","Female"]
label2 = ['Male - Survived','Male - Died', "Female -  Survived", "Female - Died"]

values1 = [(len(dMaleSurvived)+len(dMaleDied)), (len(dFemaleSurvived)+len(dFemaleDied))]
values2 = [len(dMaleSurvived),len(dMaleDied),len(dFemaleSurvived),len(dFemaleDied)]

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=label1, values=values1, name="GENDER"), 1, 1)
fig.add_trace(go.Pie(labels=label2, values=values2, name="GENDER VS DEATH_EVENT"), 1, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent")

fig.update_layout(
    # The backslash is escaped explicitly: a bare "\ " is an invalid escape
    # sequence and triggers a warning on recent Python versions. The rendered
    # title text is unchanged.
    title_text="GENDER DISTRIBUTION IN THE DATASET  \\ GENDER VS DEATH_EVENT",

    # Add annotations in the center of the donut pies.
    annotations=[dict(text='GENDER', x=0.19, y=0.5, font_size=10, showarrow=False ,font_color="white"),
                 dict(text='GENDER VS DEATH_EVENT', x=0.84, y=0.5, font_size=9, showarrow=False,font_color="white")],

    autosize=False,width=1200, height=500, paper_bgcolor="black",font_color="white")

fig.show()

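There are 194 male and 105 female patients in the dataset.
Let us look at how gender relates to the death event.

In total, 62 of the 194 male patients died, compared with 34 of the 105 female patients.
In absolute numbers more males died, but the proportions are almost identical (about 32% for each sex).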

# Age 

In [None]:
# Density-normalised histogram of patient age.
fig = px.histogram(x=dfData['age'], nbins=50, histnorm='density', template="plotly_dark")
# histnorm='density' means the y-axis shows a density, not a raw count, so label it accordingly.
fig.update_layout(title='Age distribution:', xaxis_title='Age', yaxis_title='Density')
fig.show()

In [None]:

# Age histogram split by outcome (DEATH_EVENT). The title names age because that is
# the plotted variable; gender is not part of this chart.
fig = px.histogram(dfData, x="age", color="DEATH_EVENT", hover_data=dfData.columns,
                   title="Analysis on Survival - Age",
                   labels={"age": "Age"},
                   template="plotly_dark",
                   nbins=50)
fig.show()

In [None]:
# Age spread per outcome, with one box per sex inside each outcome.
fig = px.box(
    data_frame=dfData,
    x='DEATH_EVENT',
    y='age',
    color='sex',
    template="plotly_dark",
)
fig.update_layout(title_text="Gender wise Age Spread and Death - Male = 1 Female =0")
fig.show()



From the charts above we note that the mean age is around 60 and the most common age range is 60 to 70. There are extreme values at 94-95, which are outliers for the female group. The average age among deaths is about 65 for males and about 60 for females; both values are close to the overall mean age, so there is no strong correlation here.
Let us look more closely at gender; maybe we will find the reason why more males died than females.
Note: more males than females were recorded in the dataset, so that may account for the difference.


# Gender

In [None]:
# Age spread per sex, with every individual observation drawn next to the box.
fig = px.box(
    data_frame=dfData,
    x='sex',
    y='age',
    points="all",
    template="plotly_dark",
)
fig.update_layout(title_text="Gender wise Age Spread - Male = 1 Female =0")
fig.show()

In [None]:
# Violin plot of age per sex, split by outcome, with an inner box and all points shown.
fig = px.violin(
    data_frame=dfData,
    x="sex",
    y="age",
    color="DEATH_EVENT",
    box=True,
    points="all",
    hover_data=dfData.columns,
    template="plotly_dark",
)
fig.update_layout(title_text="Analysis in Age and Gender on Survival Status")
fig.show()

The survivors are spread mostly across ages 40 to 70. Survival is highest between ages 50 and 60 for males and between ages 60 and 70 for females.

Note: Wider sections of the violin plot represent a higher probability of observations taking a given value, the thinner sections correspond to a lower probability and the value of probability is given by kde value for given x

# Diabetes

In [None]:
# "Distribution of AGE Vs DIABETES"

# Age histogram split by the diabetes flag, with a violin marginal on top.
# The colour map is keyed by the integers 0/1 because that is how the flag is
# compared elsewhere in this notebook; string keys "0"/"1" would not match those
# values and the requested colours would not be applied.
fig = px.histogram(dfData, x="age", color="diabetes", marginal="violin", hover_data=dfData.columns,
                   title="Distribution of AGE Vs DIABETES",
                   labels={"diabetes": "DIABETES", "age": "AGE"},
                   template="plotly_dark",
                   color_discrete_map={0: "RebeccaPurple", 1: "MediumPurple"})
fig.show()

In [None]:
# Patient counts per sex, split by the diabetes flag, with a violin marginal on top.
# The colour map is keyed by the integers 0/1 because that is how the flag is
# compared elsewhere in this notebook; string keys "0"/"1" would not match those
# values and the requested colours would not be applied.
fig = px.histogram(dfData, x="sex", color="diabetes", marginal="violin", hover_data=dfData.columns,
                   title="Distribution of GENDER Vs DIABETES",
                   labels={"diabetes": "DIABETES", "sex": "Gender"},
                   template="plotly_dark",
                   color_discrete_map={0: "RebeccaPurple", 1: "MediumPurple"})
fig.show()

In [None]:

# Four outcome x diabetes groups; only their sizes are used for the charts below.
NonDiabetesSurvived = dfData.query("DEATH_EVENT == 0 and diabetes == 0")
DiabetesSurvived = dfData.query("DEATH_EVENT == 0 and diabetes == 1")
NonDiabetesDied = dfData.query("DEATH_EVENT == 1 and diabetes == 0")
DiabetesDied = dfData.query("DEATH_EVENT == 1 and diabetes == 1")

# Left donut: diabetes status alone. Right donut: diabetes status combined with outcome.
label1 = ["Non Diabetes", "Diabetes"]
values1 = [
    NonDiabetesSurvived.shape[0] + NonDiabetesDied.shape[0],
    DiabetesSurvived.shape[0] + DiabetesDied.shape[0],
]

label2 = ['Non Diabetes - Survived', 'Diabetes - Survived', "Non Diabetes -  Died", "Diabetes  - Died"]
values2 = [
    NonDiabetesSurvived.shape[0],
    DiabetesSurvived.shape[0],
    NonDiabetesDied.shape[0],
    DiabetesDied.shape[0],
]

# Pie traces need 'domain'-type subplot cells.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
fig.add_trace(go.Pie(labels=label1, values=values1, name="DIABETES"), row=1, col=1)
fig.add_trace(go.Pie(labels=label2, values=values2, name="DIABETES VS DEATH_EVENT"), row=1, col=2)

# A non-zero `hole` turns each pie into a donut.
fig.update_traces(hole=.4, hoverinfo="label+percent")

fig.update_layout(
    title_text="DIABETES DISTRIBUTION IN THE DATASET \
                  DIABETES VS DEATH_EVENT",
    # Captions placed in the centre of each donut.
    annotations=[
        {"text": 'DIABETES', "x": 0.20, "y": 0.5, "font_size": 10, "showarrow": False, "font_color": "white"},
        {"text": 'DIABETES VS DEATH_EVENT', "x": 0.84, "y": 0.5, "font_size": 8, "showarrow": False, "font_color": "white"},
    ],
    autosize=False,
    width=1200,
    height=500,
    paper_bgcolor="black",
    font_color="white",
)
fig.show()

Insight:From the above subplot we can conclude that in our dataset 57.9% are NON DIABETIC (out of which 39.4% survived and 18.5% died) and 42.1% are DIABETIC (out of which 28.6% survived and 13.5% died).

## SMOKING

In [None]:
# "Distribution of AGE Vs SMOKING"

# Age histogram split by the smoking flag, with a violin marginal on top.
# The colour map is keyed by the integers 0/1 because that is how the flag is
# compared elsewhere in this notebook; string keys "0"/"1" would not match those
# values and the requested colours would not be applied.
fig = px.histogram(dfData, x="age", color="smoking", marginal="violin", hover_data=dfData.columns,
                   title="Distribution of AGE Vs SMOKING",
                   labels={"smoking": "SMOKING", "age": "AGE"},
                   template="plotly_dark",
                   color_discrete_map={0: "RebeccaPurple", 1: "MediumPurple"})
fig.show()

Survival is highest between ages 55 and 65 for non-smokers, and between ages 50 and 60 for smokers.
The death event is more frequent among smokers than among non-smokers.

In [None]:
# Four outcome x smoking groups; only their sizes are used for the charts below.
d1 = dfData.query("DEATH_EVENT == 0 and smoking == 0")
d2 = dfData.query("DEATH_EVENT == 1 and smoking == 0")
d3 = dfData.query("DEATH_EVENT == 0 and smoking == 1")
d4 = dfData.query("DEATH_EVENT == 1 and smoking == 1")

# Left donut: smoking status alone. Right donut: smoking status combined with outcome.
label1 = ["No Smoking", "Smoking"]
values1 = [d1.shape[0] + d2.shape[0], d3.shape[0] + d4.shape[0]]

label2 = ['No Smoking - Survived', 'No Smoking - Died', "Smoking - Survived", "Smoking - Died"]
values2 = [d1.shape[0], d2.shape[0], d3.shape[0], d4.shape[0]]

# Pie traces need 'domain'-type subplot cells.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
fig.add_trace(go.Pie(labels=label1, values=values1, name="SMOKING"), row=1, col=1)
fig.add_trace(go.Pie(labels=label2, values=values2, name="SMOKING VS DEATH_EVENT"), row=1, col=2)

# A non-zero `hole` turns each pie into a donut.
fig.update_traces(hole=.4, hoverinfo="label+percent")

fig.update_layout(
    title_text="SMOKING DISTRIBUTION IN THE DATASET \
                  SMOKING VS DEATH_EVENT",
    # Captions placed in the centre of each donut.
    annotations=[
        {"text": 'SMOKING', "x": 0.20, "y": 0.5, "font_size": 10, "showarrow": False, "font_color": "white"},
        {"text": 'SMOKING VS DEATH_EVENT', "x": 0.84, "y": 0.5, "font_size": 8, "showarrow": False, "font_color": "white"},
    ],
    autosize=False,
    width=1200,
    height=500,
    paper_bgcolor="black",
    font_color="white",
)
fig.show()

Insight:From the above subplot we can conclude that in our dataset 67.7% do not SMOKE (out of which 45.8% survived and 21.9% died) and 32.3% do SMOKE (out of which 22.2% survived and 10.1% died).

# Anaemia

In [None]:
# Four outcome x anaemia groups; only their sizes are used for the charts below.
d1 = dfData.query("DEATH_EVENT == 0 and anaemia == 0")
d2 = dfData.query("DEATH_EVENT == 1 and anaemia == 0")
d3 = dfData.query("DEATH_EVENT == 0 and anaemia == 1")
d4 = dfData.query("DEATH_EVENT == 1 and anaemia == 1")

# Left donut: anaemia status alone. Right donut: anaemia status combined with outcome.
label1 = ["No Anaemia", "Anaemia"]
values1 = [d1.shape[0] + d2.shape[0], d3.shape[0] + d4.shape[0]]

label2 = ['No Anaemia - Survived', 'No Anaemia - Died', "Anaemia -  Survived", "Anaemia  - Died"]
values2 = [d1.shape[0], d2.shape[0], d3.shape[0], d4.shape[0]]

# Pie traces need 'domain'-type subplot cells.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
fig.add_trace(go.Pie(labels=label1, values=values1, name="ANAEMIA"), row=1, col=1)
fig.add_trace(go.Pie(labels=label2, values=values2, name="ANAEMIA VS DEATH_EVENT"), row=1, col=2)

# A non-zero `hole` turns each pie into a donut.
fig.update_traces(hole=.4, hoverinfo="label+percent")

fig.update_layout(
    title_text="ANAEMIA DISTRIBUTION IN THE DATASET \
                  ANAEMIA VS DEATH_EVENT",
    # Captions placed in the centre of each donut.
    annotations=[
        {"text": 'ANAEMIA', "x": 0.20, "y": 0.5, "font_size": 10, "showarrow": False},
        {"text": 'ANAEMIA VS DEATH_EVENT', "x": 0.84, "y": 0.5, "font_size": 8, "showarrow": False},
    ],
    autosize=False,
    width=1400,
    height=500,
    paper_bgcolor="white",
)
fig.show()

Insight:From the above subplot we can conclude that in our dataset 56.9% are NON ANAEMIC (out of which 40.1% survived and 16.8% died) and 43.1% are ANAEMIC (out of which 27.9% survived and 15.2% died).

In [None]:
# "Distribution of AGE Vs ANAEMIA"
# Age histogram split by the anaemia flag, with a violin marginal on top.
# Two fixes relative to the earlier version of this cell:
#   * the label override is keyed by the real column name "anaemia" (the key "a"
#     matched no column, so the legend title was never relabelled);
#   * the colour map is keyed by the integers 0/1, matching how the flag is compared
#     elsewhere in this notebook; string keys "0"/"1" would not match those values.
fig = px.histogram(dfData, x="age", color="anaemia", marginal="violin", hover_data=dfData.columns,
                   title="Distribution of AGE Vs ANAEMIA",
                   labels={"anaemia": "ANAEMIA", "age": "AGE"},
                   template="plotly_dark",
                   color_discrete_map={0: "RebeccaPurple", 1: "MediumPurple"})
fig.show()

# High blood pressure

In [None]:
# "Distribution of AGE Vs High Blood Pressure"

# Age histogram split by the high_blood_pressure flag, with a violin marginal on top.
# The colour map is keyed by the integers 0/1 because that is how the flag is
# compared elsewhere in this notebook; string keys "0"/"1" would not match those
# values and the requested colours would not be applied.
fig = px.histogram(dfData, x="age", color="high_blood_pressure", marginal="violin", hover_data=dfData.columns,
                   title="Distribution of AGE Vs BLOOD PRESSURE",
                   labels={"high_blood_pressure": "Blood Pressure", "age": "AGE"},
                   template="plotly_dark",
                   color_discrete_map={0: "RebeccaPurple", 1: "MediumPurple"})
fig.show()

In [None]:
# Four outcome x high-blood-pressure groups; only their sizes are used for the charts below.
d1 = dfData.query("DEATH_EVENT == 0 and high_blood_pressure == 0")
d2 = dfData.query("DEATH_EVENT == 1 and high_blood_pressure == 0")
d3 = dfData.query("DEATH_EVENT == 0 and high_blood_pressure == 1")
d4 = dfData.query("DEATH_EVENT == 1 and high_blood_pressure == 1")

# Left donut: blood-pressure status alone. Right donut: status combined with outcome.
label1 = ["No High Blood Pressure", "High Blood Pressure"]
values1 = [d1.shape[0] + d2.shape[0], d3.shape[0] + d4.shape[0]]

label2 = ['No High Blood Pressure - Survived', 'No High Blood Pressure - Died', "High Blood Pressure -  Survived", "High Blood Pressure  - Died"]
values2 = [d1.shape[0], d2.shape[0], d3.shape[0], d4.shape[0]]

# Pie traces need 'domain'-type subplot cells.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
fig.add_trace(go.Pie(labels=label1, values=values1, name="High Blood Pressure"), row=1, col=1)
fig.add_trace(go.Pie(labels=label2, values=values2, name="High Blood Pressure VS DEATH_EVENT"), row=1, col=2)

# A non-zero `hole` turns each pie into a donut.
fig.update_traces(hole=.4, hoverinfo="label+percent")

fig.update_layout(
    title_text="High Blood Pressure DISTRIBUTION IN THE DATASET \
                  High Blood Pressure VS DEATH_EVENT",
    # Captions placed in the centre of each donut.
    annotations=[
        {"text": 'High Blood Pressure', "x": 0.20, "y": 0.5, "font_size": 10, "showarrow": False},
        {"text": 'High Blood Pressure VS DEATH_EVENT', "x": 0.84, "y": 0.5, "font_size": 8, "showarrow": False},
    ],
    autosize=False,
    width=1200,
    height=500,
    paper_bgcolor="white",
)
fig.show()

# Creatinine phosphokinase

In [None]:
# Distribution of creatinine_phosphokinase with the median marked.
fig, ax1 = plt.subplots(1, 1, figsize=(8, 8))

# sns.distplot is deprecated; histplot with kde=True and stat="density" is the
# replacement documented by seaborn (histogram plus KDE on a density scale).
sns.histplot(dfData["creatinine_phosphokinase"], kde=True, stat="density",
             kde_kws=dict(cut=3), ax=ax1, color=palette_ro[5])

ax1.set_title("creatinine_phosphokinase distribution", fontsize=16)

# Dashed vertical line at the median; the trailing semicolon hides the Line2D repr.
ax1.axvline(x=dfData["creatinine_phosphokinase"].median(), color=palette_ro[5], linestyle="--", alpha=0.5);




In [None]:
# Box plot of creatinine_phosphokinase.
fig = px.box(data_frame=dfData, y="creatinine_phosphokinase")
fig.show()

##tips = sns.load_dataset("tips")
#ax = sns.boxplot(x=dfData["creatinine_phosphokinase"])

In [None]:
#hist_data = [dfData["creatinine_phosphokinase"].values.tolist()]
#group_labels = ['CPK'] # name of the dataset

#fig = ff.create_distplot(hist_data, group_labels,bin_size=0.2)
#fig.show()

In [None]:
# creatinine_phosphokinase histogram split by outcome, with a violin marginal on top.
# The colour map is keyed by the integers 0/1 because that is how DEATH_EVENT is
# compared elsewhere in this notebook; string keys "0"/"1" would not match those
# values and the requested colours would not be applied.
fig = px.histogram(dfData, x="creatinine_phosphokinase", color="DEATH_EVENT", marginal="violin",
                   hover_data=dfData.columns,
                   title="Relationship between creatinine_phosphokinase and DEATH_EVENT",
                   labels={"creatinine_phosphokinase": "creatinine_phosphokinase", "DEATH_EVENT": "DEATH_EVENT"},
                   color_discrete_map={0: "RebeccaPurple", 1: "MediumPurple"})
fig.show()



In [None]:
# creatinine_phosphokinase against follow-up time, coloured by outcome.
sns.scatterplot(
    data=dfData,
    x="time",
    y="creatinine_phosphokinase",
    hue="DEATH_EVENT",
);

# Ejection fraction

In [None]:
# Distribution of ejection_fraction with the median marked.
fig, ax1 = plt.subplots(1, 1, figsize=(8, 8))

# sns.distplot is deprecated; histplot with kde=True and stat="density" is the
# replacement documented by seaborn (histogram plus KDE on a density scale).
sns.histplot(dfData["ejection_fraction"], kde=True, stat="density",
             kde_kws=dict(cut=3), ax=ax1, color=palette_ro[5])

ax1.set_title("ejection_fraction distribution", fontsize=16)

# Dashed vertical line at the median; the trailing semicolon hides the Line2D repr.
ax1.axvline(x=dfData["ejection_fraction"].median(), color=palette_ro[5], linestyle="--", alpha=0.5);




In [None]:
# Box plot of ejection_fraction.
fig = px.box(data_frame=dfData, y="ejection_fraction")
fig.show()

#ax = sns.boxplot(x=dfData["ejection_fraction"])


In [None]:
#sns.histplot(data=dfData, x="ejection_fraction", kde=True)

In [None]:
# ejection_fraction histogram split by outcome, with a violin marginal on top.
# The colour map is keyed by the integers 0/1 because that is how DEATH_EVENT is
# compared elsewhere in this notebook; string keys "0"/"1" would not match those
# values and the requested colours would not be applied.
fig = px.histogram(dfData, x="ejection_fraction", color="DEATH_EVENT", marginal="violin",
                   hover_data=dfData.columns,
                   title="Relationship between ejection_fraction and DEATH_EVENT",
                   labels={"ejection_fraction": "ejection_fraction", "DEATH_EVENT": "DEATH_EVENT"},
                   color_discrete_map={0: "RebeccaPurple", 1: "MediumPurple"})
fig.show()

In [None]:
#sns.histplot(data=dfData, x="ejection_fraction", hue="DEATH_EVENT", multiple="stack")

In [None]:
# ejection_fraction against follow-up time, coloured by outcome.
sns.scatterplot(
    data=dfData,
    x="time",
    y="ejection_fraction",
    hue="DEATH_EVENT",
);

# Platelets

In [None]:
# Distribution of platelets with the median marked.
fig, ax1 = plt.subplots(1, 1, figsize=(8, 8))

# sns.distplot is deprecated; histplot with kde=True and stat="density" is the
# replacement documented by seaborn (histogram plus KDE on a density scale).
sns.histplot(dfData["platelets"], kde=True, stat="density",
             kde_kws=dict(cut=3), ax=ax1, color=palette_ro[5])

ax1.set_title("platelets distribution", fontsize=16)

# Dashed vertical line at the median; the trailing semicolon hides the Line2D repr.
ax1.axvline(x=dfData["platelets"].median(), color=palette_ro[5], linestyle="--", alpha=0.5);





In [None]:
# Box plot of platelets.
fig = px.box(data_frame=dfData, y="platelets")
fig.show()

#ax = sns.boxplot(x=dfData["platelets"])

In [None]:
#hist_data = [dfData["platelets"].values.tolist()]
#group_labels = ['platelets'] # name of the dataset

##fig = ff.create_distplot(hist_data, group_labels,bin_size=0.2)
#fig.show()


In [None]:
# platelets histogram split by outcome, with a violin marginal on top.
# The colour map is keyed by the integers 0/1 because that is how DEATH_EVENT is
# compared elsewhere in this notebook; string keys "0"/"1" would not match those
# values and the requested colours would not be applied.
fig = px.histogram(dfData, x="platelets", color="DEATH_EVENT", marginal="violin",
                   hover_data=dfData.columns,
                   title="Relationship between platelets and DEATH_EVENT",
                   labels={"platelets": "platelets", "DEATH_EVENT": "DEATH_EVENT"},
                   color_discrete_map={0: "RebeccaPurple", 1: "MediumPurple"})
fig.show()


In [None]:
# platelets against follow-up time, coloured by outcome.
sns.scatterplot(
    data=dfData,
    x="time",
    y="platelets",
    hue="DEATH_EVENT",
);

# Serum creatinine

In [None]:
# Distribution of serum_creatinine with the median marked.
fig, ax1 = plt.subplots(1, 1, figsize=(8, 8))

# sns.distplot is deprecated; histplot with kde=True and stat="density" is the
# replacement documented by seaborn (histogram plus KDE on a density scale).
sns.histplot(dfData["serum_creatinine"], kde=True, stat="density",
             kde_kws=dict(cut=3), ax=ax1, color=palette_ro[5])

ax1.set_title("serum_creatinine distribution", fontsize=16)

# Dashed vertical line at the median; the trailing semicolon hides the Line2D repr.
ax1.axvline(x=dfData["serum_creatinine"].median(), color=palette_ro[5], linestyle="--", alpha=0.5);



In [None]:
# Box plot of serum_creatinine.
fig = px.box(data_frame=dfData, y="serum_creatinine")
fig.show()

In [None]:
# serum_creatinine histogram split by outcome, with a violin marginal on top.
# The colour map is keyed by the integers 0/1 because that is how DEATH_EVENT is
# compared elsewhere in this notebook; string keys "0"/"1" would not match those
# values and the requested colours would not be applied.
fig = px.histogram(dfData, x="serum_creatinine", color="DEATH_EVENT", marginal="violin",
                   hover_data=dfData.columns,
                   title="Relationship between serum_creatinine and DEATH_EVENT",
                   labels={"serum_creatinine": "serum_creatinine", "DEATH_EVENT": "DEATH_EVENT"},
                   color_discrete_map={0: "RebeccaPurple", 1: "MediumPurple"})
fig.show()

In [None]:
# serum_creatinine against follow-up time, coloured by outcome.
sns.scatterplot(
    data=dfData,
    x="time",
    y="serum_creatinine",
    hue="DEATH_EVENT",
);

# Serum sodium

In [None]:
# Distribution of serum_sodium with the median marked.
fig, ax1 = plt.subplots(1, 1, figsize=(8, 8))

# sns.distplot is deprecated; histplot with kde=True and stat="density" is the
# replacement documented by seaborn (histogram plus KDE on a density scale).
sns.histplot(dfData["serum_sodium"], kde=True, stat="density",
             kde_kws=dict(cut=3), ax=ax1, color=palette_ro[5])

ax1.set_title("serum_sodium distribution", fontsize=16)

# Dashed vertical line at the median; the trailing semicolon hides the Line2D repr.
ax1.axvline(x=dfData["serum_sodium"].median(), color=palette_ro[5], linestyle="--", alpha=0.5);









In [None]:
# Box plot of serum_sodium.
fig = px.box(data_frame=dfData, y="serum_sodium")
fig.show()


In [None]:

# serum_sodium histogram split by outcome, with a violin marginal on top.
# The colour map is keyed by the integers 0/1 because that is how DEATH_EVENT is
# compared elsewhere in this notebook; string keys "0"/"1" would not match those
# values and the requested colours would not be applied.
fig = px.histogram(dfData, x="serum_sodium", color="DEATH_EVENT", marginal="violin",
                   hover_data=dfData.columns,
                   title="Relationship between serum_sodium and DEATH_EVENT",
                   labels={"serum_sodium": "serum_sodium", "DEATH_EVENT": "DEATH_EVENT"},
                   color_discrete_map={0: "RebeccaPurple", 1: "MediumPurple"})
fig.show()

In [None]:
# serum_sodium against follow-up time, coloured by outcome.
sns.scatterplot(
    data=dfData,
    x="time",
    y="serum_sodium",
    hue="DEATH_EVENT",
);

# Time

In [None]:
# Total DEATH_EVENT per distinct follow-up time, drawn as a line chart.
folowUp = dfData.groupby('time')['DEATH_EVENT'].sum()
folowUp.plot(
    title='trends on death times as folow up increased',
    figsize=(20, 10),
)

The figure below is based on a scatterplot from the paper.

In [None]:
# serum_creatinine against ejection_fraction, coloured by outcome.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 8))

sns.scatterplot(
    data=dfData,
    x="serum_creatinine",
    y="ejection_fraction",
    hue="DEATH_EVENT",
    palette=[palette_ro[1], palette_ro[6]],
    ax=ax,
)

# Dashed grey reference line between two fixed points.
ax.plot([0.9, 5.3], [13, 80.0], color="gray", linestyle="--")

fig.suptitle("Relationship between serum_creatinine and ejection_fraction against DEATH_EVENT", fontsize=18);

In [None]:
#sns.scatterplot(x = "time", y = "serum_sodium", data = df[["serum_sodium", "time", "DEATH_EVENT"]], hue = "DEATH_EVENT");



## Mortality Rate Based on Age

In [None]:
# One-hot age-band flags, one list per band, each aligned row-for-row with dfData.
# Bands: <=30, (30, 50], (50, 70], >70. A flag is 1 when the patient's age falls in
# the band and 0 otherwise.
#
# The flags are computed with vectorised comparisons instead of a row-by-row loop.
# Every list therefore always has one entry per row: a missing age compares False
# in every band and yields 0 everywhere, instead of being skipped and leaving the
# lists shorter than the frame.
ages = dfData.age

g_30 = (ages <= 30).astype(int).tolist()
g_50 = ((ages > 30) & (ages <= 50)).astype(int).tolist()
g_70 = ((ages > 50) & (ages <= 70)).astype(int).tolist()
greater70 = (ages > 70).astype(int).tolist()


In [None]:
# Attach each age-band flag list to dfData as its own column.
for column_name, flags in (
    ('age_till_30', g_30),
    ('age_bet_30_50', g_50),
    ('age_bet_50_70', g_70),
    ('age_gret_70', greater70),
):
    dfData[column_name] = flags

In [None]:
def _mortality_pct(flag_column):
    """Return the percentage of patients in an age band whose DEATH_EVENT is 1.

    Parameters
    ----------
    flag_column : str
        Name of a 0/1 age-band column in ``dfData``.

    Returns
    -------
    float
        Share of the band's patients with ``DEATH_EVENT == 1``, times 100.
        Returns 0.0 when the band contains no patients.
    """
    outcomes = dfData.loc[dfData[flag_column] == 1, "DEATH_EVENT"]
    if outcomes.empty:
        return 0.0
    # The mean of the boolean mask is the fraction of deaths. Unlike
    # value_counts(normalize=True)[1], it does not raise when nobody in the band died.
    return (outcomes == 1).mean() * 100


# [band name, mortality rate in percent] pairs, one per age band. All bands are
# computed the same way, so the first band is no longer hard-coded to 0.
data = [[band, _mortality_pct(band)]
        for band in ('age_till_30', 'age_bet_30_50', 'age_bet_50_70', 'age_gret_70')]

In [None]:
# Tabulate the per-band mortality rates computed above.
dfMortlity = pd.DataFrame(
    data=data,
    columns=['Age Range', 'Mortlity Rate (%)'],
)

In [None]:
# Display the mortality-rate table.
dfMortlity