# Import Libraries

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

## Setting Parameters

In [None]:
# Global plotting defaults: seaborn theme first, then matplotlib overrides.
sns.set(
    context='notebook',
    style='darkgrid',
    palette='deep',
    font='sans-serif',
    font_scale=1,
    color_codes=False,
    rc=None,
)
# Figure size, base font size and font family applied to every later figure.
matplotlib.rcParams.update({
    'figure.figsize': [10, 8],
    'font.size': 18,
    'font.family': 'sans-serif',
})

# Import Data

In [None]:
# Read the heart-failure clinical records CSV and preview the first rows.
df = pd.read_csv(
    "../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv"
)
df.head()

# Columns description
1. anaemia: decrease of red blood cells or hemoglobin (boolean)
2. creatinine_phosphokinase: level of the CPK enzyme in the blood (mcg/L)
3. diabetes: whether the patient has diabetes (boolean)
4. ejection_fraction: ejection fraction (EF) is a measurement, expressed as a percentage, of how much blood the left ventricle pumps out with each contraction
5. high_blood_pressure: whether the patient has hypertension (high blood pressure)
6. platelets: platelets are a component of blood whose function (along with the coagulation factors) is to stop bleeding by forming clots
7. serum_creatinine: level of creatinine in the blood; serum creatinine is widely interpreted as a measure only of renal function
8. serum_sodium: level of sodium in the blood; sodium is particularly important for nerve and muscle function



In [None]:
# Print a structural summary of `df` (columns, non-null counts, dtypes).
df.info()

In [None]:
# Display descriptive statistics for the columns of `df`.
df.describe()

In [None]:
# Show the dtype of each column.
df.dtypes

In [None]:
# Bar chart of how many columns share each dtype.
df.dtypes.value_counts().plot(kind='bar');

* As we can see, our data is entirely numerical.

In [None]:
# Number of missing values per column.
df.isna().sum()

* Our data doesn't need cleaning: there are no missing values.

In [None]:
# Column names as a plain Python list.
df.columns.tolist()

In [None]:
# (rows, columns) of the loaded data.
df.shape

In [None]:
# Class balance of the target column.
df['DEATH_EVENT'].value_counts()

* As we can see, we have 96 cases of DEATH_EVENT.

In [None]:
# Bar chart of the target class counts.
df['DEATH_EVENT'].value_counts().plot(kind='bar')
plt.title("Death cases");

  # Data visualization

In [None]:
# Share of each target class as a pie chart with percentage labels.
plt.figure(figsize=(10, 6))
plt.title("EVENT_DEATH per cent")
df['DEATH_EVENT'].value_counts().plot(kind='pie', autopct="%1.1f%%");

In [None]:
# Seaborn count plot of the target classes.
sns.countplot(data=df, x='DEATH_EVENT');

In [None]:
# One histogram per column of `df`, drawn on a 10x10 inch figure.
df.hist(figsize=(10, 10), edgecolor='black');

In [None]:
# Pairwise correlation matrix of all columns.
df.corr()

* As we can see, there is a relatively high correlation between DEATH_EVENT and age, serum sodium, serum creatinine, ejection fraction, and time.

In [None]:
# Mean DEATH_EVENT (i.e. observed death rate) for each distinct age.
plt.figure(figsize=(15, 7))
plt.title("Average of EVENT_DEATH by age")
sns.barplot(data=df, x='age', y='DEATH_EVENT')
plt.xticks(rotation=90);

In [None]:
# Frequency of each serum_sodium value (bars follow value_counts order).
df['serum_sodium'].value_counts().plot(kind='bar');

In [None]:
# Frequency of each serum_creatinine value (bars follow value_counts order).
df['serum_creatinine'].value_counts().plot(kind='bar');

In [None]:
# Frequency of each ejection_fraction value (bars follow value_counts order).
df['ejection_fraction'].value_counts().plot(kind='bar');

In [None]:
# Mean DEATH_EVENT (observed death rate) for each distinct ejection_fraction value.
plt.figure(figsize=(15,7))
plt.title("Average of EVENT_DEATH by ejection_fraction")
# x must be the per-patient column so it lines up row-by-row with DEATH_EVENT.
# Passing value_counts() here would pair frequency counts (indexed by feature
# value) with unrelated patient rows and plot something meaningless.
sns.barplot(x=df.ejection_fraction, y=df['DEATH_EVENT']);

In [None]:
# Mean DEATH_EVENT (observed death rate) for each distinct serum_creatinine value.
plt.figure(figsize=(15,7))
plt.title("Average of EVENT_DEATH by serum creatinine")
# x must be the per-patient column so it lines up row-by-row with DEATH_EVENT.
# Passing value_counts() here would pair frequency counts (indexed by feature
# value) with unrelated patient rows and plot something meaningless.
sns.barplot(x=df.serum_creatinine, y=df['DEATH_EVENT'])
# Rotate the tick labels so the many distinct values stay readable.
plt.xticks(rotation=90);

In [None]:
# Mean DEATH_EVENT (observed death rate) for each distinct serum_sodium value.
plt.figure(figsize=(15,7))
plt.title("Average of EVENT_DEATH by serum sodium")
# x must be the per-patient column so it lines up row-by-row with DEATH_EVENT.
# Passing value_counts() here would pair frequency counts (indexed by feature
# value) with unrelated patient rows and plot something meaningless.
sns.barplot(x=df.serum_sodium, y=df['DEATH_EVENT']);

In [None]:
# Mean DEATH_EVENT (observed death rate) for each distinct follow-up time value.
plt.figure(figsize=(15,7))
plt.title("Average of EVENT_DEATH by time")
# x must be the per-patient column so it lines up row-by-row with DEATH_EVENT.
# Passing value_counts() here would pair frequency counts (indexed by feature
# value) with unrelated patient rows and plot something meaningless.
sns.barplot(x=df.time, y=df['DEATH_EVENT'])
# Rotate the tick labels so the many distinct values stay readable.
plt.xticks(rotation=90);

In [None]:
# How often each serum_sodium value occurs.
df['serum_sodium'].value_counts()

In [None]:
# How often each serum_creatinine value occurs.
df['serum_creatinine'].value_counts()

In [None]:
# How often each ejection_fraction value occurs.
df['ejection_fraction'].value_counts()

In [None]:
# How often each follow-up time value occurs.
df['time'].value_counts()

In [None]:
# Count plot of serum_creatinine with vertical tick labels.
sns.countplot(data=df, x='serum_creatinine')
plt.xticks(rotation=90);

In [None]:
# Count plot of ejection_fraction.
sns.countplot(data=df, x='ejection_fraction');

In [None]:
# Count plot of serum_sodium on a wider figure.
plt.figure(figsize=(15, 7))
sns.countplot(data=df, x='serum_sodium');

# Data Splitting

* import libraries

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score

* dropping the target

In [None]:
# Target vector: the DEATH_EVENT column.
y = df['DEATH_EVENT']
# Feature matrix: every column of `df` except the target.
x = df.drop(columns='DEATH_EVENT')

In [None]:
# Annotated heatmap of the pairwise correlation matrix.
plt.figure(figsize=(20, 8))
sns.heatmap(
    df.corr(),
    cmap="RdYlGn",
    annot=True,
);

* As we can see, the correlation between DEATH_EVENT and sex/diabetes is very low, so we are going to drop these two columns.


In [None]:
# Remove the `sex` column. errors='ignore' keeps the cell idempotent:
# re-running it after the column is already gone no longer raises KeyError.
df = df.drop(columns='sex', errors='ignore')

In [None]:
# Remove the `diabetes` column. errors='ignore' keeps the cell idempotent:
# re-running it after the column is already gone no longer raises KeyError.
df = df.drop(columns='diabetes', errors='ignore')

In [None]:
# Preview the first rows only instead of dumping the whole frame into the output.
df.head()

* After dropping the sex and diabetes columns, we check the new shape.

In [None]:
# (rows, columns) of `df` at this point in the notebook.
df.shape

* Now we need to split the data into train and test parts.

In [None]:
# Rebuild features/target from the *current* `df`. The earlier `x` was created
# before the `sex` and `diabetes` columns were dropped from `df`, so it still
# contains them and the drops would otherwise have no effect on the models.
x = df.drop(columns='DEATH_EVENT')
y = df['DEATH_EVENT']

# Hold out 20% of the rows for testing; random_state fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=5)

* checking the shapes of all the splits

In [None]:
# Print the shape of each split, in the order: x_train, y_train, x_test, y_test.
for split in (x_train, y_train, x_test, y_test):
    print(split.shape)

# Machine learning algorithms

* After splitting the data, we now apply the machine learning algorithms.

# Logistic regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression, fitted on the training split only and scored on the
# held-out test split. Fitting and scoring on the full data set (x, y) only
# measures how well the model memorises data it has already seen and is not
# comparable with the train/test scores of the other models.
# Features are standardized first so the regularized solver is not driven by
# columns on much larger numeric scales.
logreg = make_pipeline(StandardScaler(), LogisticRegression())
logreg.fit(x_train, y_train)
y_pred = logreg.predict(x_test)
# Test accuracy, printed as a percentage.
print(metrics.accuracy_score(y_test, y_pred)*100)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# k-nearest neighbours (k=5, Minkowski with p=2, i.e. Euclidean distance).
# The features are standardized inside a pipeline: without scaling, columns on
# much larger numeric scales dominate the distance and the remaining features
# are effectively ignored. The scaler is fitted on the training split only.
knn = make_pipeline(
    StandardScaler(),
    KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski'),
)
knn.fit(x_train, y_train)

print('The accuracy of the knn classifier is {:.2f} out of 1 on training data'.format(knn.score(x_train, y_train)))
print('The accuracy of the knn classifier is {:.2f} out of 1 on test data'.format(knn.score(x_test, y_test)))

In [None]:
from sklearn.svm import SVC

In [None]:
# RBF-kernel support vector machine.
# The RBF kernel depends on squared distances between samples, so a fixed
# gamma only makes sense when features share a comparable scale; the features
# are therefore standardized inside a pipeline (scaler fitted on the training
# split only).
svm = make_pipeline(
    StandardScaler(),
    SVC(kernel='rbf', random_state=0, gamma=.10, C=1.0),
)
svm.fit(x_train, y_train)

print('The accuracy of the svm classifier on training data is {:.2f} out of 1'.format(svm.score(x_train, y_train)))

print('The accuracy of the svm classifier on test data is {:.2f} out of 1'.format(svm.score(x_test, y_test)))

In [None]:
import xgboost as xgb

# XGBoost classifier

In [None]:
# Gradient-boosted trees with library-default hyper-parameters,
# fitted on the training split.
xgb_clf = xgb.XGBClassifier().fit(x_train, y_train)

# Report accuracy on the training split, then on the held-out test split.
print(
    'The accuracy of the xgb classifier is {:.2f} out of 1 on training data'.format(
        xgb_clf.score(x_train, y_train)
    )
)
print(
    'The accuracy of the xgb classifier is {:.2f} out of 1 on test data'.format(
        xgb_clf.score(x_test, y_test)
    )
)