# **Importing Libraries**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings as wr
%matplotlib inline

# **Reading dataset**

In [None]:
# Load the heart-failure clinical records CSV into a DataFrame.
data = pd.read_csv(
    '../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv'
)

In [None]:
# Show the frame as the cell's last expression so the notebook renders it as a
# rich (HTML) table instead of the plain-text dump that print() produces.
data

# **Exploring the Dataset**

In [None]:
# Peek at the first five records.
data.head(n=5)

In [None]:
# Per-column non-null counts and dtypes, plus the frame's memory usage.
data.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

In [None]:
# Column labels of the dataset.
data.columns

In [None]:
# Dataset dimensions as (rows, columns).
data.shape

In [None]:
# Count the missing values in each column.
data.isna().sum()

# **Data Analysis and Visualisation**

In [None]:
# One histogram per numeric column, all drawn on a single 20x20-inch figure.
data.hist(figsize=(20,20))
plt.show()

In [None]:
# Column labels as a plain NumPy array.
data.columns.to_numpy()

In [None]:
# Histogram of patient ages in 25-year bins.
# The original cell plotted a hard-coded sample array that had nothing to do
# with the loaded dataset; the ages are now read from `data` itself.
# NOTE(review): assumes `data` has an 'age' column — confirm against data.columns.
fig, ax = plt.subplots(1, 1)
a = data['age'].to_numpy()
age_bins = [0, 25, 50, 75, 100]
ax.hist(a, bins=age_bins)
ax.set_title("Heart Failure Prediction")
ax.set_xticks(age_bins)
ax.set_xlabel('Age')
# A histogram's bar height is the number of observations in the bin, not a
# platelet measurement.
ax.set_ylabel('Number of patients')
plt.show()

In [None]:
# Pairwise scatter/histogram grid of every column except 'platelets'.
# sns.pairplot is a figure-level function that builds its own figure, so a
# preceding plt.figure(...) call only produced an extra, empty figure and did
# not affect the grid size; it has been removed.
sns.pairplot(data.drop('platelets', axis=1))
plt.show()

# **Applying K-means clustering Algorithm**

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Feature matrix for clustering: the columns at positions 3 and 4 of `data`,
# extracted as a 2-column NumPy array.
X = data.iloc[:, [3, 4]].to_numpy()
print(X)

In [None]:
# Initialise K-means with 4 clusters.
# random_state pins the centroid initialisation so cluster labels (and every
# plot built from them) are reproducible across Restart & Run All.
# n_init is set explicitly because its default differs between scikit-learn
# releases.
km = KMeans(n_clusters=4, n_init=10, random_state=42)

# **Fitting and Predicting the Data**

In [None]:
# Fit the model on X, then read back the cluster label assigned to each row.
km.fit(X)
y_predict = km.labels_
print(y_predict)

In [None]:
# Report how many distinct clusters were actually assigned. The message
# previously printed the full label array, which is not a cluster count.
print('Number of Predicted clusters', np.unique(y_predict).size)

In [None]:
# Store each row's cluster label in a 'cluster' column, then show the last
# five records to confirm the column was added.
data['cluster'] = y_predict
data.tail(n=5)

In [None]:
# Centroid coordinates of the fitted model: one row per cluster, one column per feature in X.
print('cluster coordinates:',km.cluster_centers_)

In [None]:
# Display the frame, which now includes the 'cluster' column.
data

# **K-Means Visualisation of Clusters**

In [None]:
# Scatter the two clustered features, one colour per K-means cluster, and
# overlay the fitted centroids.
#
# Changes from the earlier version of this cell:
#   * "Cluster 4" was masked with label 1 instead of label 3, so cluster 3 was
#     never drawn and cluster 1 was drawn twice. Looping over the labels draws
#     each cluster exactly once.
#   * x/y are passed to sns.scatterplot as keywords; recent seaborn releases no
#     longer accept them positionally. The blanket
#     wr.filterwarnings("ignore") that hid the related warning is dropped.
#   * Axis labels now name the columns that X was built from.
y_kmeans = data.cluster.to_numpy()  # plain ndarray, used as a row mask on X

# Colours in cluster-label order: label 0 -> 'grey', 1 -> 'orange', ...
cluster_colors = ['grey', 'orange', 'green', 'blue']

plt.figure(figsize=(10,6))
for label, color in enumerate(cluster_colors):
    members = y_kmeans == label
    sns.scatterplot(x=X[members, 0], y=X[members, 1], color=color,
                    label=f'Cluster {label + 1}', s=50)
sns.scatterplot(x=km.cluster_centers_[:, 0], y=km.cluster_centers_[:, 1], color='red',
                label='Centroids', s=300, marker='o')
plt.grid(False)
plt.title('Clusters of Heart Failure', fontsize = 16)
# X holds the columns at positions 3 and 4 of `data`, so use their names.
plt.xlabel(data.columns[3], fontsize = 12)
plt.ylabel(data.columns[4], fontsize = 12)
plt.legend()
plt.show()

Here we have fewer deaths and more recoveries among the heart patients.