In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _subfolders, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 1. Import Libraries

In [None]:
# import libraries
import datetime

import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression,SGDClassifier, RidgeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder,MinMaxScaler , StandardScaler
from sklearn.tree import DecisionTreeClassifier


# 2. Import Data

In [None]:
# Load the heart-failure clinical records CSV from the Kaggle input directory
# and preview its first rows.
DATA_PATH = "/kaggle/input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv"
df = pd.read_csv(DATA_PATH)
df.head()

# 3. Data Cleaning and Visualization

In [None]:
# Report the number of missing (NaN) entries per column, one "name: count" line each.
for column_name, missing_count in df.isna().sum().items():
    print(f"{column_name}: {missing_count}")

In [None]:
# Check the pairwise correlation between columns: strongly correlated
# independent features may affect the final model output.
correlation = df.corr()

# Annotated heatmap of the correlation matrix on a 15x9 inch figure.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(15, 9))
sns.heatmap(correlation, annot=True, cmap='Blues', ax=heatmap_ax)

In [None]:
# ejection_fraction and serum_sodium were singled out as showing more
# correlation than other feature pairs; plot them against each other to see
# how the two features relate.
scatter_fig, scatter_ax = plt.subplots(figsize=(16, 8))  # width by height

# Red points, default marker and default size.
scatter_ax.scatter(df['ejection_fraction'], df['serum_sodium'], c='r')

# Axis labels and title
scatter_ax.set_xlabel('ejection_fraction')
scatter_ax.set_ylabel('serum_sodium')
scatter_ax.set_title('ejection_fraction vs serum_sodium')

In [None]:
# Feature matrix: every column except the DEATH_EVENT target.
x = df.drop(columns=["DEATH_EVENT"])
x.head()



In [None]:
# Target vector: the DEATH_EVENT column.
y = df.DEATH_EVENT
y.head()

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=40
)
print(f"training data:{x_train.shape}")
print(f"test data:{x_test.shape}")

In [None]:
# Number of records for each DEATH_EVENT value.
df["DEATH_EVENT"].value_counts()

In [None]:
# Bar chart of how many records fall into each DEATH_EVENT class.
ax = sns.countplot(data=df, x="DEATH_EVENT", palette="Set3")

In [None]:
# Check how the `sex` column relates to DEATH_EVENT: one bar per DEATH_EVENT
# value, using barplot's default aggregate of `sex` within each group.
plt.figure(figsize=(15, 8))
ax = sns.barplot(data=df, x="DEATH_EVENT", y="sex")

In [None]:
# Same view for `smoking`: the bars are expected to look balanced across the
# two DEATH_EVENT groups.
plt.figure(figsize=(15, 8))
ax = sns.barplot(data=df, x="DEATH_EVENT", y="smoking")

In [None]:
!pip install plotly
import plotly.express as px

In [None]:
# Interactive scatter of age against serum_creatinine, coloured by DEATH_EVENT
# and split into one facet column per DEATH_EVENT value.
fig = px.scatter(
    data_frame=df,
    x="serum_creatinine",
    y="age",
    color="DEATH_EVENT",
    facet_col="DEATH_EVENT",
    title="serum_creatinine, age Vs death_event",
)

# Turn off the x-axis grid lines on every facet, then render the figure.
fig.update_xaxes(showgrid=False)
fig.show()

# 4. Data Preprocessing

In [None]:
# Use feature scaling to normalise the data so that each feature contributes
# approximately proportionately to the final output.
# The scaler is fitted on the training split only: fitting it on the full
# feature matrix would let test-set statistics leak into the preprocessing.
standard = StandardScaler()

x_train_std = standard.fit_transform(x_train)
x_test_std = standard.transform(x_test)

# Scaled version of the full feature matrix, kept under its original name.
std_x = standard.transform(x)

In [None]:
# Scores collected from the model cells, in the order those cells are run.
results = list()

We have already split the data with train_test_split and applied feature scaling. Now we train the models on the dataset, using their default parameters.

# 5. Prepare Model

In [None]:
# Linear classifier trained with stochastic gradient descent.
# The features are standardised inside the pipeline (the scaler is fitted on
# the training split only when `fit` is called), and a fixed random_state
# makes the score repeatable between runs.
clf_1 = make_pipeline(StandardScaler(), SGDClassifier(random_state=40))

clf_1.fit(x_train, y_train)
y_predicted = clf_1.predict(x_test)
score = clf_1.score(x_test, y_test)  # mean accuracy on the held-out split

print(score)
# NOTE: re-running this cell appends another entry to `results`.
results.append(score)

In [None]:
# Ridge classifier. Its L2 penalty is sensitive to feature scale, so the
# features are standardised inside the pipeline (the scaler is fitted on the
# training split only when `fit` is called).
clf_1 = make_pipeline(StandardScaler(), RidgeClassifier())

clf_1.fit(x_train, y_train)
y_predicted = clf_1.predict(x_test)
score = clf_1.score(x_test, y_test)  # mean accuracy on the held-out split

print(score)
# NOTE: re-running this cell appends another entry to `results`.
results.append(score)

In [None]:
# Logistic regression. The features are standardised inside the pipeline (the
# scaler is fitted on the training split only when `fit` is called); unscaled,
# large-magnitude columns make it hard for the solver to converge within its
# default iteration limit.
clf = make_pipeline(StandardScaler(), LogisticRegression())

clf.fit(x_train, y_train)
y_predicted = clf.predict(x_test)
score = clf.score(x_test, y_test)  # mean accuracy on the held-out split

print(score)
# NOTE: re-running this cell appends another entry to `results`.
results.append(score)

# 6. Confusion matrix

In [None]:
# Print NumPy arrays with two decimals (this setting is global to NumPy printing).
np.set_printoptions(precision=2)

# `y_predicted` holds the predictions of whichever model cell ran last.
cnf_matrix = confusion_matrix(y_true=y_test, y_pred=y_predicted)
cnf_matrix

##### The confusion matrix shows the true positive, true negative, false positive and false negative counts

In [None]:
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it on the current matplotlib figure.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix. Rows are drawn along the y axis ("True label") and
        columns along the x axis ("Predicted label").
    classes : sequence
        Tick labels for both axes, in the same order as the rows of `cm`.
    normalize : bool, default False
        When True, every row is divided by its sum before printing and
        plotting. Rows whose sum is zero are left as all zeros instead of
        turning into NaN.
    title : str, default 'Confusion matrix'
        Title placed above the plot.
    cmap : matplotlib colormap, default plt.cm.Blues
        Colormap used for the cells.

    Returns
    -------
    None
        The function only prints the matrix and draws on the current figure.
    """
    # Accept plain nested lists as well as NumPy arrays.
    cm = np.asarray(cm)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row total is non-zero; other cells stay 0.0.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Two decimals for normalised values, integers for raw counts.
    fmt = '.2f' if normalize else 'd'
    # Cells above half of the maximum get white text so they stay readable
    # on the dark end of the colormap.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

In [None]:
# Tick labels must follow the row/column order of the confusion matrix, which
# is the sorted label order. value_counts() on its own orders the labels by
# frequency, so sort by label before using the index.
classes = df['DEATH_EVENT'].value_counts().sort_index()

# Raw counts
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=classes.index,
                      title='Confusion matrix, without normalization')
# With normalization
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=classes.index, normalize=True,
                      title='Normalized confusion matrix')

plt.show()

In [None]:
# Summary table pairing each model name with the score stored for it in
# `results` (same order as the model cells).
model_names = ['SGDClassifier', "Ridge classifier", "Logistic Regression"]
result_df = pd.DataFrame({"ML Models": model_names, "Score": results})
result_df