In [None]:
import nbtest
import json
import numpy as np
# Draw a fresh random integer in [0, 10000) each time this cell runs.
# NOTE(review): no cell visible in this notebook reads `random_seed`; the split
# and model cells below pass their own fixed random_state values instead.
random_seed = np.random.randint(10000)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
# Load the dataset from a CSV located relative to the current working directory;
# every later cell reads from this `df` frame.
df = pd.read_csv("./heart_failure.csv")

In [None]:
# Preview the first ten rows of the loaded frame.
df.head(10)

In [None]:
# Print a concise summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Per column: True if at least one value is missing.
df.isna().any()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the frame's columns.
df.describe()

In [None]:
# For every column, print its name followed by how often each distinct value occurs.
for column_name in df.columns:
    print(column_name)
    value_frequencies = df[f"{column_name}"].value_counts()
    print(value_frequencies)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Pairwise scatter/distribution grid over the listed columns, coloured by outcome.
pairplot_columns = [
    'age',
    'anaemia',
    'creatinine_phosphokinase',
    'diabetes',
    'ejection_fraction',
    'high_blood_pressure',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'sex',
    'smoking',
    'time',
    'DEATH_EVENT',
]
sns.pairplot(df, vars=pairplot_columns, hue='DEATH_EVENT')
plt.show()

In [None]:
# Absolute pairwise correlations between columns
# ("korelasyon matrisi" is Turkish for "correlation matrix").
korelasyon_matrisi = df.corr().abs()
# Annotated heatmap of that matrix, two decimals per cell.
plt.figure(figsize=(8, 6), dpi=100)
sns.heatmap(
    korelasyon_matrisi,
    annot=True,
    fmt=".2f",
    cmap='rocket_r',
    linewidths=1,
)
plt.title("Korelasyon Heatmap")
plt.show()

In [None]:
# Absolute correlation of every column with the target, strongest first
# (at most the 15 largest entries are shown).
abs_corr_with_target = df.corr().abs()["DEATH_EVENT"]
print(abs_corr_with_target.nlargest(15))

In [None]:
# Labels of the six columns with the largest absolute correlation to DEATH_EVENT.
# The target's correlation with itself normally ranks first, so it is part of the six.
target_abs_corr = df.corr().abs()["DEATH_EVENT"]
col_reduced = target_abs_corr.nlargest(6).index
col_reduced

In [None]:
# Feature matrix: five hand-picked predictor columns, as a NumPy array.
x = df[['time', 'serum_creatinine', 'ejection_fraction', 'age', 'serum_sodium']].values
# Target vector as a NumPy array. The label is selected by name rather than by
# position so that a reordered or extended CSV cannot silently change which
# column is being predicted.
y = df['DEATH_EVENT'].values

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

In [None]:
from sklearn.metrics import roc_curve, auc, accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training split only, then apply the
# same scaling to both splits. Note the capitalisation: X_* are the scaled
# versions of the raw x_* arrays.
sc = StandardScaler()
sc.fit(x_train)
X_train = sc.transform(x_train)
X_test = sc.transform(x_test)

In [None]:
from sklearn.linear_model import LogisticRegression
# Fit logistic regression on the scaled training data and predict the test split.
logr = LogisticRegression(random_state=0).fit(X_train, y_train)
y_pred = logr.predict(X_test)
# Confusion matrix with rows/columns ordered by the classifier's own class labels.
cm = confusion_matrix(y_test, y_pred, labels=logr.classes_)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=logr.classes_,
)
disp.plot(cmap="YlGnBu")
plt.show()
# Blank line, then the fraction of test rows predicted correctly.
print()
print("accuracy: ", accuracy_score(y_test, y_pred))

In [None]:
import sklearn
# Class-membership probabilities for the test split; column 1 is the positive class.
y_proba = logr.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_test, y_proba[:, 1], pos_label=1)
roc_auc = auc(fpr, tpr)
plt.figure(figsize=(10, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
# Diagonal reference line (TPR == FPR).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()
# Score the AUC from the positive-class probabilities so the printed number
# matches the plotted curve. Hard 0/1 labels collapse the curve to a single
# threshold, and reading the global `y_pred` would tie this cell to whichever
# model cell happened to run last.
print(sklearn.metrics.roc_auc_score(y_test, y_proba[:, 1]))

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Fit a 5-nearest-neighbour classifier (Euclidean distance) on the scaled
# training data and predict the test split.
knn = KNeighborsClassifier(n_neighbors=5, metric='euclidean').fit(X_train, y_train)
y_pred = knn.predict(X_test)
# Confusion matrix with rows/columns ordered by the classifier's own class labels.
cm = confusion_matrix(y_test, y_pred, labels=knn.classes_)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=knn.classes_,
)
disp.plot(cmap="YlGnBu")
plt.show()
# Blank line, then the fraction of test rows predicted correctly.
print()
print("accuracy: ", accuracy_score(y_test, y_pred))

In [None]:
import matplotlib.pyplot as plt
# Class-membership probabilities for the test split; column 1 is the positive class.
y_proba = knn.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_test, y_proba[:, 1], pos_label=1)
roc_auc = auc(fpr, tpr)
plt.figure(figsize=(10, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
# Diagonal reference line (TPR == FPR).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()
# Score the AUC from the positive-class probabilities so the printed number
# matches the plotted curve. Hard 0/1 labels collapse the curve to a single
# threshold, and reading the global `y_pred` would tie this cell to whichever
# model cell happened to run last.
print(sklearn.metrics.roc_auc_score(y_test, y_proba[:, 1]))

In [None]:
from sklearn.svm import SVC
# RBF-kernel support vector classifier. probability=True enables predict_proba,
# whose calibration step shuffles the data internally; random_state is pinned
# (as for the other randomised estimators in this notebook) so the probability
# estimates, and therefore the ROC curve built from them, are repeatable.
svc = SVC(kernel='rbf', probability=True, random_state=0)
svc.fit(X_train,y_train)
y_pred = svc.predict(X_test)
# Confusion matrix with rows/columns ordered by the classifier's own class labels.
cm = confusion_matrix(y_test, y_pred, labels = svc.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix = cm, display_labels = svc.classes_)
disp.plot(cmap="YlGnBu")
plt.show()
# Blank line, then the fraction of test rows predicted correctly.
print()
print("accuracy: ", accuracy_score(y_test, y_pred))

In [None]:
import matplotlib.pyplot as plt
# Class-membership probabilities for the test split; column 1 is the positive class.
y_proba = svc.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_test, y_proba[:, 1], pos_label=1)
roc_auc = auc(fpr, tpr)
plt.figure(figsize=(10, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
# Diagonal reference line (TPR == FPR).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()
# Score the AUC from the positive-class probabilities so the printed number
# matches the plotted curve. Hard 0/1 labels collapse the curve to a single
# threshold, and reading the global `y_pred` would tie this cell to whichever
# model cell happened to run last.
print(sklearn.metrics.roc_auc_score(y_test, y_proba[:, 1]))

In [None]:
from sklearn.naive_bayes import GaussianNB
# Fit Gaussian naive Bayes on the scaled training data and predict the test split.
gnb = GaussianNB().fit(X_train, y_train)
y_pred = gnb.predict(X_test)
# Confusion matrix with rows/columns ordered by the classifier's own class labels.
cm = confusion_matrix(y_test, y_pred, labels=gnb.classes_)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=gnb.classes_,
)
disp.plot(cmap="YlGnBu")
plt.show()
# Blank line, then the fraction of test rows predicted correctly.
print()
print("accuracy: ", accuracy_score(y_test, y_pred))

In [None]:
import matplotlib.pyplot as plt
# Class-membership probabilities for the test split; column 1 is the positive class.
y_proba = gnb.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_test, y_proba[:, 1], pos_label=1)
roc_auc = auc(fpr, tpr)
plt.figure(figsize=(10, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
# Diagonal reference line (TPR == FPR).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()
# Score the AUC from the positive-class probabilities so the printed number
# matches the plotted curve. Hard 0/1 labels collapse the curve to a single
# threshold, and reading the global `y_pred` would tie this cell to whichever
# model cell happened to run last.
print(sklearn.metrics.roc_auc_score(y_test, y_proba[:, 1]))

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Fit an entropy-criterion decision tree (seeded) on the scaled training data
# and predict the test split.
dtc = DecisionTreeClassifier(criterion='entropy', random_state=10).fit(X_train, y_train)
y_pred = dtc.predict(X_test)
# Confusion matrix with rows/columns ordered by the classifier's own class labels.
cm = confusion_matrix(y_test, y_pred, labels=dtc.classes_)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=dtc.classes_,
)
disp.plot(cmap="YlGnBu")
plt.show()
# Blank line, then the fraction of test rows predicted correctly.
print()
print("accuracy: ", accuracy_score(y_test, y_pred))

In [None]:
import matplotlib.pyplot as plt
# Class-membership probabilities for the test split; column 1 is the positive class.
y_proba = dtc.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_test, y_proba[:, 1], pos_label=1)
roc_auc = auc(fpr, tpr)
plt.figure(figsize=(10, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
# Diagonal reference line (TPR == FPR).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()
# Score the AUC from the positive-class probabilities so the printed number
# matches the plotted curve. Hard 0/1 labels collapse the curve to a single
# threshold, and reading the global `y_pred` would tie this cell to whichever
# model cell happened to run last.
print(sklearn.metrics.roc_auc_score(y_test, y_proba[:, 1]))

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Fit a seeded 35-tree random forest (entropy criterion) on the scaled training
# data and predict the test split.
rfc = RandomForestClassifier(
    n_estimators=35,
    criterion='entropy',
    random_state=24,
).fit(X_train, y_train)
y_pred = rfc.predict(X_test)
# Confusion matrix with rows/columns ordered by the classifier's own class labels.
cm = confusion_matrix(y_test, y_pred, labels=rfc.classes_)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=rfc.classes_,
)
disp.plot(cmap="YlGnBu")
plt.show()
# Blank line, then the fraction of test rows predicted correctly.
print()
print("accuracy: ", accuracy_score(y_test, y_pred))

In [None]:
import matplotlib.pyplot as plt
# Class-membership probabilities for the test split; column 1 is the positive class.
y_proba = rfc.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_test, y_proba[:, 1], pos_label=1)
roc_auc = auc(fpr, tpr)
plt.figure(figsize=(10, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
# Diagonal reference line (TPR == FPR).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()
# Score the AUC from the positive-class probabilities so the printed number
# matches the plotted curve. Hard 0/1 labels collapse the curve to a single
# threshold, and reading the global `y_pred` would tie this cell to whichever
# model cell happened to run last.
print(sklearn.metrics.roc_auc_score(y_test, y_proba[:, 1]))