In [None]:
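# Optional environment setup - uncomment a line to install that package.
# NOTE: the text after "-->" appears to record the version(s) that were installed. It is not
# pip syntax (the shell would treat ">" as an output redirect), so remove it or replace it
# with "==<version>" before running a command.
#!pip install pytest-warnings -->8.4.1 / 0.3.1
#!pip install numpy -->2.3.2
#!pip install pandas -->2.3.1
#!pip install matplotlib -->3.10.5
#!pip install seaborn -->0.13.2
#!pip install scikit-learn -->joblib-1.5.1 scikit-learn-1.7.1 scipy-1.16.1 threadpoolctl-3.6.0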

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): this silences every warning category for the rest of the session, so
# deprecation notices raised by the plotting / modelling calls below are hidden as well.
warnings.filterwarnings('ignore')

In [None]:
# Load the clinical-records CSV (path is relative to the working directory)
# and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='heart_failure_clinical_records_dataset.csv')
df.head(n=5)

In [None]:
df.shape

In [None]:
df.info()

In [None]:
df.isna().mean()

In [None]:
df.nunique()

In [None]:
df.describe()

In [None]:
# missing values in descending order
df.isnull().sum().sort_values(ascending=False)


In [None]:
# duplicated values
df.duplicated().sum()

In [None]:
# Split the columns by dtype into a categorical (object) frame and a numerical frame.
Categorical = df.select_dtypes(include=['object'])
# 'number' matches every numeric dtype, not only int64 / float64.
Numerical = df.select_dtypes(include='number')
# Report the column names only: printing the frames themselves dumps their rows
# instead of listing the features.
print('Categorical features:\n', Categorical.columns.tolist())
print('Numerical features:\n', Numerical.columns.tolist())

In [None]:
# count target variable
df['DEATH_EVENT'].value_counts()

In [None]:
ax=sns.countplot(x=df['DEATH_EVENT'])

In [None]:
ax=sns.countplot(x='DEATH_EVENT', hue='DEATH_EVENT', data=df)

In [None]:
# Pairwise correlations of all columns, drawn as an annotated heatmap.
corr_matrix = df.corr()
plt.figure(figsize=(20, 20))
sns.heatmap(corr_matrix, annot=True, cmap="GnBu")
plt.title("Correlation Heatmap")
plt.show()

In [None]:
# Scatter-matrix of every column pair, coloured by DEATH_EVENT.
sns.pairplot(data=df, hue='DEATH_EVENT')
plt.show()

In [None]:
# Distribution of age, drawn separately for each DEATH_EVENT value.
# `sns.distplot` is deprecated; `histplot(..., kde=True, stat='density')` with
# `kde_kws=dict(cut=3)` and `alpha=.4` is the replacement seaborn documents for it.
# The legend names the DEATH_EVENT value each histogram is filtered on.
age_by_outcome = [
    (1, 'red', 'DEATH_EVENT = 1'),
    (0, 'green', 'DEATH_EVENT = 0'),
]
for outcome, colour, legend_label in age_by_outcome:
    sns.histplot(
        df.loc[df['DEATH_EVENT'] == outcome, 'age'],
        kde=True,
        stat='density',
        kde_kws=dict(cut=3),
        alpha=.4,
        color=colour,
        label=legend_label,
    )
plt.legend()

In [None]:
# Row count per age value, split by DEATH_EVENT.
plt.figure(figsize=(20, 10))
sns.countplot(data=df, x='age', hue='DEATH_EVENT')
plt.show()

In [None]:
# Columns that are plotted against DEATH_EVENT, one figure each.
features = [
    'age',
    'creatinine_phosphokinase',
    'ejection_fraction',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'time',
]

In [None]:
# One figure per entry of `features`: a swarm of the individual observations plus a
# boxen (letter-value) plot, both grouped by DEATH_EVENT.
for feature in features:
    plt.figure(figsize=(10, 7))
    sns.swarmplot(x=df['DEATH_EVENT'], y=df[feature], color='black', alpha=0.7)
    # seaborn 0.13 deprecates `palette` without `hue`; mapping `hue` to the x variable
    # with the legend switched off is the documented way to keep the same colouring.
    sns.boxenplot(
        x=df['DEATH_EVENT'],
        y=df[feature],
        hue=df['DEATH_EVENT'],
        palette='coolwarm',
        legend=False,
    )
    plt.show()

In [None]:
# DEATH_EVENT is the target; every remaining column is a predictor.
y = df['DEATH_EVENT']
X = df.drop(columns=['DEATH_EVENT'])

In [None]:
from sklearn.model_selection import train_test_split

# 70 / 30 hold-out split with a fixed seed, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same transform to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Wrap the scaled values back into DataFrames.
# The original row index is carried over together with the column names, so each scaled
# row stays aligned with its label in y_train / y_test (a fresh 0..n-1 index would not be).
# np.asarray keeps the cell safe to re-run: a frame produced by a previous run is reduced
# to its values first, so passing `index=` never triggers a reindex.
X_train_scaled = pd.DataFrame(np.asarray(X_train_scaled), columns=X_train.columns, index=X_train.index)
X_test_scaled = pd.DataFrame(np.asarray(X_test_scaled), columns=X_test.columns, index=X_test.index)

In [None]:
np.round(X_train.describe(), 1)   # original X_train

In [None]:
np.round(X_train_scaled.describe(), 1)   # scaled X_train

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Logistic regression with default settings, fit on the standardised training features.
lr = LogisticRegression()
lr.fit(X=X_train_scaled, y=y_train)

In [None]:
y_pred=lr.predict(X_test_scaled)
y_pred

In [None]:
np.array(y_test)

In [None]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

In [None]:
sns.heatmap(confusion_matrix(y_test,y_pred), annot=True)

In [None]:
print("Classification Report:\n", classification_report(y_test, y_pred))

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Support-vector classifier with default settings.
# SVC is sensitive to feature scale, so standardisation is built into the estimator
# rather than fitting SVC on the raw columns. Because the scaler is part of the
# pipeline, `fit` / `predict` keep receiving the unscaled X_train / X_test.
# Binding the model directly also avoids reusing the name of the imported
# `sklearn.svm` module for a model instance.
svm = make_pipeline(StandardScaler(), SVC())
svm.fit(X_train, y_train)

In [None]:
y_pred_svm=svm.predict(X_test)
y_pred_svm

In [None]:
np.array(y_test)

In [None]:
print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_svm))
print("Classification Report:\n", classification_report(y_test, y_pred_svm))

In [None]:
sns.heatmap(confusion_matrix(y_test,y_pred_svm), annot=True, cmap="Blues")

In [None]:
#!pip install xgboost   # author's note "-->-3.0.4": presumably xgboost 3.0.4 was installed (a note, not pip syntax)
from xgboost import XGBClassifier

In [None]:
# Gradient-boosted tree classifier with a fixed seed, fit on the unscaled training features.
# `use_label_encoder` is no longer passed: it is a deprecated flag that current XGBoost
# releases do not use and only warn about.
xgb = XGBClassifier(random_state=42, eval_metric='logloss')
xgb.fit(X_train, y_train)

In [None]:
y_pred_xgb = xgb.predict(X_test)


In [None]:
print("Accuracy:", accuracy_score(y_test, y_pred_xgb))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_xgb))
print("Classification Report:\n", classification_report(y_test, y_pred_xgb))

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate values per hyper-parameter: 3 * 3 * 3 * 2 * 2 = 108 combinations.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[3, 5, 7],
    learning_rate=[0.01, 0.1, 0.2],
    subsample=[0.8, 1.0],
    colsample_bytree=[0.8, 1.0],
)

# Exhaustive search over the grid with 5-fold cross-validation, ranked by accuracy.
grid_search = GridSearchCV(xgb, param_grid, scoring='accuracy', cv=5, verbose=1)

grid_search.fit(X_train, y_train)

In [None]:
print("Best Parameters:", grid_search.best_params_)
print("Best CV Accuracy:", grid_search.best_score_)


In [None]:
best_xgb = grid_search.best_estimator_
y_pred_best = best_xgb.predict(X_test)

In [None]:
print("Tuned XGBoost Test Accuracy:", accuracy_score(y_test, y_pred_best))


In [None]:
#!pip install tensorflow   # author's note "-->-2.20.0": presumably tensorflow 2.20.0 was installed (a note, not pip syntax)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
from tensorflow.keras import Input

# Small fully-connected binary classifier: 16 -> 8 -> 1 units.
# The input width is declared with an explicit Input layer; passing `input_shape=` to the
# first Dense layer is the deprecated form that Keras 3 warns about for Sequential models.
ann = Sequential([
    Input(shape=(X_train_scaled.shape[1],)),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')   # sigmoid for binary classification
])


In [None]:
ann.compile(optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'])

In [None]:
history = ann.fit(X_train_scaled, y_train, 
                  epochs=50, batch_size=16, 
                  validation_split=0.2, verbose=0)


In [None]:
# Sigmoid outputs for the test split, thresholded at 0.5 into 0/1 class labels.
ann_probabilities = ann.predict(X_test_scaled)
y_pred_ann = (ann_probabilities > 0.5).astype("int32")
print("ANN Test Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_ann))


In [None]:
from tensorflow.keras import callbacks

# Stop training once the monitored quantity (val_loss, the Keras default) has not improved
# by at least 0.001 for 10 epochs, then restore the best weights seen.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
)

In [None]:
from tensorflow.keras import Input
from tensorflow.keras.layers import Dropout

# Wider network (128 -> 32 -> 1 units) with 25% dropout before the output layer.
# The input width is read from the training data instead of the hard-coded 12, and is
# declared through an explicit Input layer rather than the deprecated `input_dim=` kwarg.
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(units=1, activation='sigmoid'))

In [None]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
model.summary()

In [None]:
# Train with early stopping; 25% of the training rows are held out for validation.
# The network is fit on the standardised features (X_train_scaled), consistent with the
# first network (`ann`): gradient-based training is sensitive to unscaled inputs.
# Note that `history` is rebound here and replaces the history of the earlier `ann` run.
history = model.fit(
    X_train_scaled,
    y_train,
    batch_size=20,
    epochs=100,
    callbacks=[early_stopping],
    validation_split=0.25,
)

In [None]:
# Training and validation loss per epoch for the most recent `fit` call.
history_df = pd.DataFrame(history.history)
plt.plot(history_df['loss'], label="Training loss")
plt.plot(history_df['val_loss'], label="Val loss")

plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

In [None]:
# Recorded metrics, followed by the min / max of the training and validation loss.
print(history_df.columns)
for column in ('loss', 'val_loss'):
    print(history_df[column].min(), history_df[column].max())

In [None]:
# Training and validation accuracy per epoch.
# Axis labels are added so the figure is readable on its own and matches the loss plot.
plt.plot(history_df.loc[:, ['accuracy']], label="Training Accuracy")
plt.plot(history_df.loc[:, ['val_accuracy']], label="Val Accuracy")

plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.show()