In [24]:
# Importing relevant libraries

import pandas as pd
import seaborn as sns
from pathlib import Path
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [6]:
# Loading dataset from the Resources folder
df = pd.read_csv(
    Path('Resources/heart_failure_clinical_records_dataset.csv')   
)

# Review the DataFrame
df.head()



Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [7]:
df.columns

Index(['age', 'anaemia', 'creatinine_phosphokinase', 'diabetes',
       'ejection_fraction', 'high_blood_pressure', 'platelets',
       'serum_creatinine', 'serum_sodium', 'sex', 'smoking', 'time',
       'DEATH_EVENT'],
      dtype='object')

In [9]:
# Separating features "X" from target variable "y"
y = df ['DEATH_EVENT'] # target/dependent variables
X = df.drop(columns='DEATH_EVENT') # features/independent variables

In [10]:
# Separating data into training and test
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [16]:
# Scaling Dataset

# Instantiate a StandardScaler instance
scaler = StandardScaler()

# Fit the training data to the standard scaler
X_scaler = scaler.fit(X_train)

# Transform the training data using the scaler
X_train_scaled = X_scaler.transform(X_train)

# Transform the testing data using the scaler
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Instantiate Gradient Boosting Classifier 
gbr = GradientBoostingClassifier(
                                learning_rate=0.1,
                                n_estimators=300,
                                max_depth = 1, 
                                random_state = 42,
                                max_features = 12)

In [23]:
# Perform 5-fold cross-validation
cv_scores = cross_val_score(gbr, X_train, y_train, cv=5)

# Fit the model on the training data
gbr.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred = gbr.predict(X_test_scaled)

# Generate classification report
report = classification_report(y_test, y_pred)


In [25]:
# Generating a confusion matrix for the model
confusion_matrix(y_test, y_pred)

array([[53,  2],
       [ 6, 14]], dtype=int64)

In [26]:
# Generating a classification report for the model

target_labels = ["Death Report","No Death Report"]
print(classification_report(y_test, y_pred, target_names=target_labels))

                 precision    recall  f1-score   support

   Death Report       0.90      0.96      0.93        55
No Death Report       0.88      0.70      0.78        20

       accuracy                           0.89        75
      macro avg       0.89      0.83      0.85        75
   weighted avg       0.89      0.89      0.89        75

