# CARDIAC ARREST CLASSIFICATION 

# Import libraries 

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split 
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, roc_auc_score,
    log_loss, matthews_corrcoef
)

# Load Data 

In [2]:
df = pd.read_csv("/kaggle/input/cardiac-arrest-dataset/cardiac arrest dataset.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


# Data Analysis 

In [3]:
df.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [4]:
df.dtypes

age           int64
sex           int64
cp            int64
trestbps      int64
chol          int64
fbs           int64
restecg       int64
thalach       int64
exang         int64
oldpeak     float64
slope         int64
ca            int64
thal          int64
target        int64
dtype: object

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1025 non-null   int64  
 1   sex       1025 non-null   int64  
 2   cp        1025 non-null   int64  
 3   trestbps  1025 non-null   int64  
 4   chol      1025 non-null   int64  
 5   fbs       1025 non-null   int64  
 6   restecg   1025 non-null   int64  
 7   thalach   1025 non-null   int64  
 8   exang     1025 non-null   int64  
 9   oldpeak   1025 non-null   float64
 10  slope     1025 non-null   int64  
 11  ca        1025 non-null   int64  
 12  thal      1025 non-null   int64  
 13  target    1025 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 112.2 KB


In [6]:
df.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0
mean,54.434146,0.69561,0.942439,131.611707,246.0,0.149268,0.529756,149.114146,0.336585,1.071512,1.385366,0.754146,2.323902,0.513171
std,9.07229,0.460373,1.029641,17.516718,51.59251,0.356527,0.527878,23.005724,0.472772,1.175053,0.617755,1.030798,0.62066,0.50007
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,48.0,0.0,0.0,120.0,211.0,0.0,0.0,132.0,0.0,0.0,1.0,0.0,2.0,0.0
50%,56.0,1.0,1.0,130.0,240.0,0.0,1.0,152.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,275.0,0.0,1.0,166.0,1.0,1.8,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


In [7]:
df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

# Data preprocessing and splitting

In [8]:
X = df.drop("target",axis=1)
y = df["target"]

In [9]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [10]:
scaler = MinMaxScaler()

# Fit scaler on training data and transform both train and test
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# (Optional) Convert scaled data back to DataFrame
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X.columns, index=X_train.index)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X.columns, index=X_test.index)

In [11]:
# Show result
print("Scaled Training Data:")
print(X_train_scaled)
print("\nScaled Test Data:")
print(X_test_scaled)

Scaled Training Data:
          age  sex        cp  trestbps      chol  fbs  restecg   thalach  \
835  0.416667  1.0  0.666667  0.226415  0.052511  0.0      0.0  0.419847   
137  0.729167  0.0  0.000000  0.811321  0.454338  0.0      0.5  0.633588   
534  0.520833  0.0  0.666667  0.132075  0.321918  0.0      0.0  0.732824   
495  0.625000  1.0  0.000000  0.386792  0.246575  0.0      0.5  0.687023   
244  0.458333  1.0  0.666667  0.292453  0.271689  1.0      0.0  0.725191   
..        ...  ...       ...       ...       ...  ...      ...       ...   
700  0.250000  1.0  0.666667  0.339623  0.200913  0.0      0.0  0.740458   
71   0.666667  1.0  0.000000  0.433962  0.184932  0.0      0.0  0.511450   
106  0.458333  1.0  0.000000  0.433962  0.394977  0.0      0.5  0.778626   
270  0.291667  1.0  0.000000  0.150943  0.194064  0.0      0.5  0.687023   
860  0.479167  1.0  0.000000  0.169811  0.237443  0.0      0.5  0.679389   

     exang   oldpeak  slope    ca      thal  
835    0.0  0.12903

# Model Implementation 

In [12]:
clf = DecisionTreeClassifier(random_state=42)

# Train the model
clf.fit(X_train_scaled, y_train)

# Predict on test data
y_pred = clf.predict(X_test_scaled)

# Evaluation 

In [13]:
average_type = 'binary'  # use 'macro', 'weighted', or 'micro' for multi-class

# Predict class labels
y_pred = clf.predict(X_test_scaled)

# Predict probabilities (for ROC AUC and Log Loss)
y_proba = clf.predict_proba(X_test_scaled)[:, 1] if len(set(y)) == 2 else None

# Compute evaluation metrics
print(f"Accuracy:  {accuracy_score(y_test, y_pred):.4f}")
print(f"Precision: {precision_score(y_test, y_pred, average=average_type):.4f}")
print(f"Recall:    {recall_score(y_test, y_pred, average=average_type):.4f}")
print(f"F1 Score:  {f1_score(y_test, y_pred, average=average_type):.4f}")
print(f"MCC:       {matthews_corrcoef(y_test, y_pred):.4f}")

# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(cm)

# ROC AUC and Log Loss (for binary classification only)
if y_proba is not None:
    print(f"ROC AUC:   {roc_auc_score(y_test, y_proba):.4f}")
    print(f"Log Loss:  {log_loss(y_test, y_proba):.4f}")

# Detailed classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Accuracy:  0.9854
Precision: 1.0000
Recall:    0.9709
F1 Score:  0.9852
MCC:       0.9712

Confusion Matrix:
[[102   0]
 [  3 100]]
ROC AUC:   0.9854
Log Loss:  0.5275

Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       102
           1       1.00      0.97      0.99       103

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205

