In [83]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [84]:
# import dataset
heart_disease = fetch_ucirepo(id=45)

# access data
X = heart_disease.data.features
y = heart_disease.data.targets

y = pd.Series(y.values.flatten(), name='target')

# Convert target variable to binary classification (0 = no disease, 1 = disease)
y = (y > 0).astype(int)  # Convert all non-zero values to 1

In [85]:
# Handle missing values using Nearest Neighbor Hot Deck Imputation (KNN)
imputer = KNNImputer(n_neighbors=5)
X_imputed = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# Define numerical and categorical features
numeric_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak', 'ca']
categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'thal']

# Convert categorical columns back to integers
X_imputed[categorical_features] = X_imputed[categorical_features].round(0).astype(int)

# Encode categorical features (if any exist)
label_encoders = {}
for col in categorical_features:
    le = LabelEncoder()
    X_imputed[col] = le.fit_transform(X_imputed[col])
    label_encoders[col] = le  # Store encoders for later decoding if needed

# Define categorical columns
categorical_cols = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'thal']

# Ensure categorical columns are integers after imputation
X_imputed[categorical_cols] = X_imputed[categorical_cols].round().astype(int)

#NADOBLE KASI DI KO ALAM BAKIT AYAW NIYA MAGSTAY AS INTEGER KAPAG DI INULIT, NAGIGING FLOAT KAPAG PINRINT
# Verify
#print(X_imputed.dtypes)
#print(X_imputed[categorical_cols].head())

In [86]:
# Standardize numerical features
scaler = StandardScaler()
X_imputed[numeric_features] = scaler.fit_transform(X_imputed[numeric_features])

In [87]:
# Split dataset (70% train, 30% test, stratified to maintain class balance) TINRY KO MUNA 70-30
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y, test_size=0.3, stratify=y, random_state=42)

In [88]:
# Feature selection using Decision Tree
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
importances = dt.feature_importances_
selected_features = X_train.columns[np.argsort(importances)[-8:]]  # Top 8 features

# Create a reduced feature dataset
X_train_selected = X_train[selected_features]
X_test_selected = X_test[selected_features]

# Print selected features
print("Selected Features:", selected_features.tolist())

Selected Features: ['restecg', 'age', 'oldpeak', 'ca', 'chol', 'thalach', 'cp', 'thal']


In [89]:
# Train and evaluate models on full feature set
models = {
    "SVM": SVC(),
    "KNN": KNeighborsClassifier(),
    "Naïve Bayes": GaussianNB()
}

print("\nResults using all features:")
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    specificity = tn / (tn + fp)
    print(f"{name} - Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}, Specificity: {specificity:.4f}")

print("\nResults using selected features:")
for name, model in models.items():
    model.fit(X_train_selected, y_train)
    y_pred = model.predict(X_test_selected)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    specificity = tn / (tn + fp)
    print(f"{name} - Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}, Specificity: {specificity:.4f}")


Results using all features:
SVM - Accuracy: 0.7912, Precision: 0.7556, Recall: 0.8095, F1-score: 0.7816, Specificity: 0.7755
KNN - Accuracy: 0.7363, Precision: 0.6875, Recall: 0.7857, F1-score: 0.7333, Specificity: 0.6939
Naïve Bayes - Accuracy: 0.8681, Precision: 0.8125, Recall: 0.9286, F1-score: 0.8667, Specificity: 0.8163

Results using selected features:
SVM - Accuracy: 0.7802, Precision: 0.7391, Recall: 0.8095, F1-score: 0.7727, Specificity: 0.7551
KNN - Accuracy: 0.7143, Precision: 0.6600, Recall: 0.7857, F1-score: 0.7174, Specificity: 0.6531
Naïve Bayes - Accuracy: 0.8462, Precision: 0.7917, Recall: 0.9048, F1-score: 0.8444, Specificity: 0.7959


In [90]:
# Implement Fuzzy Expert System with Rule-based Classification
def fuzzy_expert_system(row):
    T_S = row["thal"]      # Thallium Scan
    Slope = row["slope"]
    CP = row["cp"]         # Chest Pain Type
    Chol = row["chol"]     # Serum Cholesterol
    F_S = row["ca"]        # Fluoroscopy
    ECG = row["restecg"]   # Resting ECG
    HR = row["thalach"]    # Heart Rate
    BP = row["trestbps"]   # Blood Pressure
    O_P = row["oldpeak"]   # Old Peak

    # Ensure no missing values (fill with median if necessary)
    if pd.isnull([T_S, Slope, CP, Chol, F_S, ECG, HR, BP, O_P]).any():
        return 1  # Default to CAD if missing values exist

    if T_S > 4.5 and Slope <= 1.5 and CP > 3.5 and Chol <= 240.5 and F_S <= 0.5:
        return 0  # Normal
    if T_S > 4.5 and Slope <= 1.5 and CP > 3.5 and Chol <= 240.5 and F_S > 0.5:
        return 1  # CAD
    if T_S > 4.5 and Slope > 1.5 and CP <= 3.5 and ECG <= 1.5 and HR <= 188:
        return 1
    if T_S > 4.5 and Slope > 1.5 and CP <= 3.5 and ECG <= 1.5 and HR > 188:
        return 0
    if T_S > 4.5 and Slope > 1.5 and CP <= 3.5 and ECG > 1.5 and F_S <= 0.5:
        return 0
    if T_S > 4.5 and Slope > 1.5 and CP <= 3.5 and ECG > 1.5 and F_S > 0.5:
        return 1
    if T_S > 4.5 and Slope <= 1.5 and CP <= 3.5 and BP <= 182 and O_P <= 2.4:
        return 0
    if T_S > 4.5 and Slope <= 1.5 and CP <= 3.5 and BP <= 182 and O_P > 2.4:
        return 1
    if T_S > 4.5 and Slope <= 1.5 and CP > 3.5 and Chol > 240.5:
        return 1
    if T_S > 4.5 and Slope <= 1.5 and CP <= 3.5 and BP > 182:
        return 1
    if T_S > 4.5 and Slope > 1.5 and CP > 3.5:
        return 1
    if T_S <= 4.5 and F_S > 0.5 and CP <= 3.5:
        return 0
    if T_S <= 4.5 and F_S > 0.5 and CP > 3.5:
        return 1
    if T_S <= 4.5 and F_S <= 0.5:
        return 0
    return 1  # Default to CAD if no rule matches


In [91]:
# Apply Fuzzy Expert System
y_pred_fuzzy = X_test.apply(lambda row: fuzzy_expert_system(row), axis=1)

# Compute Performance Metrics
accuracy_fuzzy = accuracy_score(y_test, y_pred_fuzzy)
precision_fuzzy = precision_score(y_test, y_pred_fuzzy, zero_division=1)
recall_fuzzy = recall_score(y_test, y_pred_fuzzy)
f1_fuzzy = f1_score(y_test, y_pred_fuzzy)
tn, fp, fn, tp = confusion_matrix(y_test, y_pred_fuzzy).ravel()
specificity_fuzzy = tn / (tn + fp)

print(f"Fuzzy Expert System - Accuracy: {accuracy_fuzzy:.4f}, Precision: {precision_fuzzy:.4f}, Recall: {recall_fuzzy:.4f}, F1-score: {f1_fuzzy:.4f}, Specificity: {specificity_fuzzy:.4f}")

Fuzzy Expert System - Accuracy: 0.5385, Precision: 1.0000, Recall: 0.0000, F1-score: 0.0000, Specificity: 1.0000
