In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the dataset into a pandas DataFrame
df = pd.read_csv("cleaned_heart_disease.csv")

# Split into train and test sets
X = df.drop(columns=["target"])
y = df["target"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,target
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,0


In [19]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier, VotingClassifier, BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier  # Importing KNN
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd


def run_models(X_train, X_test, y_train, y_test):
    rfmodel = RandomForestClassifier(n_estimators=100)
    gbmodel = GradientBoostingClassifier(n_estimators=100)
    abmodel = AdaBoostClassifier(n_estimators=100)
    etmodel = ExtraTreesClassifier(n_estimators=100)
    lrmodel = LogisticRegression()
    svcmodel = SVC()
    dtmodel = DecisionTreeClassifier()
    knnmodel = KNeighborsClassifier(n_neighbors=5)  # KNN with default number of neighbors = 5
    knnmodel_manhattan = KNeighborsClassifier(n_neighbors=5, metric='manhattan')  # KNN with Manhattan distance
    vcmodel = VotingClassifier(estimators=[('rf', rfmodel), ('gb', gbmodel), ('ab', abmodel), ('et', etmodel), ('lr', lrmodel), ('svc', svcmodel), ('dt', dtmodel)], voting='hard')
    bcmodel = BaggingClassifier(n_estimators=100)

    models = [rfmodel, gbmodel, abmodel, etmodel, lrmodel, svcmodel, dtmodel, knnmodel, knnmodel_manhattan, vcmodel, bcmodel]
    model_names = ['Random Forest', 'Gradient Boosting', 'AdaBoost', 'Extra Trees', 'Logistic Regression', 'SVC', 'Decision Tree', 
                   'K-Nearest Neighbors', 'K-Nearest Neighbors (Manhattan)', 'Voting Classifier', 'Bagging Classifier']
    
    acc_scores = []
    recall_scores = []
    precision_scores = []
    f1_scores = []
    train_acc = []

    for model in models:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc_scores.append(accuracy_score(y_test, y_pred))
        recall_scores.append(recall_score(y_test, y_pred, average='micro'))
        precision_scores.append(precision_score(y_test, y_pred, average='micro'))
        f1_scores.append(f1_score(y_test, y_pred, average='micro'))
        train_acc.append(accuracy_score(y_train, model.predict(X_train)))
        
    return pd.DataFrame({
        'Model': model_names, 
        'Accuracy': acc_scores, 
        'Recall': recall_scores, 
        'Precision': precision_scores, 
        'F1': f1_scores, 
        'Training Accuracies': train_acc
    })


In [20]:
table1 = run_models(X_train, X_test, y_train, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [21]:
# Do it again, but with binning the data to be more equal

df['target'] = df['target'].apply(lambda x: 1 if x > 0 else 0)
X = df.drop(columns=["target"])
y = df["target"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

table2 = run_models(X_train, X_test, y_train, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [22]:
print("5 Target variables")
display(table1)
print("\n2 Target variables (binned)")
display(table2)

5 Target variables


Unnamed: 0,Model,Accuracy,Recall,Precision,F1,Training Accuracies
0,Random Forest,0.565217,0.565217,0.565217,0.565217,1.0
1,Gradient Boosting,0.597826,0.597826,0.597826,0.597826,0.875
2,AdaBoost,0.527174,0.527174,0.527174,0.527174,0.565217
3,Extra Trees,0.538043,0.538043,0.538043,0.538043,1.0
4,Logistic Regression,0.581522,0.581522,0.581522,0.581522,0.565217
5,SVC,0.467391,0.467391,0.467391,0.467391,0.501359
6,Decision Tree,0.527174,0.527174,0.527174,0.527174,1.0
7,K-Nearest Neighbors,0.494565,0.494565,0.494565,0.494565,0.612772
8,K-Nearest Neighbors (Manhattan),0.483696,0.483696,0.483696,0.483696,0.612772
9,Voting Classifier,0.603261,0.603261,0.603261,0.603261,0.932065



2 Target variables (binned)


Unnamed: 0,Model,Accuracy,Recall,Precision,F1,Training Accuracies
0,Random Forest,0.809783,0.809783,0.809783,0.809783,1.0
1,Gradient Boosting,0.815217,0.815217,0.815217,0.815217,0.907609
2,AdaBoost,0.793478,0.793478,0.793478,0.793478,0.85462
3,Extra Trees,0.836957,0.836957,0.836957,0.836957,1.0
4,Logistic Regression,0.820652,0.820652,0.820652,0.820652,0.805707
5,SVC,0.73913,0.73913,0.73913,0.73913,0.722826
6,Decision Tree,0.733696,0.733696,0.733696,0.733696,1.0
7,K-Nearest Neighbors,0.73913,0.73913,0.73913,0.73913,0.789402
8,K-Nearest Neighbors (Manhattan),0.744565,0.744565,0.744565,0.744565,0.793478
9,Voting Classifier,0.831522,0.831522,0.831522,0.831522,0.938859


In [23]:
import torch
import torch.nn as nn
import torch.optim as optim


class CNN:
    def __init__(self, input_shape, num_classes):
        self.model = self.create_model(input_shape, num_classes)
        self.model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        
    def create_model(self, input_shape, num_classes):
        model = nn.Sequential(
            nn.Conv2d(1, 32, (3, 3), padding='same', activation='relu', input_shape=input_shape),
            nn.MaxPooling2D((2, 2)),
            nn.Conv2d(32, 64, (3, 3), padding='same', activation='relu'),
            nn.MaxPooling2D((2, 2)),
            nn.Flatten(),
            nn.Dense(128, activation='relu'),
            nn.Dense(num_classes, activation='softmax')
        )
        return model
    
    def fit(self, X_train, y_train, X_test, y_test, epochs=10, batch_size=32):
        self.model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))
        
    def predict(self, X):
        return self.model.predict(X)
    