In [None]:
from pathlib import Path
import pandas as pd


def load_heart_data(path="datasets/ptbdb_normal.csv", **read_csv_kwargs) -> pd.DataFrame:
    """Read the ``ptbdb_normal.csv`` file (or another CSV) into a DataFrame.

    Parameters
    ----------
    path : str or pathlib.Path, optional
        Location of the CSV file. Defaults to ``datasets/ptbdb_normal.csv``,
        which is resolved relative to the current working directory.
    **read_csv_kwargs
        Extra keyword arguments forwarded unchanged to ``pandas.read_csv``
        (for example ``header=None``).

    Returns
    -------
    pandas.DataFrame
        The parsed file contents.

    Notes
    -----
    NOTE(review): with no extra arguments ``read_csv`` infers the header from
    the first row. If this file has no header row, its first record is
    consumed as column labels -- confirm, and pass ``header=None`` if so.
    """
    return pd.read_csv(Path(path), **read_csv_kwargs)


def load_abnormal_data(path="datasets/ptbdb_abnormal.csv", **read_csv_kwargs) -> pd.DataFrame:
    """Read the ``ptbdb_abnormal.csv`` file (or another CSV) into a DataFrame.

    Parameters
    ----------
    path : str or pathlib.Path, optional
        Location of the CSV file. Defaults to ``datasets/ptbdb_abnormal.csv``,
        which is resolved relative to the current working directory.
    **read_csv_kwargs
        Extra keyword arguments forwarded unchanged to ``pandas.read_csv``
        (for example ``header=None``).

    Returns
    -------
    pandas.DataFrame
        The parsed file contents.

    Notes
    -----
    NOTE(review): with no extra arguments ``read_csv`` infers the header from
    the first row. If this file has no header row, its first record is
    consumed as column labels -- confirm, and pass ``header=None`` if so.
    """
    return pd.read_csv(Path(path), **read_csv_kwargs)


# Read both CSV files up front. The tuple is evaluated left to right, so the
# normal file is still loaded before the abnormal one.
healthy_heart_data, unhealthy_heart_data = load_heart_data(), load_abnormal_data()

# Trailing bare expression: the notebook renders the normal-file frame.
healthy_heart_data

In [None]:
# Relabel the columns of each source frame positionally (0..n-1) so the two
# frames line up column-for-column when stacked. ``set_axis`` returns a new
# frame, so the raw frames loaded earlier keep the labels they were displayed
# with instead of being mutated in place.
dfs = [
    df.set_axis(list(range(len(df.columns))), axis=1)
    for df in (healthy_heart_data, unhealthy_heart_data)
]

# Stack the rows, shuffle them with a fixed seed, renumber the index, and
# name the last column "Target".
data = pd.concat(dfs, axis=0).sample(frac=1.0, random_state=1).reset_index(drop=True)
data = data.rename(columns={data.columns[-1]: "Target"})
data

In [None]:
# Features: every column except the label. Labels: the "Target" column.
X = data.drop(columns="Target")
y = data.loc[:, "Target"]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows as a test set; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

import xgboost as xgb

# Baseline classifier: histogram tree method, every other hyperparameter left
# at the library default.
clf = xgb.XGBClassifier(tree_method="hist")

# NOTE(review): the held-out test split is passed as the evaluation set. No
# early-stopping option is set here, so it is presumably only used for
# per-round progress reporting -- confirm before adding early stopping, which
# would let the test data influence the fitted model.
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])

In [None]:
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report,
)

# Predict labels for the held-out split with the baseline classifier.
y_pred = clf.predict(X_test)

# Accuracy, then precision / recall / F1 computed with average="weighted".
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    scorer(y_test, y_pred, average="weighted")
    for scorer in (precision_score, recall_score, f1_score)
)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

# Counts of true vs. predicted labels.
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

# Per-class text summary.
class_report = classification_report(y_test, y_pred)
print("Classification Report:\n", class_report)

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate hyperparameters. Only ``subsample`` lists more than one value, so
# the search compares two configurations.
param_grid = dict(
    n_estimators=[300],
    learning_rate=[0.2],
    max_depth=[7],
    subsample=[0.8, 0.7],
    colsample_bytree=[1.0],
)

# 3-fold cross-validated search over the grid, scored on accuracy.
grid_search = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring="accuracy",
    cv=3,
    verbose=2,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Keep the winning combination for the cell that builds the tuned model.
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

In [None]:
import xgboost as xgb
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report,
)

# Build a fresh classifier from the five tuned settings found by the grid
# search, plus the histogram tree method and a fixed seed.
best_clf = xgb.XGBClassifier(
    tree_method="hist",
    random_state=42,
    **{
        name: best_params[name]
        for name in (
            "colsample_bytree",
            "learning_rate",
            "max_depth",
            "n_estimators",
            "subsample",
        )
    },
)

# Fit on the full training split (the grid search only fitted on CV folds).
best_clf.fit(X_train, y_train)

# Predict labels for the held-out split.
y_pred = best_clf.predict(X_test)

# Accuracy, then precision / recall / F1 computed with average="weighted".
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    precision_score(y_test, y_pred, average="weighted"),
    recall_score(y_test, y_pred, average="weighted"),
    f1_score(y_test, y_pred, average="weighted"),
)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

# Counts of true vs. predicted labels.
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

# Per-class text summary.
class_report = classification_report(y_test, y_pred)
print("Classification Report:\n", class_report)

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy over the complete feature matrix and labels.
# NOTE(review): this scores the baseline ``clf``, not the tuned ``best_clf``
# -- confirm that is the intended model.
scores = cross_val_score(estimator=clf, X=X, y=y, scoring="accuracy", cv=5)

print("Cross-Validation Accuracy Scores:", scores)
print("Mean Cross-Validation Accuracy:", scores.mean())

In [None]:
import joblib

# Persist the baseline classifier to a file in the working directory.
joblib.dump(clf, "xgboost_model.pkl")

# Read it straight back; ``clf`` is rebound to the deserialized copy.
clf = joblib.load("xgboost_model.pkl")


In [None]:
from pathlib import Path
import pandas as pd


def load_true_data(path="datasets/mitbih_train.csv", **read_csv_kwargs) -> pd.DataFrame:
    """Read the ``mitbih_train.csv`` file (or another CSV) into a DataFrame.

    Parameters
    ----------
    path : str or pathlib.Path, optional
        Location of the CSV file. Defaults to ``datasets/mitbih_train.csv``,
        which is resolved relative to the current working directory.
    **read_csv_kwargs
        Extra keyword arguments forwarded unchanged to ``pandas.read_csv``
        (for example ``header=None``).

    Returns
    -------
    pandas.DataFrame
        The parsed file contents.

    Notes
    -----
    NOTE(review): with no extra arguments ``read_csv`` infers the header from
    the first row. If this file has no header row, its first record is
    consumed as column labels -- confirm, and pass ``header=None`` if so.
    """
    return pd.read_csv(Path(path), **read_csv_kwargs)

# Load the MIT-BIH training file and relabel its columns positionally (0..n-1).
data2 = load_true_data()
data2.columns = list(range(len(data2.columns)))

# Name the last column "Target" (plain reassignment instead of inplace=True).
data2 = data2.rename(columns={data2.columns[-1]: "Target"})

# Collapse the label to binary: 0 stays 0, any other value becomes 1.
# Vectorized comparison instead of a row-by-row Python lambda; the explicit
# int64 cast matches the dtype the lambda-based version produced.
data2["Target"] = data2["Target"].ne(0).astype("int64")
data2


In [None]:
# Features are every column but the last; the label is the "Target" column.
X2 = data2.iloc[:, :-1]
y2 = data2["Target"]

# 70/30 split of the second dataset with a fixed seed.
X_train_new, X_test_new, y_train_new, y_test_new = train_test_split(
    X2,
    y2,
    test_size=0.3,
    random_state=42,
)

# Refit ``best_clf`` on the new training split, then score it on the new
# held-out split. The model keeps the hyperparameters it was built with.
# NOTE(review): this refit replaces whatever ``best_clf`` was fitted on
# before -- confirm a refit (rather than evaluating the existing fit) is
# intended here.
best_clf.fit(X_train_new, y_train_new)
y_pred_new = best_clf.predict(X_test_new)

print("Evaluation on New Dataset:")
print(f"Accuracy: {accuracy_score(y_test_new, y_pred_new)}")
print(f"Precision: {precision_score(y_test_new, y_pred_new, average='weighted')}")
print(f"Recall: {recall_score(y_test_new, y_pred_new, average='weighted')}")
print(f"F1 Score: {f1_score(y_test_new, y_pred_new, average='weighted')}")
print(f"Confusion Matrix:\n{confusion_matrix(y_test_new, y_pred_new)}")
print(f"Classification Report:\n{classification_report(y_test_new, y_pred_new)}")

In [None]:
from sklearn.model_selection import GridSearchCV

# Single-point grid: each hyperparameter has exactly one candidate value, so
# the search cross-validates one configuration.
param_grid = {
    name: [value]
    for name, value in (
        ("n_estimators", 100),
        ("learning_rate", 0.1),
        ("max_depth", 5),
        ("subsample", 0.9),
        ("colsample_bytree", 1.0),
    )
}

# 3-fold cross-validated search on the second dataset's training split,
# scored on accuracy.
grid_search = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring="accuracy",
    cv=3,
    verbose=2,
    n_jobs=-1,
)
grid_search.fit(X_train_new, y_train_new)

# Keep the selected settings for the cell that builds the next model.
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

In [None]:
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report,
)

# Build a second classifier from the five settings currently held in
# ``best_params``, plus the histogram tree method and a fixed seed.
best2_clf = xgb.XGBClassifier(
    **{
        key: best_params[key]
        for key in (
            "colsample_bytree",
            "learning_rate",
            "max_depth",
            "n_estimators",
            "subsample",
        )
    },
    tree_method="hist",
    random_state=42,
)

# Fit on the second dataset's full training split.
best2_clf.fit(X_train_new, y_train_new)

# Predict labels for the second dataset's held-out split.
y_pred_new = best2_clf.predict(X_test_new)

# Accuracy, then precision / recall / F1 computed with average="weighted".
accuracy = accuracy_score(y_test_new, y_pred_new)
precision, recall, f1 = (
    metric(y_test_new, y_pred_new, average="weighted")
    for metric in (precision_score, recall_score, f1_score)
)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

# Counts of true vs. predicted labels.
conf_matrix = confusion_matrix(y_test_new, y_pred_new)
print("Confusion Matrix:\n", conf_matrix)

# Per-class text summary.
class_report = classification_report(y_test_new, y_pred_new)
print("Classification Report:\n", class_report)

In [None]:
# Recreate the hold-out split of the first dataset (``X`` / ``y``). The
# arguments and seed match the earlier split, so the partition is the same.
# The previous version of this cell also rebound ``X2`` / ``y2`` to the first
# dataset without ever using them; that dead reassignment is dropped so those
# names keep referring to the second dataset.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Score ``best2_clf`` as already fitted (no refit here) on the first
# dataset's held-out rows.
y_pred_new = best2_clf.predict(X_test)
print("Evaluation on New Dataset:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_new)}")
print(f"Precision: {precision_score(y_test, y_pred_new, average='weighted')}")
print(f"Recall: {recall_score(y_test, y_pred_new, average='weighted')}")
print(f"F1 Score: {f1_score(y_test, y_pred_new, average='weighted')}")
print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred_new)}")
print(f"Classification Report:\n{classification_report(y_test, y_pred_new)}")