In [1]:
!pip install xgboost scikit-learn



In [1]:
import numpy as np
import xgboost as xgb
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from google.colab import drive

In [2]:
# Mount Google Drive, then read the feature matrix and the label vector from it.
drive.mount('/content/drive')

features_path = "/content/drive/MyDrive/X_mitbih_binary.npy"
labels_path = "/content/drive/MyDrive/y_mitbih_binary.npy"
X, y = np.load(features_path), np.load(labels_path)

# Print the array shapes as a quick sanity check on what was loaded.
print("Dataset Loaded")
for shape_label, loaded_array in (("X shape:", X), ("y shape:", y)):
    print(shape_label, loaded_array.shape)

Mounted at /content/drive
Dataset Loaded
X shape: (100033, 200)
y shape: (100033,)


In [3]:
# Hold out 25% of the samples for testing. The split is stratified on y and
# uses a fixed random_state so that re-running the cell gives the same partition.
split_options = dict(test_size=0.25, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Report the shape of each partition.
for split_label, split_features in (
    ("Training Samples:", X_train),
    ("Testing Samples:", X_test),
):
    print(split_label, split_features.shape)

Training Samples: (75024, 200)
Testing Samples: (25009, 200)


In [6]:
# Weight applied to the positive class (label 1): the ratio of negative (label 0)
# to positive training samples. The counts use vectorised NumPy comparisons
# instead of iterating over every label in Python.
n_negative = int(np.count_nonzero(y_train == 0))
n_positive = int(np.count_nonzero(y_train == 1))
if n_positive == 0:
    # Fail with an explicit message rather than a bare ZeroDivisionError.
    raise ValueError(
        "y_train contains no samples with label 1; cannot compute scale_pos_weight"
    )
scale_pos_weight = n_negative / n_positive
print("Scale Pos Weight:", scale_pos_weight)

Scale Pos Weight: 3.0004265756638584


In [10]:
# Hyperparameters are collected in one mapping so they can be reviewed and
# tuned in a single place before the classifier is constructed.
xgb_params = {
    "n_estimators": 150,                    # number of boosting rounds
    "max_depth": 5,                         # maximum depth of each tree
    "learning_rate": 0.05,                  # step-size shrinkage per boosting round
    "subsample": 0.8,                       # fraction of rows sampled per tree
    "colsample_bytree": 0.8,                # fraction of columns sampled per tree
    "scale_pos_weight": scale_pos_weight,   # class-imbalance weight computed earlier
    "reg_lambda": 2,                        # L2 regularization term
    "reg_alpha": 1,                         # L1 regularization term
    "eval_metric": 'logloss',
    "random_state": 42,                     # fixed seed
}
model = xgb.XGBClassifier(**xgb_params)

# Fit on the training partition only; the test partition is kept for evaluation.
model.fit(X_train, y_train)

In [11]:
# Predict on the held-out partition, compute the three summaries, then print
# them in order: accuracy, per-class report, confusion matrix.
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
per_class_report = classification_report(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n")
print(per_class_report)
print("\nConfusion Matrix:\n")
print(confusion)

Accuracy: 0.987204606341717

Classification Report:

              precision    recall  f1-score   support

           0       0.99      0.99      0.99     18758
           1       0.97      0.97      0.97      6251

    accuracy                           0.99     25009
   macro avg       0.98      0.98      0.98     25009
weighted avg       0.99      0.99      0.99     25009


Confusion Matrix:

[[18598   160]
 [  160  6091]]


In [12]:
from google.colab import files

# Write the trained model to a .json file on the mounted Drive, then hand the
# same file to Colab's download helper. The path is named once so both calls
# are guaranteed to refer to the same file.
model_path = "/content/drive/MyDrive/xgboost_arrhythmia.json"
model.save_model(model_path)
files.download(model_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>