In [None]:
pip install kagglehub

In [None]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score


In [None]:
import kagglehub

# Fetch the latest version of the dataset. `path` holds whatever
# kagglehub.dataset_download returns and is used below as the directory
# containing the record files.
path = kagglehub.dataset_download(
    "mondejar/mitbih-database"
)

print("Path to dataset files:", path)

In [None]:
# Show the file names found in the dataset directory, then echo the
# directory itself.
print(
    os.listdir(path)
)
print("Dataset path", path)

In [None]:
import pandas as pd

# Load a single record with pandas' default parsing (first line taken as the
# header) purely to inspect which column names the file declares.
df = pd.read_csv(
    os.path.join(path, "101.csv"),
)
print(df.columns)


In [None]:
# Maps a beat annotation symbol to an integer class id (0-4).
# Key insertion order is the same as the original literal.
#
# NOTE(review): the class-4 comment mentions paced beats, but 'Q' is the only
# symbol mapped to 4, and the extraction step skips any symbol missing from
# this dict. Confirm whether further symbols belong here.
label_map = {
    # 0: Normal, left/right bundle branch block, atrial escape,
    #    nodal (junctional) escape
    **dict.fromkeys(('N', 'L', 'R', 'e', 'j'), 0),
    # 1: Supraventricular
    **dict.fromkeys(('A', 'a', 'J', 'S'), 1),
    # 2: Ventricular
    **dict.fromkeys(('V', 'E'), 2),
    # 3: Fusion
    'F': 3,
    # 4: Unknown / paced
    'Q': 4,
}

In [None]:
# Accumulators filled during beat extraction: X_all collects one window of
# signal samples per beat, y_all the matching class id from label_map.
X_all, y_all = [], []

# Number of samples taken on each side of an annotated position, so every
# extracted window is 2 * WINDOW samples long.
WINDOW = 180

In [None]:
import csv

In [None]:
# Build the beat dataset: for every record that has both a signal CSV and an
# annotation file, cut a window of 2 * WINDOW samples around each annotated
# beat whose symbol appears in label_map.
#
# Beat extraction lives INSIDE the per-record loop. When it runs after the
# loop, `signal` and `annotations` only hold the last record that was read,
# so every other record is silently dropped from the dataset.

# Start from empty accumulators so re-running this cell does not append the
# same beats a second time.
X_all = []
y_all = []

# os.listdir returns entries in arbitrary order; sorting keeps the order of
# X_all / y_all (and therefore the seeded train/test split) reproducible.
for file in sorted(os.listdir(path)):
    if not file.endswith(".csv"):
        continue
    record_id = os.path.splitext(file)[0]
    csv_path = os.path.join(path, file)
    ann_path = os.path.join(path, record_id + "annotations.txt")

    # Records without a matching annotation file cannot be labelled.
    if not os.path.exists(ann_path):
        continue

    print(f"Processing record {record_id}")

    df = pd.read_csv(
        csv_path,
        header=None,
        names=['sample', 'MLII', 'V1'],
        engine="python",
        quoting=csv.QUOTE_NONE,
        on_bad_lines="skip",
    )

    # With header=None a leading header line (if the file has one) is read as
    # the first data row, which turns the column into strings and shifts every
    # sample index by one. Coerce to numbers and drop that leading
    # non-numeric row so signal[i] lines up with annotated sample i.
    mlii = pd.to_numeric(df["MLII"], errors="coerce")
    if len(mlii) > 0 and pd.isna(mlii.iloc[0]):
        mlii = mlii.iloc[1:]
    signal = mlii.to_numpy(dtype=float)

    annotations = pd.read_csv(
        ann_path,
        sep=r"\s+",  # replaces the deprecated delim_whitespace=True
        skiprows=1,  # skip the annotation file's first line
        header=None,
        names=['time', 'sample', 'symbol', 'sub', 'chan', 'num', 'aux'],
        engine="python",
        quoting=csv.QUOTE_NONE,
        on_bad_lines="skip",
    )

    for position, symbol in zip(annotations["sample"], annotations["symbol"]):
        # Only keep beats whose symbol has a class id.
        if symbol not in label_map:
            continue
        # A row without a usable sample index cannot be located in the signal.
        if pd.isna(position):
            continue

        position = int(position)
        start = position - WINDOW
        stop = position + WINDOW

        # The slice end is exclusive, so a window is complete as long as
        # stop <= len(signal); skip only windows that run past either edge.
        if start < 0 or stop > len(signal):
            continue

        X_all.append(signal[start:stop])
        y_all.append(label_map[symbol])

In [None]:
# Turn the accumulated Python lists into NumPy arrays, then report the
# feature-matrix shape and how many beats fall into each class id.
X_all, y_all = np.array(X_all), np.array(y_all)

print("Total beats: ", X_all.shape)
print(
    "Labels: ",
    np.unique(y_all, return_counts=True),
)

In [None]:
# Hold out 30% of the beats for testing. The split is stratified on the label
# and seeded so it is repeatable.
# NOTE(review): the split is made over individual beats; if X_all pools beats
# from several records, beats from the same record can land on both sides.
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all,
    test_size=0.30, random_state=42, stratify=y_all,
)

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest with 200 trees, no depth limit, a fixed seed, and n_jobs=-1
# for parallel fitting.
rf_clf = RandomForestClassifier(n_estimators=200, max_depth=None, random_state=42, n_jobs=-1)

# Fit on the training partition. Left as the cell's last expression so the
# notebook still displays the fitted estimator.
rf_clf.fit(X_train, y_train)

In [None]:
# Predict a class id for every held-out beat.
y_pred = rf_clf.predict(
    X_test
)

In [None]:
# accuracy_score's signature is (y_true, y_pred): pass the ground truth first.
# Plain accuracy happens to be symmetric, so the number is unchanged, but the
# correct order keeps this call consistent with other metrics where it matters.
RF_acc = accuracy_score(y_test, y_pred)

# Reported as a percentage.
print("Accuracy of RF Model", RF_acc*100)