In [None]:
# Download the Kaggle dataset `protobioengineering/mit-bih-arrhythmia-database-modern-2023`
# by invoking the `kaggle` command-line tool through IPython's `!` shell escape.
#   -d <owner/dataset>  dataset to download
#   -p ./data           destination directory (the later cells read CSV files from ./data)
#   --unzip             extract the downloaded archive
# NOTE(review): presumably requires the kaggle CLI to be installed and API credentials
# to be configured on this machine -- confirm before relying on Run All.
!kaggle datasets download -d protobioengineering/mit-bih-arrhythmia-database-modern-2023 -p ./data --unzip


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Directory that holds the CSV records, and the name of the CSV column to analyse.
DATA_DIR = "./data"
SIGNAL = "MLII"

# os.listdir() returns entries in arbitrary order. Sorting makes the record order
# (and therefore the order of every list derived from `all_data`) reproducible
# across runs and machines.
file_names = sorted(f for f in os.listdir(DATA_DIR) if f.endswith('.csv'))

# Every loaded record as a DataFrame, keyed by its CSV file name. Records are stored
# even when they lack the SIGNAL column, so later cells must check for it.
all_data = {}

# Load each record and preview the first 1000 samples of the desired signal.
for file_name in file_names:
    file_path = os.path.join(DATA_DIR, file_name)
    data = pd.read_csv(file_path)
    all_data[file_name] = data
    if SIGNAL in data.columns:
        plt.figure(figsize=(10, 4))
        # The x-axis is the DataFrame row index of the plotted samples.
        plt.plot(data[SIGNAL][:1000])
        plt.title(f"Signal from {file_name}")
        plt.xlabel("Time")
        plt.ylabel("Amplitude")
        plt.show()
    else:
        print(f"'{SIGNAL}' data not found in {file_name}")
        

In [None]:
from scipy.signal import resample

# Number of samples every record is resampled to.
# NOTE(review): despite the name, this is an output length, not a rate in Hz.
# scipy's resample(x, num) returns exactly `num` samples, so each record is
# compressed to 1000 points whatever its original length, while the band-pass
# cell further down still assumes fs=360. Confirm this is intended.
TARGET_SAMPLING_RATE = 1000
standardized_signals = []

# Resample the SIGNAL column of every record to TARGET_SAMPLING_RATE samples
# using scipy's Fourier-based `resample`.
for file_name, data in all_data.items():
    # Check each record individually. The previous `else` was attached to the
    # `for` loop, so the message printed once after the loop for the last file
    # even when nothing was missing, and a record without the column raised
    # KeyError instead of being reported.
    if SIGNAL not in data.columns:
        print(f"'{SIGNAL}' data not found in {file_name}")
        continue
    signal = data[SIGNAL].values
    resampled_signal = resample(signal, TARGET_SAMPLING_RATE)
    standardized_signals.append(resampled_signal)


In [None]:
from sklearn.preprocessing import MinMaxScaler

# One scaler object is reused; fit_transform() re-fits it on every call, so each
# signal is rescaled independently of the others.
scaler = MinMaxScaler(feature_range=(-1, 1))

# Map every standardized signal onto the range [-1, 1]. The scaler is given a
# single-column 2-D array, and the result is flattened back to 1-D.
normalized_signals = [
    scaler.fit_transform(samples.reshape(-1, 1)).flatten()
    for samples in standardized_signals
]


In [None]:
from scipy.signal import butter, filtfilt

# Band-pass filter: removes irrelevant low-frequency drift and high-frequency noise.
def bandpass_filter(signal, low_cutoff=0.5, high_cutoff=50, fs=360, order=4):
    """Apply a Butterworth band-pass filter forwards and backwards.

    Args:
        signal: Array-like sequence of samples to filter.
        low_cutoff: Lower band edge, in the same units as ``fs``.
        high_cutoff: Upper band edge, in the same units as ``fs``.
        fs: Sampling frequency used to normalise the band edges.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        The output of ``scipy.signal.filtfilt`` for ``signal``.
    """
    # Without an explicit fs argument, butter() expects the band edges as
    # fractions of the Nyquist frequency (half the sampling frequency).
    nyquist_freq = 0.5 * fs
    band_edges = [low_cutoff / nyquist_freq, high_cutoff / nyquist_freq]
    numerator, denominator = butter(order, band_edges, btype='band')
    # filtfilt runs the filter in both directions over the signal.
    return filtfilt(numerator, denominator, signal)

# Band-pass every normalized signal with a 0.5-50 pass band at fs=360.
filtered_signals = [
    bandpass_filter(normalized, low_cutoff=0.5, high_cutoff=50, fs=360)
    for normalized in normalized_signals
]


In [None]:
import numpy as np

# Apply a moving average for smoothing; this mitigates high-frequency spikes.
def moving_average(signal, window_size=5):
    """Smooth ``signal`` with an equally weighted sliding window.

    Args:
        signal: 1-D array-like sequence of samples.
        window_size: Number of samples in the averaging window.

    Returns:
        The result of ``np.convolve(..., mode='same')`` of ``signal`` with a
        kernel of ``window_size`` equal weights that sum to one.
    """
    # Equal weights of 1 / window_size.
    box_kernel = np.ones(window_size) / window_size
    smoothed = np.convolve(signal, box_kernel, mode='same')
    return smoothed

# Smooth every band-passed signal with a 5-sample moving average.
smoothed_signals = [
    moving_average(filtered, window_size=5)
    for filtered in filtered_signals
]


In [None]:
from scipy.signal import savgol_filter

# Alternative to the moving average: smooth with a Savitzky-Golay filter, which also
# mitigates high-frequency spikes. This assigns `smoothed_signals` again, so when
# both smoothing cells are run, only this result is kept.
smoothed_signals = [
    savgol_filter(filtered, window_length=11, polyorder=3)
    for filtered in filtered_signals
]
