# Dataset link: 
https://www.timeseriesclassification.com/description.php?Dataset=ECG5000 

# Library versions
python            3.12.4

pandas            2.2.3

matplotlib        3.9.2

numpy             2.1.3 

scikit-learn      1.5.2

scipy             1.14.1

# Importing the libraries

In [None]:
import matplotlib.pyplot as plt 
import pandas as pd 
from scipy.io import arff
from scipy.stats import iqr
from scipy.signal import welch
import numpy as np 

from sklearn.svm import OneClassSVM
from sklearn.metrics import classification_report

# Loading the dataset

In [None]:
train_data_path = 'ECG5000/ECG5000_TRAIN.arff'
test_data_path = 'ECG5000/ECG5000_TEST.arff'

# loadarff returns a (records, metadata) pair; only the records are used.
train_data = arff.loadarff(train_data_path)[0]
train_df = pd.DataFrame(train_data)

test_data = arff.loadarff(test_data_path)[0]
test_df = pd.DataFrame(test_data)

# When the label column is object-typed, decode each value from UTF-8 bytes
# to str so that later comparisons against '1' work on plain strings.
for frame in (train_df, test_df):
    if frame['target'].dtype == object:
        frame['target'] = frame['target'].apply(lambda raw: raw.decode('UTF-8'))


# Plot single ECG

In [None]:
# First training row without its last column (the 'target' label).
example_ecg = train_df.iloc[0, :-1]

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(example_ecg.values, alpha=0.7)
ax.set_title('One ECG')
ax.set_xlabel('Time')
ax.set_ylabel('Amplitude')
plt.show()

# Preprocessing the dataset
Feature extraction function

In [None]:
def extract_features(signal):
    """Summarise one 1-D signal as a dictionary of scalar features.

    Parameters
    ----------
    signal : array-like
        Samples of a single signal. It is converted to a float NumPy array.

    Returns
    -------
    dict
        Keys ``mean``, ``std``, ``min``, ``max``, ``iqr`` (interquartile
        range), ``energy`` (sum of squared samples) and ``dominant_freq``
        (frequency of the largest Welch PSD bin; in cycles per sample,
        because no sampling rate is passed to ``welch``).
    """
    signal = np.asarray(signal, dtype=float)

    features = {}
    features['mean'] = np.mean(signal)
    features['std'] = np.std(signal)
    features['min'] = np.min(signal)
    features['max'] = np.max(signal)
    features['iqr'] = iqr(signal)
    features['energy'] = np.sum(np.square(signal))

    # welch defaults to 256-sample segments and warns whenever the signal is
    # shorter, then falls back to the signal length. Capping it here gives
    # the same segmentation without a warning per row.
    f, pxx = welch(signal, nperseg=min(256, len(signal)))
    # Report the frequency at the PSD peak, not the index of the peak bin.
    features['dominant_freq'] = f[np.argmax(pxx)]
    return features

# Extracting data
Here we keep only the training rows labeled as normal (`target == '1'`), because the model is trained on normal data only. 
For each row, we apply the feature extractor defined in the cell above. 

In [None]:
# Training uses only rows whose label is '1'; the test labels are mapped to
# +1 for label '1' and -1 for every other label.
is_normal = train_df['target'] == '1'
normal_train_df = train_df[is_normal]
test_lables = np.where(test_df['target'] == '1', 1, -1)


def _row_features(row):
    """Run extract_features on the raw values of one DataFrame row."""
    return extract_features(row.values)


# Row-wise apply yields a Series of feature dicts, which is then expanded
# into one column per feature.
X_train_features = normal_train_df.drop(columns='target').apply(_row_features, axis=1)
X_train_df = pd.DataFrame(X_train_features.tolist())

X_test_features = test_df.drop(columns='target').apply(_row_features, axis=1)
X_test_df = pd.DataFrame(X_test_features.tolist())

# Training the model

We use OneClassSVM, which is suited to anomaly detection on numerical features: it is fitted on the normal samples only and labels test samples as inliers (1) or outliers (-1). 

In [None]:
# Model hyper-parameters, kept together in one place.
# NOTE(review): the features are passed in unscaled; RBF kernels are
# sensitive to feature scale, so standardising them may be worth trying.
svm_params = {'kernel': 'rbf', 'gamma': 'auto', 'nu': 0.1}

# fit() returns the estimator itself, so construction and fitting can chain.
one_class_svm = OneClassSVM(**svm_params).fit(X_train_df)

# Predictions

In [None]:
# The model is expected to emit +1 / -1 per test row, matching the encoding
# of test_lables.
y_pred = one_class_svm.predict(X_test_df)

# Labels sort as [-1, 1], so the names are given in that order.
report = classification_report(
    test_lables,
    y_pred,
    target_names=['Anomaly', 'Normal'],
)
print('One SVM results', report, sep='\n')

# Plotting different ECGs
Plot the first five test ECGs that the model predicted as normal (left) and as anomalous (right). 

In [None]:
# Split the test rows by the model's verdict: 1 = normal, -1 = anomalous.
normal_predictions = test_df.iloc[y_pred == 1]
anomalous_predictions = test_df.iloc[y_pred == -1]

plt.figure(figsize=(15, 6))

# One panel per predicted class: (rows, panel title, extra plot kwargs).
# The normal panel keeps the default colour cycle; anomalies are drawn red.
panels = (
    (normal_predictions, 'Normal ECGs', {}),
    (anomalous_predictions, 'Anomalous ECGs', {'color': 'red'}),
)

for position, (rows, title, line_style) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)

    # head(5) returns at most five rows, so a class with fewer than five
    # predictions (or none at all) is plotted as-is instead of raising
    # IndexError. The last column ('target') is excluded from the signal.
    for ecg in rows.iloc[:, :-1].head(5).to_numpy():
        plt.plot(ecg, alpha=0.7, **line_style)

    plt.xlabel('Time')
    plt.ylabel('Amplitude')
    plt.title(title)

plt.show()