In [1]:
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from Ml_feature_extractions import *
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report

# Load the data
# Read the CSV into a DataFrame and print its structural summary
# (index range, column count, dtypes, memory usage).
DATASET_CSV = 'ecg_sleep_apnea_dataset.csv'
data = pd.read_csv(DATASET_CSV)
data.info()
# data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2660 entries, 0 to 2659
Columns: 2501 entries, 0 to Target
dtypes: float64(2500), object(1)
memory usage: 50.8+ MB


In [2]:
# data cleaning
# Drop every row that contains at least one missing value, then renumber the
# index 0..n-1. The feature frames built later in this notebook are created
# from plain lists and therefore get a fresh 0..n-1 index; without the reset,
# any dropped row would leave the labels taken from `data` on a gappy index
# that no longer lines up label-for-label with those feature rows.
data = data.dropna().reset_index(drop=True)

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2660 entries, 0 to 2659
Columns: 2501 entries, 0 to Target
dtypes: float64(2500), object(1)
memory usage: 50.8+ MB


In [3]:
# Separate the signal columns (everything except 'Target') from the labels
# and encode the two label strings as integers.
LABEL_TO_CLASS = {'Normal': 0, 'Sleep Apnea': 1}
X_raw = data.drop(columns=['Target'])
y = data['Target'].map(LABEL_TO_CLASS)

# Series.map yields NaN for any value that is not a key of the mapping.
# Stop here and name the offending labels rather than letting NaN targets
# flow silently into the split and the classifier.
unmapped_labels = data.loc[y.isna(), 'Target'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(f"Unexpected values in 'Target': {unmapped_labels.tolist()}")

In [4]:
# feature extraction

def _features_frame(extractor):
    """Run `extractor` on each row of X_raw and tabulate the per-row results."""
    per_row = X_raw.apply(extractor, axis=1)
    return pd.DataFrame(per_row.tolist())


X_time_domain_features = _features_frame(time_domain_features)
X_frequency_domain_features = _features_frame(frequency_domain_features)
# Statistical features are currently disabled:
# X_statistical_features = pd.DataFrame(X_raw.apply(lambda row: statistical_ml_features(row.to_numpy(), 1), axis=1).tolist())

# Place the two feature tables side by side, one row per input signal.
X = pd.concat(
    [X_time_domain_features, X_frequency_domain_features],
    axis=1,
)
X.head()

Unnamed: 0,Mean,Median,Variance,Peak-to-Peak,RMS,Skewness,Kurtosis,Entropy,Power Spectral Density Mean,Low-Frequency Power,High-Frequency Power,Spectral Entropy
0,-0.004083,-0.007762,0.244759,1.489249,0.494748,0.014516,-1.49249,2.232527,0.004845,0.425195,0.119728,0.863392
1,0.015538,0.052688,0.257622,2.165627,0.507802,-0.078445,-1.440116,2.011403,0.004239,0.171849,0.311533,1.780063
2,0.013602,0.063045,0.259179,2.137494,0.509278,-0.109863,-1.437862,2.020186,0.004269,0.171789,0.314522,1.790826
3,-0.003737,-0.011746,0.245282,1.518203,0.495274,0.013777,-1.489838,2.244836,0.004858,0.426447,0.120175,0.862455
4,-0.003419,-0.009266,0.2454,1.500802,0.495391,0.009598,-1.494796,2.239826,0.004865,0.426806,0.120552,0.862292


In [5]:
# data split
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Report (features shape, labels shape) for the train split, then the test split.
for features, labels in ((x_train, y_train), (x_test, y_test)):
    print(features.shape, labels.shape)

(1862, 12) (1862,)
(798, 12) (798,)


In [6]:
# data scaling
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits.
scaler = StandardScaler()
scaler.fit(x_train)
X_train = scaler.transform(x_train)
X_test = scaler.transform(x_test)

In [7]:
# grid search for parameters
# Exhaustive search over every C / gamma / kernel combination with 5-fold
# cross-validation on the scaled training data.
# NOTE(review): per the scikit-learn SVC docs gamma only applies to the
# 'rbf', 'poly' and 'sigmoid' kernels, so the 'linear' candidates that differ
# only in gamma are duplicates of each other.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=['scale', 0.01, 0.1, 1],
    kernel=['linear', 'poly', 'rbf'],
)
grid_search = GridSearchCV(estimator=svm.SVC(), param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)

Best Parameters: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}


In [8]:
# train the model
# grid_search was constructed without a `refit` argument, so GridSearchCV's
# default (refit=True) applies: best_estimator_ has already been re-trained on
# the full (X_train, y_train) with the winning parameters. Calling fit on it
# again would only repeat that work, so the fitted estimator is used as-is.
best_model = grid_search.best_estimator_
best_model


In [9]:
# evaluation
# Predict on the scaled held-out split, then print the overall accuracy and
# the per-class precision / recall / F1 table.
y_pred = best_model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

report = classification_report(y_test, y_pred)
print(report)

Accuracy: 0.981203007518797
              precision    recall  f1-score   support

           0       0.98      0.98      0.98       392
           1       0.98      0.98      0.98       406

    accuracy                           0.98       798
   macro avg       0.98      0.98      0.98       798
weighted avg       0.98      0.98      0.98       798

