In [6]:
import os
import numpy as np
import pandas as pd
import wfdb
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Define a function to extract features from a signal
def extract_features(signal):
    mean_amplitude = np.mean(signal)
    max_amplitude = np.max(signal)
    rms_amplitude = np.sqrt(np.mean(signal ** 2))
    mfccs = np.random.rand(13)  # Placeholder for actual MFCC computation
    return [mean_amplitude, max_amplitude, rms_amplitude] + list(mfccs)

# Directory containing the .hea files
data_dir = "data"

# List to store feature vectors and labels
features_list = []
labels = []  # Replace with actual labels if available

# Process each file in the directory
for file in os.listdir(data_dir):
    if file.endswith(".hea"):
        # Load the record
        record_path = os.path.join(data_dir, file)
        record = wfdb.rdrecord(record_path[:-4])  # Remove .hea to get base name

        # Flatten the signal and extract features
        signal = record.p_signal.flatten()
        features = extract_features(signal)
        
        # Append features and simulated label
        features_list.append(features)
        labels.append(np.random.randint(0, 2))  # Replace with actual labels
        
        
# Create a DataFrame
columns = ['Mean Amplitude', 'Max Amplitude', 'RMS Amplitude'] + [f'MFCC_{i+1}' for i in range(13)] + ['Label']
data = pd.DataFrame(features_list, columns=columns[:-1])
data['Label'] = labels

# Split data into training and test sets
X = data.iloc[:, :-1]  # All columns except the last (Label)
y = data['Label']  # The last column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train an SVM model
model = SVC(kernel='linear', C=1)
model.fit(X_train, y_train)

# Predict and add results to the DataFrame
data['Predicted'] = model.predict(X)
data['Classification'] = data['Predicted'].apply(lambda x: 'Healthy' if x == 0 else 'Pathological')
# Save results to a CSV file
data.to_csv('classification_results.csv', index=False)

# Print summary statistics
classification_counts = data['Classification'].value_counts()
print("Classification Counts:")
print(classification_counts)

# Compute average feature values
average_features = data.iloc[:, :-2].mean()  # Exclude 'Label' and 'Predicted'
print("Average Feature Values:")
print(average_features)

# Classify based on average feature values
average_features_df = pd.DataFrame([average_features], columns=average_features.index).loc[:, X_train.columns]
predicted_class = model.predict(average_features_df)[0]
classification = 'Healthy' if predicted_class == 0 else 'Pathological'
print(f"Classification based on average features: {classification}")

Classification Counts:
Classification
Pathological    112
Healthy          96
Name: count, dtype: int64
Average Feature Values:
Mean Amplitude   -0.000397
Max Amplitude     0.740366
RMS Amplitude     0.196734
MFCC_1            0.494472
MFCC_2            0.521453
MFCC_3            0.508411
MFCC_4            0.511555
MFCC_5            0.489588
MFCC_6            0.500813
MFCC_7            0.512886
MFCC_8            0.494771
MFCC_9            0.452844
MFCC_10           0.496383
MFCC_11           0.500995
MFCC_12           0.492549
MFCC_13           0.466478
Label             0.495192
dtype: float64
Classification based on average features: Pathological
