In [11]:
import pyedflib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import statistics
import warnings
warnings.filterwarnings('ignore')
from statsmodels.tsa import stattools
from scipy.signal import periodogram
import logging
from itertools import groupby

In [12]:
# Choose EEG or EMG

In [13]:
# Select which signal to analyze: "EEG" or "EMG".
# NOTE: this switch is not read by any cell visible in this notebook; the feature
# cells pass `eeg_segments` explicitly regardless of its value.

data_type = "EEG" # Does not have an effect yet, will be added later when processing anesthesia data
#data_type = "EMG"

In [14]:
# EEG/EMG data

def read_edf_file(file_path):
    """
    Reads an .edf file and returns the EEG and EMG streams as one pandas DataFrame.

    Parameters:
    - file_path (str): Path of the .edf file to read.

    Returns:
    - DataFrame with a "Time" column (seconds, starting at 0 for the first sample)
      and one column per signal, named after the labels of channel 0 and channel 1.
      Both signals are truncated to the length of the shorter one.
    """
    f = pyedflib.EdfReader(file_path)

    try:
        # Assuming the EEG channel is the first channel and EMG is the second channel
        eeg_signal = f.readSignal(0)
        emg_signal = f.readSignal(1)

        # Extract the channel names for the DataFrame
        signal_labels = f.getSignalLabels()
        eeg_channel_name = signal_labels[0]
        emg_channel_name = signal_labels[1]

        # Get the sample frequency
        sample_frequency = f.getSampleFrequency(0)  # Assuming both streams have the same frequency
    finally:
        # Release the file handle even when one of the reads above raises
        f.close()

    # Calculate the timestamps for the samples (vectorized: sample index / frequency)
    n_samples = min(len(eeg_signal), len(emg_signal))
    time = np.arange(n_samples) / sample_frequency

    # Create pandas DataFrame
    return pd.DataFrame({
        'Time': time,
        eeg_channel_name: eeg_signal[:n_samples],
        emg_channel_name: emg_signal[:n_samples],
    })


# Path of the EDF recording (relative to the notebook's working directory)
file = 'Data/edf_293.edf'

# Convert to pandas DataFrame
data = read_edf_file(file)
# Drop the first row of the recording, i.e. one single sample (not a whole segment).
# NOTE(review): the stated reason is a NaN first *label*, but labels are later paired
# one-to-one with segments (see the train_test_split call), so this only shifts every
# segment by one sample — confirm this is the intended alignment with the label file.
data = data.iloc[1:] # The first label is NaN

In [15]:
# Labels

# Load the label table from disk
label_df = pd.read_csv("Data/Data_293.csv")

# Skip the first entry of "NAPS_Numeric" (it is NaN according to the original note)
# and keep the remaining labels as a plain list of ints
labels = list(map(int, label_df["NAPS_Numeric"].iloc[1:]))

# Segmenting Data

In [16]:
def segment_data(df, segment_size, step_size = 2):
    """
    Segments time-series data into EEG and EMG segments.

    Parameters:
    - df (DataFrame): The input dataframe containing the columns "Time", "EEG" and "EMG".
    - segment_size (int or float): The desired size of each segment in seconds.
    - step_size (float, optional): The step size of "Time" in milliseconds. Default is 2 millisecond.

    Returns:
    Tuple of two lists:
    - List of EEG segments.
    - List of EMG segments.
    """

    # Floor-divide before casting to int: the previous `int(t) // segment_size` produced
    # a float for a float segment_size, which range() rejects. For integer segment sizes
    # both forms give the same count.
    n_segments = int(df["Time"].iloc[-1] // segment_size)

    # Number of rows that make up one segment
    samples_per_segment = int(segment_size*1000/step_size)

    eeg_column = df["EEG"]
    emg_column = df["EMG"]

    eeg_segments = []
    emg_segments = []

    for i in range(n_segments):
        # Positional (iloc) window of the i-th segment
        start_idx = int(i* segment_size*1000/step_size)
        end_idx = start_idx + samples_per_segment
        eeg_segments.append(list(eeg_column.iloc[start_idx:end_idx]))
        emg_segments.append(list(emg_column.iloc[start_idx:end_idx]))

    return eeg_segments, emg_segments

In [17]:
# Segment the data into consecutive, non-overlapping windows
segment_size = 4  # seconds
# step_size is the spacing between samples in milliseconds.
# NOTE(review): presumably 2 ms matches the recording's sample frequency — confirm against the EDF header.
eeg_segments, emg_segments = segment_data(data, segment_size, step_size = 2)

In [18]:
## Choose Train and Test Data Indices

In [19]:
# Choose test data set size for classification later (recommended: 0.2-0.3).
# Passed to train_test_split as `test_size`.

test_size = 0.3

In [20]:
# Positional index of every label; it is split together with the data so the
# train/test membership of each segment can be looked up afterwards
all_indices = np.arange(len(labels))

# Should be the same as in Preprocessing_And_Computing_Persistence Diagrams due to fixed random state.
# Only the last two outputs (the split index arrays) are kept; the split segments and labels are discarded.
# NOTE(review): presumably requires len(eeg_segments) == len(labels) — confirm the segment count matches the label count.
_, _, _, _, train_indices, test_indices = train_test_split(eeg_segments, labels, all_indices, test_size=test_size, random_state=32)

In [21]:
# How many segments per label do you want to analyze?
# Used as an upper bound (slice `[:no_segments]`) on each per-label index list.
no_segments = len(labels) # complete data in this case

In [22]:
# Create dictionaries which contain all (test and train) segment indices (values) for each label (key)

# Build the membership sets once: `index in <numpy array>` rescans the whole array on
# every lookup, which made the per-label comprehensions quadratic in the number of segments.
train_index_set = set(map(int, train_indices))
test_index_set = set(map(int, test_indices))

unique_labels = list(set(labels))
train_indices_dict = {label: [] for label in unique_labels}
test_indices_dict = {label: [] for label in unique_labels}

# Single pass in ascending index order, so every per-label list stays sorted
for index, value in enumerate(labels):
    if index in train_index_set:
        train_indices_dict[value].append(index)
    if index in test_index_set:
        test_indices_dict[value].append(index)

# Keep at most `no_segments` indices per label
train_indices_dict = {label: indices[:no_segments] for label, indices in train_indices_dict.items()}
test_indices_dict = {label: indices[:no_segments] for label, indices in test_indices_dict.items()}

In [27]:
# Statistical Features

def compute_means(segments, indices_dict, label):
    """
    Computes means of all segments for a certain label

    Parameters:
    - segments (list of lists): Complete EEG/EMG segments
    - indices_dict (dicts): dictionary which contains all segment indices (values) for each label (key) for either train or test set.
    - label (int): Label we want to compute means for. 1, 3, 5 or 7.

    Returns:
    - Means of time series segments (list of floats)
    """

    # Read from the `segments` argument rather than the notebook-level `eeg_segments`,
    # so the function honours whatever segment list (EEG or EMG) the caller passes in.
    return [statistics.mean(segments[label_idx]) for label_idx in indices_dict[label]]
    


def compute_variance(segments, indices_dict, label):
    """
    Sample variance of every segment that belongs to the given label.

    Parameters:
    - segments (list of lists): Complete EEG/EMG segments
    - indices_dict (dicts): maps each label (key) to the segment indices (values) of either the train or the test set.
    - label (int): Label whose segments are evaluated. 1, 3, 5 or 7.

    Returns:
    - Variances of time series segments (list of floats), in the order of indices_dict[label]
    """
    return [statistics.variance(segments[segment_idx]) for segment_idx in indices_dict[label]]




def compute_entropy(segments, indices_dict, label, freq = 1):
    """
    Normalized spectral entropy of every segment that belongs to the given label.

    Parameters:
    - segments (list of lists): Complete EEG/EMG segments
    - indices_dict (dicts): maps each label (key) to the segment indices (values) of either the train or the test set.
    - label (int): Label whose segments are evaluated. 1, 3, 5 or 7.
    - freq (float, optional): Sampling frequency handed to scipy's periodogram. Default is 1.

    Returns:
    - Entropies of time series segments (list of floats), in the order of indices_dict[label]
    """
    results = []

    for segment_idx in indices_dict[label]:
        # Power spectral density of the segment; the frequency axis is not needed
        power = periodogram(segments[segment_idx], freq)[1]

        # Turn the spectrum into weights that sum to one
        weights = power / np.sum(power)

        # nansum drops the NaN terms that empty bins produce (0 * log2(0))
        weighted_log_sum = np.nansum(weights * np.log2(weights))

        # Divide by log2(number of bins) and flip the sign
        results.append(-(weighted_log_sum / np.log2(weights.size)))

    return results


def compute_lumpiness(segments, indices_dict, label, window_size = 30):
    """
    Lumpiness (variance of the per-window variances) of every segment of the given label.

    Parameters:
    - segments (list of lists): Complete EEG/EMG segments
    - indices_dict (dicts): maps each label (key) to the segment indices (values) of either the train or the test set.
    - label (int): Label whose segments are evaluated. 1, 3, 5 or 7.
    - window_size (int, optional): Each segment is split into len(segment) // window_size + 1 windows. Default is 30.

    Returns:
    - Lumpinesses of time series segments (list of floats), in the order of indices_dict[label]
    """
    results = []

    for segment_idx in indices_dict[label]:
        series = segments[segment_idx]
        n_windows = len(series) // window_size + 1
        window_variances = [np.var(window) for window in np.array_split(series, n_windows)]
        results.append(np.var(window_variances))

    return results


def compute_stabilities(segments, indices_dict, label, window_size = 30):
    """
    Stability (variance of the per-window means) of every segment of the given label.

    Parameters:
    - segments (list of lists): Complete EEG/EMG segments
    - indices_dict (dicts): maps each label (key) to the segment indices (values) of either the train or the test set.
    - label (int): Label whose segments are evaluated. 1, 3, 5 or 7.
    - window_size (int, optional): Each segment is split into len(segment) // window_size + 1 windows. Default is 30.

    Returns:
    - Stabilities of time series segments (list of floats), in the order of indices_dict[label]
    """
    results = []

    for segment_idx in indices_dict[label]:
        series = segments[segment_idx]
        n_windows = len(series) // window_size + 1
        window_means = [np.mean(window) for window in np.array_split(series, n_windows)]
        results.append(np.var(window_means))

    return results



def compute_hursts(segments, indices_dict, label, lag_size = 30):
    """
    Computes hursts of all segments for a certain label

    Parameters:
    - segments (list of lists): Complete EEG/EMG segments
    - indices_dict (dicts): dictionary which contains all segment indices (values) for each label (key) for either train or test set.
    - label (int): Label we want to compute hursts for. 1, 3, 5 or 7.
    - lag_size (int, optional): Exclusive upper bound of the lags used for the fit
      (lags start at 2 and are additionally capped at len(segment) - 1). Default is 30.

    Returns:
    - Hursts of time series segments (list of floats); a NaN slope is reported as 0
    """
    hursts = []

    for label_idx in indices_dict[label]:
        # Convert the segment once instead of twice per lag
        series = np.asarray(segments[label_idx])
        # Create the range of lag values
        lags = range(2, min(lag_size, len(series) - 1))
        # Standard deviation of the lagged differences, one value per lag
        tau = [np.std(series[lag:] - series[:-lag]) for lag in lags]
        # The slope of a straight-line fit of log(tau) against log(lag) is the Hurst estimate
        slope = np.polyfit(np.log(lags), np.log(tau), 1)[0]
        hursts.append(slope if not np.isnan(slope) else 0)
    return hursts



def compute_standard_dev_of_first_der(segments, indices_dict, label):
    """
    Standard deviation of the numerical first derivative (np.gradient) of every
    segment that belongs to the given label.

    Parameters:
    - segments (list of lists): Complete EEG/EMG segments
    - indices_dict (dicts): maps each label (key) to the segment indices (values) of either the train or the test set.
    - label (int): Label whose segments are evaluated. 1, 3, 5 or 7.

    Returns:
    - standard deviation of the first derivative of time series segments (list of floats)
    """
    return [np.std(np.gradient(segments[segment_idx])) for segment_idx in indices_dict[label]]

    
    
def compute_crossing_points(segments, indices_dict, label):
    """
    Counts, for every segment of the given label, how often the series crosses
    its own median line between two consecutive samples.

    Parameters:
    - segments (list of lists): Complete EEG/EMG segments
    - indices_dict (dicts): maps each label (key) to the segment indices (values) of either the train or the test set.
    - label (int): Label whose segments are evaluated. 1, 3, 5 or 7.

    Returns:
    - crossing points of time series segments (list of ints), in the order of indices_dict[label]
    """
    counts = []

    for segment_idx in indices_dict[label]:
        values = np.asarray(segments[segment_idx])
        median = np.median(values)

        # Compare every sample with its successor
        previous, following = values[:-1], values[1:]
        # at/below the median -> strictly above it
        upward = (previous <= median) & (median < following)
        # at/above the median -> strictly below it
        downward = (previous >= median) & (median > following)

        counts.append(int(np.count_nonzero(upward | downward)))

    return counts



def compute_binarized_means(segments, indices_dict, label):
    """
    Binarized mean of every segment that belongs to the given label.
    Each sample is mapped to 1 when it lies strictly above the segment mean and
    to 0 otherwise; the feature is the average of that 0/1 vector.

    Parameters:
    - segments (list of lists): Complete EEG/EMG segments
    - indices_dict (dicts): maps each label (key) to the segment indices (values) of either the train or the test set.
    - label (int): Label whose segments are evaluated. 1, 3, 5 or 7.

    Returns:
    - binarized means of time series segments (list of floats), in the order of indices_dict[label]
    """
    results = []

    for segment_idx in indices_dict[label]:
        values = np.asarray(segments[segment_idx])
        above_mean = values > np.mean(values)
        results.append(np.mean(above_mean))

    return results




In [28]:
# Train data features for label 1
# One row per training segment of label 1, one column per statistical feature.
# NOTE(review): this cell is repeated for labels 3, 5 and 7 with only the label changed;
# a loop over the labels would remove the duplication.

train_feature_df_label_1 = pd.DataFrame()

train_feature_df_label_1["Mean"] = compute_means(eeg_segments, train_indices_dict, 1)
train_feature_df_label_1["Variance"] = compute_variance(eeg_segments, train_indices_dict, 1)
train_feature_df_label_1["Entropy"] = compute_entropy(eeg_segments, train_indices_dict, 1)
train_feature_df_label_1["Lumpiness"] = compute_lumpiness(eeg_segments, train_indices_dict, 1)
train_feature_df_label_1["Stability"] = compute_stabilities(eeg_segments, train_indices_dict, 1)
train_feature_df_label_1["Hurst"] = compute_hursts(eeg_segments, train_indices_dict, 1)
train_feature_df_label_1["STD_Derivative"] = compute_standard_dev_of_first_der(eeg_segments, train_indices_dict, 1)
train_feature_df_label_1["Crossing_Points"] = compute_crossing_points(eeg_segments, train_indices_dict, 1)
train_feature_df_label_1["Binarized_Means"] = compute_binarized_means(eeg_segments, train_indices_dict, 1)

train_feature_df_label_1["Label"] = 1


# Test data features for label 1
test_feature_df_label_1 = pd.DataFrame()

test_feature_df_label_1["Mean"] = compute_means(eeg_segments, test_indices_dict, 1)
test_feature_df_label_1["Variance"] = compute_variance(eeg_segments, test_indices_dict, 1)
test_feature_df_label_1["Entropy"] = compute_entropy(eeg_segments, test_indices_dict, 1)
test_feature_df_label_1["Lumpiness"] = compute_lumpiness(eeg_segments, test_indices_dict, 1)
test_feature_df_label_1["Stability"] = compute_stabilities(eeg_segments, test_indices_dict, 1)
test_feature_df_label_1["Hurst"] = compute_hursts(eeg_segments, test_indices_dict, 1)
test_feature_df_label_1["STD_Derivative"] = compute_standard_dev_of_first_der(eeg_segments, test_indices_dict, 1)
test_feature_df_label_1["Crossing_Points"] = compute_crossing_points(eeg_segments, test_indices_dict, 1)
test_feature_df_label_1["Binarized_Means"] = compute_binarized_means(eeg_segments, test_indices_dict, 1)

test_feature_df_label_1["Label"] = 1

In [29]:
# Train data features for label 3
# One row per training segment of label 3, one column per statistical feature.

train_feature_df_label_3 = pd.DataFrame()

train_feature_df_label_3["Mean"] = compute_means(eeg_segments, train_indices_dict, 3)
train_feature_df_label_3["Variance"] = compute_variance(eeg_segments, train_indices_dict, 3)
train_feature_df_label_3["Entropy"] = compute_entropy(eeg_segments, train_indices_dict, 3)
train_feature_df_label_3["Lumpiness"] = compute_lumpiness(eeg_segments, train_indices_dict, 3)
train_feature_df_label_3["Stability"] = compute_stabilities(eeg_segments, train_indices_dict, 3)
train_feature_df_label_3["Hurst"] = compute_hursts(eeg_segments, train_indices_dict, 3)
train_feature_df_label_3["STD_Derivative"] = compute_standard_dev_of_first_der(eeg_segments, train_indices_dict, 3)
train_feature_df_label_3["Crossing_Points"] = compute_crossing_points(eeg_segments, train_indices_dict, 3)
train_feature_df_label_3["Binarized_Means"] = compute_binarized_means(eeg_segments, train_indices_dict, 3)

train_feature_df_label_3["Label"] = 3


# Test data features for label 3
test_feature_df_label_3 = pd.DataFrame()

test_feature_df_label_3["Mean"] = compute_means(eeg_segments, test_indices_dict, 3)
test_feature_df_label_3["Variance"] = compute_variance(eeg_segments, test_indices_dict, 3)
test_feature_df_label_3["Entropy"] = compute_entropy(eeg_segments, test_indices_dict, 3)
test_feature_df_label_3["Lumpiness"] = compute_lumpiness(eeg_segments, test_indices_dict, 3)
test_feature_df_label_3["Stability"] = compute_stabilities(eeg_segments, test_indices_dict, 3)
test_feature_df_label_3["Hurst"] = compute_hursts(eeg_segments, test_indices_dict, 3)
test_feature_df_label_3["STD_Derivative"] = compute_standard_dev_of_first_der(eeg_segments, test_indices_dict, 3)
test_feature_df_label_3["Crossing_Points"] = compute_crossing_points(eeg_segments, test_indices_dict, 3)
test_feature_df_label_3["Binarized_Means"] = compute_binarized_means(eeg_segments, test_indices_dict, 3)

test_feature_df_label_3["Label"] = 3

In [30]:
# Train data features for label 5
# One row per training segment of label 5, one column per statistical feature.

train_feature_df_label_5 = pd.DataFrame()

train_feature_df_label_5["Mean"] = compute_means(eeg_segments, train_indices_dict, 5)
train_feature_df_label_5["Variance"] = compute_variance(eeg_segments, train_indices_dict, 5)
train_feature_df_label_5["Entropy"] = compute_entropy(eeg_segments, train_indices_dict, 5)
train_feature_df_label_5["Lumpiness"] = compute_lumpiness(eeg_segments, train_indices_dict, 5)
train_feature_df_label_5["Stability"] = compute_stabilities(eeg_segments, train_indices_dict, 5)
train_feature_df_label_5["Hurst"] = compute_hursts(eeg_segments, train_indices_dict, 5)
train_feature_df_label_5["STD_Derivative"] = compute_standard_dev_of_first_der(eeg_segments, train_indices_dict, 5)
train_feature_df_label_5["Crossing_Points"] = compute_crossing_points(eeg_segments, train_indices_dict, 5)
train_feature_df_label_5["Binarized_Means"] = compute_binarized_means(eeg_segments, train_indices_dict, 5)

train_feature_df_label_5["Label"] = 5


# Test data features for label 5
test_feature_df_label_5 = pd.DataFrame()

test_feature_df_label_5["Mean"] = compute_means(eeg_segments, test_indices_dict, 5)
test_feature_df_label_5["Variance"] = compute_variance(eeg_segments, test_indices_dict, 5)
test_feature_df_label_5["Entropy"] = compute_entropy(eeg_segments, test_indices_dict, 5)
test_feature_df_label_5["Lumpiness"] = compute_lumpiness(eeg_segments, test_indices_dict, 5)
test_feature_df_label_5["Stability"] = compute_stabilities(eeg_segments, test_indices_dict, 5)
test_feature_df_label_5["Hurst"] = compute_hursts(eeg_segments, test_indices_dict, 5)
test_feature_df_label_5["STD_Derivative"] = compute_standard_dev_of_first_der(eeg_segments, test_indices_dict, 5)
test_feature_df_label_5["Crossing_Points"] = compute_crossing_points(eeg_segments, test_indices_dict, 5)
test_feature_df_label_5["Binarized_Means"] = compute_binarized_means(eeg_segments, test_indices_dict, 5)

test_feature_df_label_5["Label"] = 5

In [31]:
# Train data features for label 7
# One row per training segment of label 7, one column per statistical feature.

train_feature_df_label_7 = pd.DataFrame()

train_feature_df_label_7["Mean"] = compute_means(eeg_segments, train_indices_dict, 7)
train_feature_df_label_7["Variance"] = compute_variance(eeg_segments, train_indices_dict, 7)
train_feature_df_label_7["Entropy"] = compute_entropy(eeg_segments, train_indices_dict, 7)
train_feature_df_label_7["Lumpiness"] = compute_lumpiness(eeg_segments, train_indices_dict, 7)
train_feature_df_label_7["Stability"] = compute_stabilities(eeg_segments, train_indices_dict, 7)
train_feature_df_label_7["Hurst"] = compute_hursts(eeg_segments, train_indices_dict, 7)
train_feature_df_label_7["STD_Derivative"] = compute_standard_dev_of_first_der(eeg_segments, train_indices_dict, 7)
train_feature_df_label_7["Crossing_Points"] = compute_crossing_points(eeg_segments, train_indices_dict, 7)
train_feature_df_label_7["Binarized_Means"] = compute_binarized_means(eeg_segments, train_indices_dict, 7)

train_feature_df_label_7["Label"] = 7


# Test data features for label 7
test_feature_df_label_7 = pd.DataFrame()

test_feature_df_label_7["Mean"] = compute_means(eeg_segments, test_indices_dict, 7)
test_feature_df_label_7["Variance"] = compute_variance(eeg_segments, test_indices_dict, 7)
test_feature_df_label_7["Entropy"] = compute_entropy(eeg_segments, test_indices_dict, 7)
test_feature_df_label_7["Lumpiness"] = compute_lumpiness(eeg_segments, test_indices_dict, 7)
test_feature_df_label_7["Stability"] = compute_stabilities(eeg_segments, test_indices_dict, 7)
test_feature_df_label_7["Hurst"] = compute_hursts(eeg_segments, test_indices_dict, 7)
test_feature_df_label_7["STD_Derivative"] = compute_standard_dev_of_first_der(eeg_segments, test_indices_dict, 7)
test_feature_df_label_7["Crossing_Points"] = compute_crossing_points(eeg_segments, test_indices_dict, 7)
test_feature_df_label_7["Binarized_Means"] = compute_binarized_means(eeg_segments, test_indices_dict, 7)

test_feature_df_label_7["Label"] = 7

In [32]:
# Stack the per-label frames (labels 1, 3, 5, 7) into one train and one test table.
# The row index of each per-label frame is kept, so index values repeat across labels.
train_feature_df = pd.concat([train_feature_df_label_1, train_feature_df_label_3, train_feature_df_label_5, train_feature_df_label_7])
test_feature_df = pd.concat([test_feature_df_label_1, test_feature_df_label_3, test_feature_df_label_5, test_feature_df_label_7])

In [33]:
# Write both feature tables to CSV; the row index is written as the first (unnamed) column.
# NOTE(review): presumably the "Features" directory must already exist — confirm or create it beforehand.
train_feature_df.to_csv("Features/Train_Statistical_Features.csv")
test_feature_df.to_csv("Features/Test_Statistical_Features.csv")

This notebook contains modified functions from the Python package Kats, which is itself
only available for earlier Python versions.

MIT License

Copyright (c) Facebook, Inc. and its affiliates.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
