In [51]:
import pyedflib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import statistics
import warnings
warnings.filterwarnings('ignore')
from statsmodels.tsa import stattools
from scipy.signal import periodogram
import logging
from itertools import groupby

In [52]:
# Signal type to analyse: either "EEG" or "EMG".
data_type = "EMG"

In [53]:
# Choose the subject (individual) whose recordings are analysed;
# the value is used to build the input and output paths below.
subject = "m294"

In [54]:
# Labels 0..4; each label selects one "run0<label>" folder when the data is loaded.
label_list = list(range(5))

In [55]:
# EEG & EMG data: one time-series DataFrame per label, keyed by the label
data = {
    label: pd.read_csv("Data/"+str(subject)+"/run0"+str(label)+"/Time_Series_Data.csv")
    for label in label_list
}

# Segmenting Data

In [56]:
def segment_data(df, segment_size, step_size = 2):
    """
    Cuts a recording into consecutive, non-overlapping EEG and EMG segments.

    Parameters:
    - df (DataFrame): The input dataframe containing the columns "time", "voltage" (EEG) and "emg" (EMG).
      The number of segments is derived from the last "time" value, which is treated as seconds.
    - segment_size (float): The desired size of each segment in seconds.
    - step_size (float, optional): The spacing between consecutive rows in milliseconds. Default is 2 milliseconds.

    Returns:
    Tuple of two lists:
    - List of EEG segments (each a list of "voltage" values).
    - List of EMG segments (each a list of "emg" values).
    Only segments that are fully covered by the rows of df are returned.
    """
    eeg_segments = []
    emg_segments = []

    # An empty recording has no last timestamp to read; it simply yields no segments.
    n_rows = len(df)
    if n_rows == 0:
        return eeg_segments, emg_segments

    # Floor division with a float segment_size returns a float, which range() rejects,
    # so the result is converted back to int explicitly.
    n_segments = int(int(df["time"].iloc[-1]) // segment_size)
    samples_per_segment = int(segment_size*1000/step_size)

    for i in range(n_segments):
        start_idx = int(i* segment_size*1000/step_size)
        end_idx = start_idx + samples_per_segment
        # The segment count comes from the time stamps, not from the row count. If the
        # time axis does not start at zero there are fewer rows than the count suggests,
        # so stop before emitting short or empty trailing segments.
        if end_idx > n_rows:
            break
        segment = df.iloc[start_idx:end_idx]
        eeg_segments.append(list(segment["voltage"]))
        emg_segments.append(list(segment["emg"]))

    return eeg_segments, emg_segments

In [57]:
# Segment the data: one list of EEG segments and one list of EMG segments per label
segment_size = 4  # seconds
eeg_segments, emg_segments = {}, {}

for label in label_list:
    segmented = segment_data(data[label], segment_size, step_size = 2)
    eeg_segments[label] = segmented[0]
    emg_segments[label] = segmented[1]

In [58]:
# Select the segment collection that matches the chosen signal type, so that the
# features computed below (and their column names / output path) describe the
# same signal.
if data_type == "EEG":
    segments = eeg_segments
elif data_type == "EMG":
    segments = emg_segments
else:
    raise ValueError('data_type must be "EEG" or "EMG", got ' + repr(data_type))

## Choose Train and Test Data Indices

In [59]:
# Statistical Features

def compute_means(segments,  label):
    """
    Computes the arithmetic mean of every segment.

    Parameters:
    - segments (list of lists): EEG/EMG segments belonging to one label
    - label (int): Label the segments belong to. Not used in the computation.

    Returns:
    - Means of the time series segments (list, one value per segment)
    """
    return [statistics.mean(samples) for samples in segments]
    


def compute_variance(segments,  label):
    """
    Computes the sample variance (statistics.variance) of every segment.

    Parameters:
    - segments (list of lists): EEG/EMG segments belonging to one label
    - label (int): Label the segments belong to. Not used in the computation.

    Returns:
    - Variances of the time series segments (list, one value per segment)
    """
    return [statistics.variance(samples) for samples in segments]




def compute_entropy(segments,  label, freq = 1):
    """
    Computes the normalized spectral entropy of every segment.

    The power spectral density of a segment (scipy periodogram) is normalized to
    sum to one, its base-2 entropy is taken and the result is divided by
    log2 of the number of frequency bins.

    Parameters:
    - segments (list of lists): EEG/EMG segments belonging to one label
    - label (int): Label the segments belong to. Not used in the computation.
    - freq (float, optional): Sampling frequency passed to the periodogram. Default is 1.

    Returns:
    - Entropies of the time series segments (list of floats)
    """
    def _spectral_entropy(samples):
        power = periodogram(samples, freq)[1]
        share = power / np.sum(power)
        # Bins with zero power give 0 * log2(0) = NaN; nansum leaves them out of the sum.
        weighted_log = np.nansum(share * np.log2(share))
        return -(weighted_log / np.log2(share.size))

    return [_spectral_entropy(samples) for samples in segments]


def compute_lumpiness(segments,  label, window_size = 30):
    """
    Computes the lumpiness of every segment.

    A segment is split into len(segment) // window_size + 1 chunks of nearly
    equal length; lumpiness is the variance of the per-chunk variances.

    Parameters:
    - segments (list of lists): EEG/EMG segments belonging to one label
    - label (int): Label the segments belong to. Not used in the computation.
    - window_size (int, optional): Divisor that determines the number of chunks. Default is 30.

    Returns:
    - Lumpinesses of the time series segments (list of floats)
    """
    def _lumpiness(samples):
        n_chunks = len(samples) // window_size + 1
        chunk_variances = [np.var(chunk) for chunk in np.array_split(samples, n_chunks)]
        return np.var(chunk_variances)

    return [_lumpiness(samples) for samples in segments]


def compute_stabilities(segments,  label, window_size = 30):
    """
    Computes the stability of every segment.

    A segment is split into len(segment) // window_size + 1 chunks of nearly
    equal length; stability is the variance of the per-chunk means.

    Parameters:
    - segments (list of lists): EEG/EMG segments belonging to one label
    - label (int): Label the segments belong to. Not used in the computation.
    - window_size (int, optional): Divisor that determines the number of chunks. Default is 30.

    Returns:
    - Stabilities of the time series segments (list of floats)
    """
    def _stability(samples):
        n_chunks = len(samples) // window_size + 1
        chunk_means = [np.mean(chunk) for chunk in np.array_split(samples, n_chunks)]
        return np.var(chunk_means)

    return [_stability(samples) for samples in segments]



def compute_hursts(segments,  label, lag_size = 30):
    """
    Estimates the Hurst exponent of every segment.

    For each lag in 2 .. min(lag_size, len(segment) - 1) - 1 the standard deviation
    of the lagged differences is computed; the Hurst exponent is the slope of a
    straight line fitted to log(std) against log(lag).

    Parameters:
    - segments (list of lists): EEG/EMG segments belonging to one label
    - label (int): Label the segments belong to. Not used in the computation.
    - lag_size (int, optional): Exclusive upper bound of the lag range. Default is 30.

    Returns:
    - Hursts of the time series segments (list of floats). 0 is used whenever no
      finite slope can be estimated (fewer than two lags, a zero or NaN standard
      deviation for some lag, or a non-finite fit result).
    """
    hursts = []

    for sgmt in segments:
        # Convert once per segment instead of twice per lag.
        series = np.asarray(sgmt)
        # Create the range of lag values
        lags = range(2, min(lag_size, len(series) - 1))

        # A straight-line fit needs at least two lag values.
        if len(lags) < 2:
            hursts.append(0)
            continue

        # Calculate the standard deviations of the lagged differences
        tau = np.array([np.std(series[lag:] - series[:-lag]) for lag in lags])

        # log(0) is -inf and NaN propagates, so such segments cannot be fitted;
        # they get the same fallback value as a NaN slope.
        if not np.all(tau > 0):
            hursts.append(0)
            continue

        # Use a linear fit to estimate the Hurst Exponent (slope of the fit)
        slope = np.polyfit(np.log(lags), np.log(tau), 1)[0]
        hursts.append(slope if np.isfinite(slope) else 0)
    return hursts



def compute_standard_dev_of_first_der(segments,  label):
    """
    Computes the standard deviation of the first derivative of every segment.

    The derivative is approximated with np.gradient.

    Parameters:
    - segments (list of lists): EEG/EMG segments belonging to one label
    - label (int): Label the segments belong to. Not used in the computation.

    Returns:
    - Standard deviations of the first derivative of the time series segments (list of floats)
    """
    def _derivative_std(samples):
        slope = np.gradient(samples)
        return np.std(slope)

    return [_derivative_std(samples) for samples in segments]

    
    
def compute_crossing_points(segments,  label):
    """
    Counts the crossing points of every segment.
    A crossing point is a pair of consecutive samples where the series moves
    from at-or-below the median to strictly above it, or from at-or-above the
    median to strictly below it.

    Parameters:
    - segments (list of lists): EEG/EMG segments belonging to one label
    - label (int): Label the segments belong to. Not used in the computation.

    Returns:
    - Crossing points of the time series segments (list of ints)
    """
    def _count_crossings(samples):
        values = np.asarray(samples)
        median = np.median(values)
        current, following = values[:-1], values[1:]
        upward = (current <= median) & (median < following)
        downward = (current >= median) & (median > following)
        return int(np.count_nonzero(upward | downward))

    return [_count_crossings(samples) for samples in segments]



def compute_binarized_means(segments,  label):
    """
    Computes the binarized mean of every segment.
    Each sample is mapped to 1 if it lies strictly above the segment mean and
    to 0 otherwise; the binarized mean is the average of that 0/1 vector.

    Parameters:
    - segments (list of lists): EEG/EMG segments belonging to one label
    - label (int): Label the segments belong to. Not used in the computation.

    Returns:
    - Binarized means of the time series segments (list of floats)
    """
    def _share_above_mean(samples):
        values = np.asarray(samples)
        above = values > np.mean(samples)
        return np.mean(above)

    return [_share_above_mean(samples) for samples in segments]




In [60]:
def create_feature_dataframes(label_list, segments, data_type, subject):
    """
    Create feature dataframes for each label using segment data and concatenate them.

    Every label contributes one row per segment with the nine statistical
    features (columns prefixed with data_type) plus the columns "Label",
    "Subject" and "Train" (always True).

    Args:
    - label_list (list): List of labels. Determines which entries of segments
      are used and the order of the rows in the result.
    - segments (dict): Dictionary containing segments for each label.
    - data_type (str): Type of data; used as prefix of the feature column names.
    - subject (str): Subject identifier.

    Returns:
    - pd.DataFrame: Concatenated feature dataframe with a fresh 0..n-1 index.
    """
    prefix = str(data_type)

    # (column suffix, feature function) pairs in output column order.
    feature_functions = [
        ("_Mean", compute_means),
        ("_Variance", compute_variance),
        ("_Entropy", compute_entropy),
        ("_Lumpiness", compute_lumpiness),
        ("_Stability", compute_stabilities),
        ("_Hurst", compute_hursts),
        ("_STD_Derivative", compute_standard_dev_of_first_der),
        ("_Crossing_Points", compute_crossing_points),
        ("_Binarized_Means", compute_binarized_means),
    ]

    feature_dataframes = {}

    for label in label_list:
        frame = pd.DataFrame()
        for suffix, compute_feature in feature_functions:
            frame[prefix + suffix] = compute_feature(segments[label], label)

        frame["Label"] = label
        frame["Subject"] = subject
        frame["Train"] = True
        feature_dataframes[label] = frame

    # Concatenate dataframes while resetting indices (ignore_index=True).
    # The frames are taken from label_list rather than from fixed keys 0..4, so the
    # function also works for label sets other than [0, 1, 2, 3, 4].
    feature_df = pd.concat([feature_dataframes[label] for label in label_list],
                           ignore_index=True)

    return feature_df

# Build the feature table for the selected subject and signal type
feature_df = create_feature_dataframes(
    label_list=label_list,
    segments=segments,
    data_type=data_type,
    subject=subject,
)


In [61]:
# Write the feature table to the per-subject, per-signal-type output folder
features_csv_path = "Features/"+str(subject)+"/"+str(data_type)+"/Statistical_Features.csv"
feature_df.to_csv(features_csv_path)

This notebook contains modified functions from the Python package Kats, which itself is only
available for earlier Python versions.

MIT License

Copyright (c) Facebook, Inc. and its affiliates.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
