In [4]:
from functools import cache
import os


@cache
def get_root():
    return os.path.dirname(os.path.dirname(os.path.abspath('main.py')))


In [23]:
import pandas as pd
import re
import glob 
import os

import numpy as np
import scipy.signal as signal
import pywt
from scipy.fft import fft
from scipy.stats import entropy
import pyedflib
import warnings

warnings.filterwarnings("ignore")

# Butterworth band-pass filter
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Creates a Butterworth bandpass filter with the given cutoff frequencies."""
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = signal.butter(order, [low, high], btype='band')
    return b, a

def apply_butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Applies a Butterworth bandpass filter to the data."""
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = signal.lfilter(b, a, data)
    return y

# Discrete Wavelet Transform (DWT) for feature extraction
def dwt_features(data, wavelet='db4', level=5):
    """Applies Discrete Wavelet Transform and extracts features from the coefficients."""
    coeffs = pywt.wavedec(data, wavelet, level=level)
    # Extract energy from each level of DWT coefficients
    features = [np.sum(np.square(c)) for c in coeffs]
    return features

# Spectral features (e.g., power spectral density)
def spectral_features(data, fs):
    """Calculates spectral features such as spectral power."""
    freqs, psd = signal.welch(data, fs)
    # Example: Extracting power in different frequency bands
    delta_power = np.trapz(psd[(freqs >= 0.5) & (freqs <= 4)])
    theta_power = np.trapz(psd[(freqs >= 4) & (freqs <= 8)])
    alpha_power = np.trapz(psd[(freqs >= 8) & (freqs <= 13)])
    beta_power = np.trapz(psd[(freqs >= 13) & (freqs <= 30)])
    return [delta_power, theta_power, alpha_power, beta_power]

# Hjorth Parameters
def hjorth_parameters(data):
    """Calculates Hjorth parameters: activity, mobility, and complexity."""
    activity = np.var(data)
    mobility = np.sqrt(np.var(np.diff(data)) / activity)
    complexity = np.sqrt(np.var(np.diff(np.diff(data))) / np.var(np.diff(data)))
    return [activity, mobility, complexity]

# Entropy measures
def entropy_features(data):
    """Calculates entropy measures (e.g., Shannon entropy)."""
    hist, _ = np.histogram(data, bins=256, density=True)
    shannon_entropy = entropy(hist, base=2)  # Shannon Entropy
    return [shannon_entropy]

def extract_features(data, fs):
    """Extracts multiple features from the input data and returns feature names along with values."""
    # Apply bandpass filtering
    filtered_data = apply_butter_bandpass_filter(data, 0.5, 30, fs)
    
    # Extract DWT features
    dwt_feats = dwt_features(filtered_data)
    dwt_feat_names = [f'DWT_Energy_Level_{i+1}' for i in range(len(dwt_feats))]

    # Extract spectral features
    spectral_feats = spectral_features(filtered_data, fs)
    spectral_feat_names = ['Delta_Power', 'Theta_Power', 'Alpha_Power', 'Beta_Power']

    # Calculate Hjorth parameters
    hjorth_feats = hjorth_parameters(filtered_data)
    hjorth_feat_names = ['Hjorth_Activity', 'Hjorth_Mobility', 'Hjorth_Complexity']

    # Calculate entropy features
    entropy_feats = entropy_features(filtered_data)
    entropy_feat_names = ['Shannon_Entropy']

    # Concatenate all features and names into single lists
    all_features = dwt_feats + spectral_feats + hjorth_feats + entropy_feats
    all_feature_names = dwt_feat_names + spectral_feat_names + hjorth_feat_names + entropy_feat_names
    
    return all_features, all_feature_names



# Function to convert time strings to seconds
def time_to_seconds(t):
    try:
        if pd.isnull(t) or t == '':
            return None
        parts = list(map(int, re.split('[:]', str(t))))
        if len(parts) == 3:
            return parts[0] * 3600 + parts[1] * 60 + parts[2]
        elif len(parts) == 2:
            return parts[0] * 60 + parts[1]
        elif len(parts) == 1:
            return parts[0]
        else:
            return None
    except ValueError:
        return None

def parse_intervals(file_path):
    # Extract metadata from filename
    filename_id = file_path.split('/')[-1].split('_')

    id_match = filename_id[0]
    age_match = filename_id[1]
    exp_match = filename_id[2]
    
    # Assign extracted values or None if not found
    
    # Read the file into a DataFrame
    intervals_df = pd.read_csv(file_path, sep='\t', header=None, names=['NN', 'Время', 'Маркер'])
    intervals_df['Начало'] = intervals_df['Время'].apply(time_to_seconds)

    # Ensure 'Начало' column is numeric, removing non-numeric values
    intervals_df['Начало'] = pd.to_numeric(intervals_df['Начало'], errors='coerce')
    intervals_df = intervals_df.dropna(subset=['Начало']).reset_index(drop=True)

    paired_intervals = []
    open_markers_last_time = {}

    for _, row in intervals_df.iterrows():
        marker_base = re.match(r"([a-zA-Z]+)", row['Маркер']).group(0)
        
        if marker_base in open_markers_last_time:
            last_time = open_markers_last_time[marker_base]
            if isinstance(row['Начало'], (int, float)) and isinstance(last_time, (int, float)):
                if row['Начало'] > last_time:
                    paired_intervals.append((last_time, row['Начало'], marker_base))

        open_markers_last_time[marker_base] = row['Начало']

    # Create a DataFrame with valid pairs
    if paired_intervals:
        valid_intervals_df = pd.DataFrame(
            paired_intervals,
            columns=['X_FROM', 'X_TO', 'LABEL']
        )
        # Add extracted metadata as new columns
        valid_intervals_df['ID'] = id_match
        valid_intervals_df['AGE_MONTHS'] = age_match
        valid_intervals_df['EXPERIMENT_TYPE'] = exp_match
    else:
        valid_intervals_df = pd.DataFrame(columns=['X_FROM', 'X_TO', 'LABEL', 'ID', 'AGE_MONTHS', 'EXPERIMENT_TYPE'])

    return valid_intervals_df


# Function to process all .txt files in a folder
def process_folder(folder_path):
    all_intervals = []
    
    for file_path in glob.glob(os.path.join(folder_path, "*.txt")):
        valid_intervals = parse_intervals(file_path)
        
        with pyedflib.EdfReader(file_path[:-4]+".edf") as edf_reader:
            print(file_path[:-4]+".edf")
            n_channels = edf_reader.signals_in_file
            signal_labels = edf_reader.getSignalLabels()
            # Specify the desired channel
            feature_rows = []

            for types in signal_labels:
                channel_index = signal_labels.index(types)
                data = edf_reader.readSignal(channel_index)
                sfreq = edf_reader.getSampleFrequency(channel_index)
                total_duration = len(data) / sfreq

                for _, row in valid_intervals.iterrows():
                    start_time = row['X_FROM']
                    duration = row['X_TO'] - row['X_FROM']
                    end_time = start_time + duration

                    if end_time > total_duration:
                        end_time = total_duration
                        duration = end_time - start_time

                    # Convert times to indices
                    start_idx = int(start_time * sfreq)
                    end_idx = int(end_time * sfreq)
                    # Extract the signal segment
                    signal_data = data[start_idx:end_idx]
                    # print(signal_data)
                    # Extract features from this segment
                    features, feature_names = extract_features(signal_data, 256)
                    # Convert features to DataFrame row and transpose
                    feature_row = pd.DataFrame([features]).T
                    # print(feature_row)

                    feature_row = feature_row.transpose()
                    feature_row.columns = [f"{i}_{types}" for i in feature_names]

                    # Append the feature row
                    feature_rows.append(feature_row)

                # Concatenate all feature rows with the original DataFrame
            features_df = pd.concat(feature_rows, ignore_index=True)
            df_with_features = pd.concat([valid_intervals.reset_index(drop=True), features_df], axis=1)

            all_intervals.append(df_with_features)
    # Concatenate all DataFrames
    all_intervals_df = pd.concat(all_intervals, ignore_index=True)
    return all_intervals_df

# Example usage
folder_path = f'{get_root()}/data/ECoG_fully_marked_(4+2 files, 6 h each)'
all_valid_intervals = process_folder(folder_path)
all_valid_intervals.fillna(0)



/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x3_9m_Xyl01(Pharm!)_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x1_15m_BL_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x1_15m_H2O_6h_fully_marked.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x1_15m_Dex003(Pharm!)_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x1_15m_H2O_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x6_14m_BL_6h_fully_marked.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x1_15m_Dex003(Pharm!)_6h_fully_marked.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/EC

Unnamed: 0,X_FROM,X_TO,LABEL,ID,AGE_MONTHS,EXPERIMENT_TYPE,DWT_Energy_Level_1_FrL,DWT_Energy_Level_2_FrL,DWT_Energy_Level_3_FrL,DWT_Energy_Level_4_FrL,DWT_Energy_Level_5_FrL,DWT_Energy_Level_6_FrL,Delta_Power_FrL,Theta_Power_FrL,Alpha_Power_FrL,Beta_Power_FrL,Hjorth_Activity_FrL,Hjorth_Mobility_FrL,Hjorth_Complexity_FrL,Shannon_Entropy_FrL,DWT_Energy_Level_1_FrR,DWT_Energy_Level_2_FrR,DWT_Energy_Level_3_FrR,DWT_Energy_Level_4_FrR,DWT_Energy_Level_5_FrR,DWT_Energy_Level_6_FrR,Delta_Power_FrR,Theta_Power_FrR,Alpha_Power_FrR,Beta_Power_FrR,Hjorth_Activity_FrR,Hjorth_Mobility_FrR,Hjorth_Complexity_FrR,Shannon_Entropy_FrR,DWT_Energy_Level_1_OcR,DWT_Energy_Level_2_OcR,DWT_Energy_Level_3_OcR,DWT_Energy_Level_4_OcR,DWT_Energy_Level_5_OcR,DWT_Energy_Level_6_OcR,Delta_Power_OcR,Theta_Power_OcR,Alpha_Power_OcR,Beta_Power_OcR,Hjorth_Activity_OcR,Hjorth_Mobility_OcR,Hjorth_Complexity_OcR,Shannon_Entropy_OcR
0,0.0,20.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),66.975280,16.266623,7.120781,4.819537,0.871679,0.016364,0.004500,0.002423,0.000464,0.000843,0.011991,0.168599,0.525346,7.056625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,20.0,40.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),15.991530,15.264707,4.504226,3.865270,0.716347,0.013117,0.001278,0.001967,0.000324,0.000608,0.005005,0.227219,0.537214,7.079165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,40.0,60.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),18.059672,11.891088,4.304624,3.282940,0.606289,0.011204,0.001559,0.001679,0.000329,0.000539,0.004745,0.217599,0.533099,7.328802,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,60.0,80.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),49.681935,124.788543,83.939171,24.257651,1.935142,0.024178,0.005204,0.014976,0.010336,0.005278,0.035549,0.227408,0.387275,6.590252,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,80.0,100.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),100.133364,185.316210,114.279640,29.948783,2.377830,0.032180,0.011279,0.023088,0.013256,0.006263,0.053907,0.211606,0.381564,6.710461,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38875,0.0,0.0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.472590,11.707972,7.545568,2.821475,0.453721,0.007294,0.001807,0.001391,0.000737,0.000537,0.005013,0.211092,0.469628,6.763051
38876,0.0,0.0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,33.998617,12.818804,8.170615,4.222464,0.419677,0.007606,0.003543,0.001558,0.000863,0.000706,0.007437,0.188876,0.456280,6.788587
38877,0.0,0.0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23.533492,13.455825,10.193713,4.361761,0.525576,0.007713,0.002398,0.001591,0.000914,0.000792,0.006481,0.210536,0.454770,6.808284
38878,0.0,0.0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.683357,8.926117,8.296017,3.560439,0.485088,0.007947,0.001612,0.001203,0.000691,0.000620,0.004610,0.227232,0.480863,6.638668


In [21]:
def process_folder(folder_path):
    all_intervals = []
    
    for file_path in glob.glob(os.path.join(folder_path, "*.txt")):
        valid_intervals = parse_intervals(file_path)
        
        with pyedflib.EdfReader(file_path[:-4]+".edf") as edf_reader:
            print(file_path[:-4]+".edf")
            n_channels = edf_reader.signals_in_file
            signal_labels = edf_reader.getSignalLabels()
            # Specify the desired channel
            feature_rows = []

            for types in signal_labels:
                channel_index = signal_labels.index(types)
                data = edf_reader.readSignal(channel_index)
                sfreq = edf_reader.getSampleFrequency(channel_index)
                total_duration = len(data) / sfreq

                for _, row in valid_intervals.iterrows():
                    start_time = row['X_FROM']
                    end_time = row['X_TO']

                    if end_time > total_duration:
                        end_time = total_duration

                    # Convert times to indices
                    start_idx = int(start_time * sfreq)
                    end_idx = int(end_time * sfreq)
                    # Extract the signal segment
                    signal_data = data[start_idx:end_idx]
                    # print(signal_data)
                    # Extract features from this segment
                    features, feature_names = extract_features(signal_data, 256)
                    # Convert features to DataFrame row and transpose
                    feature_row = pd.DataFrame([features]).T
                    # print(feature_row)

                    feature_row = feature_row.transpose()
                    feature_row.columns = [f"{i}_{types}" for i in feature_names]

                    # Append the feature row
                    feature_rows.append(feature_row)

                # Concatenate all feature rows with the original DataFrame
            features_df = pd.concat(feature_rows, ignore_index=True)
            df_with_features = pd.concat([valid_intervals.reset_index(drop=True), features_df], axis=1)

            all_intervals.append(df_with_features)
    # Concatenate all DataFrames
    all_intervals_df = pd.concat(all_intervals, ignore_index=True)
    return all_intervals_df

# Example usage
folder_path = f'{get_root()}/data/ECoG_fully_marked_(4+2 files, 6 h each)'  # Replace with your folder path
all_valid_intervals = process_folder(folder_path)
all_valid_intervals = all_valid_intervals.dropna(subset=['ID', 'LABEL'])
all_valid_intervals.fillna(0)

/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x3_9m_Xyl01(Pharm!)_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x1_15m_BL_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x1_15m_H2O_6h_fully_marked.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x1_15m_Dex003(Pharm!)_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x1_15m_H2O_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x6_14m_BL_6h_fully_marked.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x1_15m_Dex003(Pharm!)_6h_fully_marked.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/EC

Unnamed: 0,X_FROM,X_TO,LABEL,ID,AGE_MONTHS,EXPERIMENT_TYPE,DWT_Energy_Level_1_FrL,DWT_Energy_Level_2_FrL,DWT_Energy_Level_3_FrL,DWT_Energy_Level_4_FrL,DWT_Energy_Level_5_FrL,DWT_Energy_Level_6_FrL,Delta_Power_FrL,Theta_Power_FrL,Alpha_Power_FrL,Beta_Power_FrL,Hjorth_Activity_FrL,Hjorth_Mobility_FrL,Hjorth_Complexity_FrL,Shannon_Entropy_FrL,DWT_Energy_Level_1_FrR,DWT_Energy_Level_2_FrR,DWT_Energy_Level_3_FrR,DWT_Energy_Level_4_FrR,DWT_Energy_Level_5_FrR,DWT_Energy_Level_6_FrR,Delta_Power_FrR,Theta_Power_FrR,Alpha_Power_FrR,Beta_Power_FrR,Hjorth_Activity_FrR,Hjorth_Mobility_FrR,Hjorth_Complexity_FrR,Shannon_Entropy_FrR,DWT_Energy_Level_1_OcR,DWT_Energy_Level_2_OcR,DWT_Energy_Level_3_OcR,DWT_Energy_Level_4_OcR,DWT_Energy_Level_5_OcR,DWT_Energy_Level_6_OcR,Delta_Power_OcR,Theta_Power_OcR,Alpha_Power_OcR,Beta_Power_OcR,Hjorth_Activity_OcR,Hjorth_Mobility_OcR,Hjorth_Complexity_OcR,Shannon_Entropy_OcR
0,0.0,20.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),66.975280,16.266623,7.120781,4.819537,0.871679,0.016364,0.004500,0.002423,0.000464,0.000843,0.011991,0.168599,0.525346,7.056625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,20.0,40.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),15.991530,15.264707,4.504226,3.865270,0.716347,0.013117,0.001278,0.001967,0.000324,0.000608,0.005005,0.227219,0.537214,7.079165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,40.0,60.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),18.059672,11.891088,4.304624,3.282940,0.606289,0.011204,0.001559,0.001679,0.000329,0.000539,0.004745,0.217599,0.533099,7.328802,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,60.0,80.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),49.681935,124.788543,83.939171,24.257651,1.935142,0.024178,0.005204,0.014976,0.010336,0.005278,0.035549,0.227408,0.387275,6.590252,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,80.0,100.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),100.133364,185.316210,114.279640,29.948783,2.377830,0.032180,0.011279,0.023088,0.013256,0.006263,0.053907,0.211606,0.381564,6.710461,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36715,21500.0,21520.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),53.498984,29.746367,13.986074,3.351047,0.370768,0.005983,0.005685,0.003467,0.001434,0.000607,0.012430,0.161241,0.390116,6.740058,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36716,21520.0,21540.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),70.774088,37.187841,21.289090,5.105879,0.430781,0.007932,0.007265,0.005043,0.002276,0.001041,0.016807,0.168289,0.385822,6.547762,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36717,21540.0,21560.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),52.615974,46.170961,18.361132,4.319636,0.416523,0.005953,0.006196,0.005571,0.001954,0.000754,0.015194,0.165459,0.362963,6.945392,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36718,21560.0,21580.0,EMPTY,Ati4x3,9m,Xyl01(Pharm!),51.056223,36.622396,18.386127,4.203553,0.424774,0.006106,0.005302,0.005070,0.001902,0.000743,0.013803,0.170465,0.371344,6.820151,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
all_valid_intervals.to_csv("train.csv", index=False)


In [None]:
# Example usage
# Надо удалить битый файл data/ECoG_fully_marked/Ati4x3_9m_Xyl01(Pharm!)_6h.txt
# Переименовать Ati4x3_9m_Xyl01(Pharm!)_6h1.txt в Ati4x3_9m_Xyl01(Pharm!)_6h.txt
folder_path = f'{get_root()}/data/ECoG_golden_standard' 
all_valid_intervals = process_folder(folder_path)
all_valid_intervals.to_csv("validation.csv", index=False)


/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_golden_standard/Ati4y3_12m_BL_6h_edited.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_golden_standard/Ati5x1_10m_BL_6h_edited.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_golden_standard/Ati4y2_11m_BL_6h_edited.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_golden_standard/Ati5x1_11m_BL_6h_edited.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_golden_standard/Ati4x3B_15m_H2O_6h_edited.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_golden_standard/Ati4x1_15m_BL_6h_edited.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_golden_standard/Ati4x1_15m_H2O_6h_edited.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_golden_standard/Ati4x3_12m_BL_6h_edited.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_golden_standard/Ati4x6_14m_H2O_6h_edited.e

In [33]:
all_valid_intervals.to_csv("validation.csv", index=False)


In [39]:
all_valid_intervals[all_valid_intervals.ID=='Ati4y3']

Unnamed: 0,X_FROM,X_TO,LABEL,ID,AGE_MONTHS,EXPERIMENT_TYPE,DWT_Energy_Level_1_FrL,DWT_Energy_Level_2_FrL,DWT_Energy_Level_3_FrL,DWT_Energy_Level_4_FrL,DWT_Energy_Level_5_FrL,DWT_Energy_Level_6_FrL,Delta_Power_FrL,Theta_Power_FrL,Alpha_Power_FrL,Beta_Power_FrL,Hjorth_Activity_FrL,Hjorth_Mobility_FrL,Hjorth_Complexity_FrL,Shannon_Entropy_FrL,DWT_Energy_Level_1_FrR,DWT_Energy_Level_2_FrR,DWT_Energy_Level_3_FrR,DWT_Energy_Level_4_FrR,DWT_Energy_Level_5_FrR,DWT_Energy_Level_6_FrR,Delta_Power_FrR,Theta_Power_FrR,Alpha_Power_FrR,Beta_Power_FrR,Hjorth_Activity_FrR,Hjorth_Mobility_FrR,Hjorth_Complexity_FrR,Shannon_Entropy_FrR,DWT_Energy_Level_1_OcR,DWT_Energy_Level_2_OcR,DWT_Energy_Level_3_OcR,DWT_Energy_Level_4_OcR,DWT_Energy_Level_5_OcR,DWT_Energy_Level_6_OcR,Delta_Power_OcR,Theta_Power_OcR,Alpha_Power_OcR,Beta_Power_OcR,Hjorth_Activity_OcR,Hjorth_Mobility_OcR,Hjorth_Complexity_OcR,Shannon_Entropy_OcR
0,0.0,20.0,EMPTY,Ati4y3,12m,BL,40.243653,14.290934,9.893603,2.401972,0.375553,0.005751,0.003571,0.001810,0.000822,0.000419,0.008340,0.162210,0.429255,6.966838,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,40.0,60.0,EMPTY,Ati4y3,12m,BL,77.330705,29.150397,19.905851,4.116761,0.377041,0.005938,0.007011,0.003675,0.002170,0.000616,0.016285,0.148648,0.372563,7.060856,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,80.0,100.0,EMPTY,Ati4y3,12m,BL,13.741715,11.357756,7.178042,2.873398,0.444803,0.006791,0.001237,0.001532,0.000732,0.000492,0.004439,0.216179,0.469153,6.735683,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,120.0,140.0,EMPTY,Ati4y3,12m,BL,72.859692,30.468718,13.177216,3.122007,0.323350,0.005816,0.006211,0.003861,0.001501,0.000521,0.014468,0.147131,0.378575,6.998132,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,160.0,180.0,EMPTY,Ati4y3,12m,BL,5.889452,4.516121,2.792503,2.951454,0.562337,0.011200,0.000573,0.000563,0.000218,0.000493,0.002082,0.294811,0.571759,7.046930,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535,21400.0,21420.0,EMPTY,Ati4y3,12m,BL,7.462269,6.009376,2.476960,2.289426,0.463183,0.009530,0.000657,0.000800,0.000194,0.000372,0.002289,0.260170,0.567598,7.246655,,,,,,,,,,,,,,,,,,,,,,,,,,,,
536,21440.0,21460.0,EMPTY,Ati4y3,12m,BL,8.883669,6.270691,2.870276,2.504970,0.462039,0.009578,0.000723,0.000827,0.000211,0.000442,0.002622,0.253157,0.557343,7.255389,,,,,,,,,,,,,,,,,,,,,,,,,,,,
537,21480.0,21500.0,EMPTY,Ati4y3,12m,BL,7.362344,4.314280,3.347370,2.714325,0.498668,0.009354,0.000677,0.000623,0.000261,0.000455,0.002267,0.276522,0.550508,7.274205,,,,,,,,,,,,,,,,,,,,,,,,,,,,
538,21520.0,21540.0,EMPTY,Ati4y3,12m,BL,8.033753,4.524707,3.326098,2.475582,0.387534,0.006673,0.000749,0.000518,0.000287,0.000423,0.002265,0.257887,0.527836,6.957689,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [19]:
all_valid_intervals.to_csv("train.csv", index=False)
all_valid_intervals

Unnamed: 0,X_FROM,X_TO,LABEL,ID,AGE_MONTHS,EXPERIMENT_TYPE,DWT_Energy_Level_1_FrL,DWT_Energy_Level_2_FrL,DWT_Energy_Level_3_FrL,DWT_Energy_Level_4_FrL,DWT_Energy_Level_5_FrL,DWT_Energy_Level_6_FrL,Delta_Power_FrL,Theta_Power_FrL,Alpha_Power_FrL,Beta_Power_FrL,Hjorth_Activity_FrL,Hjorth_Mobility_FrL,Hjorth_Complexity_FrL,Shannon_Entropy_FrL,DWT_Energy_Level_1_FrR,DWT_Energy_Level_2_FrR,DWT_Energy_Level_3_FrR,DWT_Energy_Level_4_FrR,DWT_Energy_Level_5_FrR,DWT_Energy_Level_6_FrR,Delta_Power_FrR,Theta_Power_FrR,Alpha_Power_FrR,Beta_Power_FrR,Hjorth_Activity_FrR,Hjorth_Mobility_FrR,Hjorth_Complexity_FrR,Shannon_Entropy_FrR,DWT_Energy_Level_1_OcR,DWT_Energy_Level_2_OcR,DWT_Energy_Level_3_OcR,DWT_Energy_Level_4_OcR,DWT_Energy_Level_5_OcR,DWT_Energy_Level_6_OcR,Delta_Power_OcR,Theta_Power_OcR,Alpha_Power_OcR,Beta_Power_OcR,Hjorth_Activity_OcR,Hjorth_Mobility_OcR,Hjorth_Complexity_OcR,Shannon_Entropy_OcR
0,0.0,20.0,EMPTY,Ati4y4,13m,BL,9.465120,4.877172,4.779053,2.482929,0.409336,0.007077,0.000888,0.000647,0.000417,0.000445,0.002745,0.243405,0.511615,6.838178,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,20.0,40.0,EMPTY,Ati4y4,13m,BL,9.743399,5.289118,3.546685,2.252065,0.364370,0.006446,0.000874,0.000646,0.000335,0.000393,0.002640,0.234781,0.515431,6.902029,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,40.0,60.0,EMPTY,Ati4y4,13m,BL,9.659637,3.432095,2.952890,2.664100,0.501702,0.010693,0.000780,0.000431,0.000301,0.000429,0.002380,0.268458,0.570514,7.125666,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,60.0,80.0,EMPTY,Ati4y4,13m,BL,8.019988,4.654770,3.989350,2.232510,0.379963,0.006344,0.000768,0.000545,0.000354,0.000441,0.002365,0.253135,0.513020,6.995845,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,80.0,100.0,EMPTY,Ati4y4,13m,BL,10.128898,6.275184,4.311407,2.244360,0.345914,0.005611,0.000829,0.000860,0.000437,0.000408,0.002905,0.229648,0.489272,6.804249,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97192,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,14.301695,9.545716,6.748166,3.083927,0.484985,0.009293,0.001412,0.001110,0.000714,0.000652,0.004263,0.235449,0.495782,6.375799
97193,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12.951760,7.483678,8.885884,3.550965,0.607919,0.011084,0.001359,0.001051,0.000808,0.000620,0.004133,0.245724,0.507596,6.732231
97194,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.964770,11.220260,8.219229,3.688345,0.390388,0.008387,0.001312,0.001367,0.000901,0.000587,0.004408,0.231870,0.473012,6.679600
97195,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,13.978507,11.673446,9.956350,3.710070,0.622207,0.009848,0.001425,0.001440,0.001051,0.000693,0.004942,0.239261,0.480015,6.384767


In [11]:
folder_path = f'{get_root()}/data/ECoG_unmarked'
all_valid_intervals = process_folder(folder_path)
all_valid_intervals.to_csv("test.csv", index=False)


/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_unmarked/Ati4y4_13m_BL_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_unmarked/Dex1y3_14m_BL_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_unmarked/Ati4x1_15m_BL_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_unmarked/Ati5x3_14m_BL_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_unmarked/Ati5x2_12m_BL_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_unmarked/Dex1y2_12m_H2O_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_unmarked/Dex4x2_14m_BL_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_unmarked/Ati5y1_9m_BL_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_unmarked/Ati5x4_13m_BL_6h.edf
/Users/alfa/IdeaProjects/prod/classification-system-ekog/data/ECoG_unmarked/Ati5x1_10m_BL_6h.edf
/Users/alfa/IdeaProjects/prod/

In [12]:
pd.set_option('display.max_columns', 200)
all_valid_intervals

Unnamed: 0,X_FROM,X_TO,LABEL,ID,AGE_MONTHS,EXPERIMENT_TYPE,DWT_Energy_Level_1_FrL,DWT_Energy_Level_2_FrL,DWT_Energy_Level_3_FrL,DWT_Energy_Level_4_FrL,DWT_Energy_Level_5_FrL,DWT_Energy_Level_6_FrL,Delta_Power_FrL,Theta_Power_FrL,Alpha_Power_FrL,Beta_Power_FrL,Hjorth_Activity_FrL,Hjorth_Mobility_FrL,Hjorth_Complexity_FrL,Shannon_Entropy_FrL,DWT_Energy_Level_1_FrR,DWT_Energy_Level_2_FrR,DWT_Energy_Level_3_FrR,DWT_Energy_Level_4_FrR,DWT_Energy_Level_5_FrR,DWT_Energy_Level_6_FrR,Delta_Power_FrR,Theta_Power_FrR,Alpha_Power_FrR,Beta_Power_FrR,Hjorth_Activity_FrR,Hjorth_Mobility_FrR,Hjorth_Complexity_FrR,Shannon_Entropy_FrR,DWT_Energy_Level_1_OcR,DWT_Energy_Level_2_OcR,DWT_Energy_Level_3_OcR,DWT_Energy_Level_4_OcR,DWT_Energy_Level_5_OcR,DWT_Energy_Level_6_OcR,Delta_Power_OcR,Theta_Power_OcR,Alpha_Power_OcR,Beta_Power_OcR,Hjorth_Activity_OcR,Hjorth_Mobility_OcR,Hjorth_Complexity_OcR,Shannon_Entropy_OcR
0,0.0,20.0,EMPTY,Ati4y4,13m,BL,9.465120,4.877172,4.779053,2.482929,0.409336,0.007077,0.000888,0.000647,0.000417,0.000445,0.002745,0.243405,0.511615,6.838178,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,20.0,40.0,EMPTY,Ati4y4,13m,BL,9.743399,5.289118,3.546685,2.252065,0.364370,0.006446,0.000874,0.000646,0.000335,0.000393,0.002640,0.234781,0.515431,6.902029,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,40.0,60.0,EMPTY,Ati4y4,13m,BL,9.659637,3.432095,2.952890,2.664100,0.501702,0.010693,0.000780,0.000431,0.000301,0.000429,0.002380,0.268458,0.570514,7.125666,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,60.0,80.0,EMPTY,Ati4y4,13m,BL,8.019988,4.654770,3.989350,2.232510,0.379963,0.006344,0.000768,0.000545,0.000354,0.000441,0.002365,0.253135,0.513020,6.995845,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,80.0,100.0,EMPTY,Ati4y4,13m,BL,10.128898,6.275184,4.311407,2.244360,0.345914,0.005611,0.000829,0.000860,0.000437,0.000408,0.002905,0.229648,0.489272,6.804249,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97192,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,14.301695,9.545716,6.748166,3.083927,0.484985,0.009293,0.001412,0.001110,0.000714,0.000652,0.004263,0.235449,0.495782,6.375799
97193,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12.951760,7.483678,8.885884,3.550965,0.607919,0.011084,0.001359,0.001051,0.000808,0.000620,0.004133,0.245724,0.507596,6.732231
97194,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.964770,11.220260,8.219229,3.688345,0.390388,0.008387,0.001312,0.001367,0.000901,0.000587,0.004408,0.231870,0.473012,6.679600
97195,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,13.978507,11.673446,9.956350,3.710070,0.622207,0.009848,0.001425,0.001440,0.001051,0.000693,0.004942,0.239261,0.480015,6.384767


In [17]:
import pyedflib

with pyedflib.EdfReader(f'{get_root()}/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x1_15m_BL_6h_fully_marked.edf') as edf_reader:
    n_channels = edf_reader.signals_in_file
    signal_labels = edf_reader.getSignalLabels()


    channel_index = signal_labels.index('FrL')

    data = edf_reader.readSignal(channel_index)
    sfreq = edf_reader.getSampleFrequency(channel_index)
    nyquist = 0.5 * sfreq  # Определяем nyquist здесь
    total_duration = len(data) / sfreq


start_time = 178
duration = 84

end_time = start_time + duration

if end_time > total_duration:
    end_time = total_duration
    duration = end_time - start_time

start_idx = int(start_time * sfreq)
end_idx = int(end_time * sfreq)

signal_data = data[start_idx:end_idx]

features = extract_features(signal_data, 256)

df_output = pd.DataFrame(features, columns=["Feature_Value"]) # ?
df_output.transpose()

KeyboardInterrupt: 

In [18]:
edf_path = f'{get_root()}/data/ECoG_fully_marked_(4+2 files, 6 h each)/Ati4x1_15m_BL_6h_fully_marked.edf'

# Load the EDF file
with pyedflib.EdfReader(edf_path) as edf_reader:
    n_channels = edf_reader.signals_in_file
    signal_labels = edf_reader.getSignalLabels()

    # Specify the desired channel
    channel_index = signal_labels.index('FrL')

    # Load the signal and its sample frequency
    data = edf_reader.readSignal(channel_index)
    sfreq = edf_reader.getSampleFrequency(channel_index)
    nyquist = 0.5 * sfreq
    total_duration = len(data) / sfreq

# List to hold feature rows
feature_rows = []

# Iterate through each row in the dataset
for _, row in all_valid_intervals.iterrows():
    start_time = row['X_FROM']
    duration = row['X_TO'] - row['X_FROM']
    end_time = start_time + duration

    # Adjust `end_time` if it exceeds the total signal duration
    if end_time > total_duration:
        end_time = total_duration
        duration = end_time - start_time

    # Convert times to indices
    start_idx = int(start_time * sfreq)
    end_idx = int(end_time * sfreq)

    # Extract the signal segment
    signal_data = data[start_idx:end_idx]

    # Extract features from this segment
    features = extract_features(signal_data, sfreq)

    # Convert features to DataFrame row and transpose
    feature_row = pd.DataFrame([features]).T
    feature_row = feature_row.transpose()
    feature_row.columns = [f"Feature_{i+1}" for i in range(len(features))]

    # Append the feature row
    feature_rows.append(feature_row)

# Concatenate all feature rows with the original DataFrame
features_df = pd.concat(feature_rows, ignore_index=True)
df_with_features = pd.concat([all_valid_intervals.reset_index(drop=True), features_df], axis=1)
df_with_features

ValueError: cannot convert float NaN to integer

In [None]:
signal_data.shape

(33600,)

In [None]:
signal_data

array([-0.167625 , -0.1756875, -0.1766875, ..., -0.0583125, -0.0336875,
       -0.0354375])

In [None]:
signal_labels

['FrL', 'FrR', 'OcR']

In [None]:
data.shape

(8640400,)

In [None]:
sfreq

400.0

In [None]:
all_valid_intervals.shape

(570, 6)

In [None]:
all_valid_intervals.LABEL.value_counts()

LABEL
swd    363
ds     169
is      38
Name: count, dtype: int64

In [10]:
valid_intervals.LABEL.value_counts()

LABEL
ds    57
is    11
Name: count, dtype: int64

In [None]:
# /Users/s.vandanov/Documents/GitHub/classification-system-ekog/data/ECoG_fully_marked_(4+2 files, 6 h each)
