In [11]:
import os
import pandas as pd

def read_eda_files(root_folder):
    all_data = pd.DataFrame(columns=['ID', 'Task', 'EDA'])  # DataFrame to store the EDA data

    # Recursive traversal of all files in the root folder
    for root, dirs, files in os.walk(root_folder):
        for file in files:
            # Check if the file is an EDA data file
            if file.endswith("_eda_data_filtered.csv"):
                # Extract the individual ID and task from the file name
                file_parts = file.split('_')
                id = file_parts[0]  # Assuming the individual ID is the first part of the file name
                task = file_parts[1]  # Assuming the task is the second part of the file name

                # Read the EDA data from the file and add it to the DataFrame
                try:
                    eda_data = pd.read_csv(os.path.join(root, file))
                    eda_values = eda_data.values.flatten()
                    
                    
                    df = pd.DataFrame({'ID': id, 'Task': task, 'EDA': eda_values})
                    all_data = pd.concat([all_data, df], ignore_index=True)
                except Exception as e:
                    print(f"Error reading file {file}: {e}")

    return all_data

# Root folder containing the EDA data files
root_folder = r"C:\Users\manar.gani\Project\PREPROCESSING"
print(root_folder)

# Read all EDA files and store them in a DataFrame
eda_df = read_eda_files(root_folder)

# Display the first few rows of the DataFrame
print(eda_df.head())

C:\Users\manar.gani\Project\PREPROCESSING
     ID      Task                             EDA
0  2ea4  Baseline     0.0;39014;39010.66153676501
1  2ea4  Baseline     0.0;38980;39006.03390851931
2  2ea4  Baseline   0.002;38951;39001.35582154851
3  2ea4  Baseline  0.004;38935;38996.628609727966
4  2ea4  Baseline   0.006;38919;38991.85362939295


In [12]:
eda_df

Unnamed: 0,ID,Task,EDA
0,2ea4,Baseline,0.0;39014;39010.66153676501
1,2ea4,Baseline,0.0;38980;39006.03390851931
2,2ea4,Baseline,0.002;38951;39001.35582154851
3,2ea4,Baseline,0.004;38935;38996.628609727966
4,2ea4,Baseline,0.006;38919;38991.85362939295
...,...,...,...
34967995,y9z6,Video2,59.994;4244;4305.111262790446
34967996,y9z6,Video2,59.995;4246;4305.1097931467475
34967997,y9z6,Video2,59.996;4238;4305.108587412919
34967998,y9z6,Video2,59.997;4226;4305.107610946266


In [13]:
eda_df['EDA'] = eda_df['EDA'].str.split(';').str[-1]

In [14]:
eda_df

Unnamed: 0,ID,Task,EDA
0,2ea4,Baseline,39010.66153676501
1,2ea4,Baseline,39006.03390851931
2,2ea4,Baseline,39001.35582154851
3,2ea4,Baseline,38996.628609727966
4,2ea4,Baseline,38991.85362939295
...,...,...,...
34967995,y9z6,Video2,4305.111262790446
34967996,y9z6,Video2,4305.1097931467475
34967997,y9z6,Video2,4305.108587412919
34967998,y9z6,Video2,4305.107610946266


In [15]:
eda_df.to_csv('filtered_eda_data.csv', index=False)

In [20]:
def eda_stat(array, sampling_freq=500):
    x = np.array(array)
    eda = nk.eda_phasic(x, sampling_freq)
    scr = np.array(eda['EDA_Phasic'])
    scl = np.array(eda['EDA_Tonic'])
    x_axis = np.linspace(0, scl.shape[0]/sampling_freq, scl.shape[0])
    slope = np.polyfit(x_axis,scl,1)[0]
    
    df = pd.DataFrame(data = [x.max(), x.min(), x.mean(), x.std(),
                              stats.kurtosis(x), stats.skew(x), np.quantile(x,0.5),
                              x.max()/x.min(), slope, scr.max(), scr.min(), scr.mean(), scr.std(),
                              scl.max(), scl.min(), scl.mean(), scl.std()]).T

    df.columns = ['max_eda', 'min_eda', 'mean_eda', 'sd_eda', 'ku_eda', 'sk_eda', 'median_eda',
                  'dynrange','scl_slope', 'max_scr', 'min_scr', 'mean_scr', 'sd_scr', 'max_scl', 
                  'min_scl', 'mean_scl', 'sd_scl']
    
    return df

In [21]:
def eda_time(array, sampling_freq=500):
    x = np.array(array)
    eda = nk.eda_phasic(x, sampling_freq)
    scr = np.array(eda['EDA_Phasic'])
    
    _, info = nk.eda_peaks(scr, sampling_freq)
    peaks = info['SCR_Peaks']
    amplitude = info['SCR_Amplitude']
    recovery = info['SCR_RecoveryTime']
    
    nSCR = len(info['SCR_Peaks']) / (x.shape[0]/sampling_freq/60)
    aucSCR = np.trapz(scr)
    meanAmpSCR = np.nanmean(amplitude)
    maxAmpSCR = np.nanmax(amplitude)
    meanRespSCR = np.nanmean(recovery)
    sumAmpSCR = np.nansum(amplitude) / (x.shape[0]/sampling_freq/60)
    sumRespSCR = np.nansum(recovery) / (x.shape[0]/sampling_freq/60)

    df = pd.DataFrame(data = [nSCR, aucSCR, meanAmpSCR, maxAmpSCR, meanRespSCR,
                             sumAmpSCR, sumRespSCR]).T
    
    df.columns = ['nSCR', 'aucSCR', 'meanAmpSCR', 'maxAmpSCR', 'meanRespSCR',
                  'sumAmpSCR', 'sumRespSCR']
    return df

In [26]:
def feature_extraction(root_folder):

    # Recursive traversal of all files in the root folder
    features = pd.DataFrame()
    for root, dirs, files in os.walk(root_folder):
        for file in files:
            # Check if the file is an EDA data file
            if file.endswith('_eda_data_filtered.csv'):  #find all the RR file: "_rr_filtered.csv"
                # Extract the individual ID and task from the file name
                file_parts = file.split('_')
                idy = file_parts[0]  # Assuming the individual ID is the first part of the file name
                task = file_parts[1]  # Assuming the task is the second part of the file name

                # Read the EDA data from the file and add it to the DataFrame
                try:
                    eda_data = pd.read_csv(os.path.join(root, file),  sep=';')
                    # Calcule la moyenne des valeurs de la troisième colonne du fichier rr_data
                    signal_neurokit, info_neurokit=nk.eda_process(eda_df.values, sampling_rate=500)
                    signal_filtered=signal_neurokit['EDA_Clean']
                    df_stat = eda_stat(signal_filtered, sampling_freq)
                    df_time = eda_time(signal_filtered, sampling_freq)
                    identity=pd.DataFrame({'ID':[idy], 'Task':[task]})
                    features= pd.concat([identity, df_time, df_stat], axis=1)
                    

                    
                    
                except Exception as e:
                    print(f"Error reading file {file}: {e}")

    return features

In [27]:
root_folder=r"C:\Users\manar.gani\Project\PREPROCESSING"
features=feature_extraction(root_folder)

Error reading file 2ea4_Baseline_eda_data_filtered.csv: NeuroKit error: we expect the user to provide a vector, i.e., a one-dimensional array (such as a list of values). Current input of shape: (34968000, 3)
Error reading file 2ea4_Breathing_eda_data_filtered.csv: NeuroKit error: we expect the user to provide a vector, i.e., a one-dimensional array (such as a list of values). Current input of shape: (34968000, 3)
Error reading file 2ea4_Counting1_eda_data_filtered.csv: NeuroKit error: we expect the user to provide a vector, i.e., a one-dimensional array (such as a list of values). Current input of shape: (34968000, 3)
Error reading file 2ea4_Counting2_eda_data_filtered.csv: NeuroKit error: we expect the user to provide a vector, i.e., a one-dimensional array (such as a list of values). Current input of shape: (34968000, 3)
Error reading file 2ea4_Counting3_eda_data_filtered.csv: NeuroKit error: we expect the user to provide a vector, i.e., a one-dimensional array (such as a list of val