In [1]:
import numpy as np
import mne
import os
import pandas as pd

In [2]:
def process_edf_features(edf_file_path, output_csv_path):
    raw = mne.io.read_raw_edf(edf_file_path, preload=True)

    # Select only odd-numbered channels (1st, 3rd, 5th, etc.)
    odd_channels = raw.get_data()[0::2, :]  # Start at index 0 and take every second channel (0-based index)

    # Compute average signal
    sum_signal = np.sum(odd_channels, axis=0)
    avg_signal = sum_signal / odd_channels.shape[0]
    # Convert the avg_signal array to a DataFrame with each element in a separate column
    df = pd.DataFrame([avg_signal], columns=[f"Time_{i}" for i in range(avg_signal.shape[0])])
    # Add a label column with "epilepsy"
    df['Label'] = 'epilepsy'
    # Save the DataFrame to a CSV file
    df.to_csv("avg_signal.csv", index=False)


In [None]:
path = "process_epilepsy_20to24"
output_dir = "4channels20to25.csv"

# Loop through all EDF files in the directory
for filename in os.listdir(path):
    if filename.endswith('.edf'):
        input_dir = os.path.join(path, filename)
        
        # Process the file and extract features
        process_edf_features(input_dir, output_dir)
        
    print(f"Processed: {input_dir}")

In [5]:
epilepsy = "process_epilepsy_11to13/aaaaaanr_s007_t004.edf"

In [9]:
import numpy as np
import mne


raw = mne.io.read_raw_edf(epilepsy, preload=True)

# Select only odd-numbered channels (1st, 3rd, 5th, etc.)
odd_channels = raw.get_data()[0::2, :]  # Start at index 0 and take every second channel (0-based index)

# Compute average signal
sum_signal = np.sum(odd_channels, axis=0)
avg_signal = sum_signal / odd_channels.shape[0]

avg_signal
# Convert the avg_signal array to a DataFrame with each element in a separate column
df = pd.DataFrame([avg_signal], columns=[f"Time_{i}" for i in range(avg_signal.shape[0])])
# Add a label column with "epilepsy"
df['Label'] = 'epilepsy'
# Save the DataFrame to a CSV file
df.to_csv("avg_signal.csv", index=False)

Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\process_epilepsy_11to13\aaaaaanr_s007_t004.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 198911  =      0.000 ...   776.996 secs...


In [13]:
def process_edf_files(edf_files, output_csv):
    all_data = []
    max_length = 0  # To store the maximum length across all files
    
    # Process each EDF file
    for file in edf_files:
        raw = mne.io.read_raw_edf(file, preload=True)
        odd_channels = raw.get_data()[0::2, :]  # Select odd-numbered channels
        avg_signal = np.mean(odd_channels, axis=0)  # Compute average signal
        
        # Update max_length if this file has more time points
        if avg_signal.shape[0] > max_length:
            max_length = avg_signal.shape[0]
        
        all_data.append(avg_signal)
    
    # Create a DataFrame where each row corresponds to an EDF file
    df = pd.DataFrame()
    for i, data in enumerate(all_data):
        row_data = np.pad(data, (0, max_length - data.shape[0]), 'constant', constant_values=np.nan)  # Pad shorter rows with NaN
        df_row = pd.DataFrame([row_data], columns=[f"Time_{j}" for j in range(max_length)])
        df_row['Label'] = 'epilepsy'  # Add label column
        df = pd.concat([df, df_row], ignore_index=True)
    
    # Save to CSV
    df.to_csv(output_csv, index=False)
    print(f"Data has been saved to {output_csv}")

In [12]:
path = "process_epilepsy_20to24"
output_dir = "test.csv"

# Loop through all EDF files in the directory
for filename in os.listdir(path):
    if filename.endswith('.edf'):
        input_dir = os.path.join(path, filename)
        
        # Process the file and extract features
        process_edf_files(input_dir, output_dir)
        
    print(f"Processed: {input_dir}")

NotImplementedError: Only EDF files are supported, got .

In [7]:
def process_single_edf(file, output_csv):
    raw = mne.io.read_raw_edf(file, preload=True)
    
    # Select odd-numbered channels (1st, 3rd, 5th, etc.)
    odd_channels = raw.get_data()[0::2, :]  # Take odd-numbered channels (0-based index)
    odd_channels = raw.get_data()[1::2, :]  # even
    
    # Compute average signal
    avg_signal = np.mean(odd_channels, axis=0)

    # Create a DataFrame for this file
    df = pd.DataFrame([avg_signal], columns=[f"Time_{i}" for i in range(avg_signal.shape[0])])
    
    # Add a label column with "epilepsy"
    df['Label'] = 'epilepsy'
    
    # Save the DataFrame to a CSV file
    df.to_csv(output_csv, mode='a', header=not os.path.exists(output_csv), index=False)  # Append to CSV

    print(f"Processed file: {file}")

In [8]:
path = "process_epilepsy_20to24_2"
output_csv = "test2.csv"

# Loop through all EDF files in the directory
for filename in os.listdir(path):
    if filename.endswith('.edf'):
        input_file = os.path.join(path, filename)
        
        # Process the file and extract features
        process_single_edf(input_file, output_csv)

    print(f"Processed: {filename}")

Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\process_epilepsy_20to24_2\aaaaakoq_s004_t000.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 346749  =      0.000 ...  1386.996 secs...
Processed file: process_epilepsy_20to24_2\aaaaakoq_s004_t000.edf
Processed: aaaaakoq_s004_t000.edf
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\process_epilepsy_20to24_2\aaaaakoq_s010_t001.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 324249  =      0.000 ...  1296.996 secs...
Processed file: process_epilepsy_20to24_2\aaaaakoq_s010_t001.edf
Processed: aaaaakoq_s010_t001.edf
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\process_epilepsy_20to24_2\aaaaakoq_s011_t000.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 308991  =      0.000 ...  1206.996 secs...
Processed file: process_epilepsy_20to24_2\aaaaakoq_s011_t000.edf
Pr

In [12]:
def process_single_edf(file, df, max_length):
    raw = mne.io.read_raw_edf(file, preload=True)
    
    # Select even-numbered channels (2nd, 4th, 6th, etc.)
    even_channels = raw.get_data()[1::2, :]  # Take even-numbered channels (0-based index)
    
    # Compute average signal
    avg_signal = np.mean(even_channels, axis=0)
    
    # Update max_length if this file has more time points
    if avg_signal.shape[0] > max_length:
        max_length = avg_signal.shape[0]
    
    # Pad shorter rows with 0 and ensure consistency across all rows
    row_data = np.pad(avg_signal, (0, max_length - avg_signal.shape[0]), 'constant', constant_values=0)  # Pad shorter rows with 0
    df_row = pd.DataFrame([row_data], columns=[f"Time_{i}" for i in range(max_length)])
    df_row['Label'] = 'no_epilepsy'  # Add label column
    
    # Align existing data to new max_length by adding columns with 0 to shorter rows
    if df.shape[1] < max_length + 1:  # +1 for the Label column
        extra_columns = max_length - (df.shape[1] - 1)  # Subtract 1 for the Label column
        for i in range(df.shape[0]):
            df.loc[i, [f"Time_{j}" for j in range(df.shape[1] - 1, max_length)]] = 0  # Fill with 0
    
    # Concatenate the new row to the DataFrame
    df = pd.concat([df, df_row], ignore_index=True)
    
    return df, max_length


# Main function to process all EDF files in a directory
def process_all_edf_files(path, output_csv):
    df = pd.DataFrame()  # Initialize empty DataFrame to store all data
    max_length = 0  # To keep track of the maximum number of time points
    
    # Loop through all EDF files in the directory
    for filename in os.listdir(path):
        if filename.endswith('.edf'):
            input_file = os.path.join(path, filename)
            
            # Process the file and update the DataFrame
            df, max_length = process_single_edf(input_file, df, max_length)
            print(f"Processed: {filename}")
    
    # Save the final DataFrame to a CSV file
    df.to_csv(output_csv, index=False)
    print(f"Data has been saved to {output_csv}")

In [9]:
import numpy as np
import mne
import pandas as pd
import os

def process_single_edf(file, max_length, output_csv):
    raw = mne.io.read_raw_edf(file, preload=True)
    
    # Select even-numbered channels (2nd, 4th, 6th, etc.)
    even_channels = raw.get_data()[1::2, :]  # Take even-numbered channels (0-based index)
    
    # Compute average signal
    avg_signal = np.mean(even_channels, axis=0)
    
    # Update max_length if this file has more time points
    if avg_signal.shape[0] > max_length:
        max_length = avg_signal.shape[0]
    
    # Pad shorter rows with 0 and ensure consistency across all rows
    row_data = np.pad(avg_signal, (0, max_length - avg_signal.shape[0]), 'constant', constant_values=0)  # Pad shorter rows with 0
    df_row = pd.DataFrame([row_data], columns=[f"Time_{i}" for i in range(max_length)])
    df_row['Label'] = 'epilepsy'  # Add label column
    
    # Save to CSV file (append) without duplicating existing data
    df_row.to_csv(output_csv, mode='a', header=not os.path.exists(output_csv), index=False)
    
    return max_length

# Main function to process all EDF files in a directory
def process_all_edf_files(path, output_csv):
    max_length = 0  # To keep track of the maximum number of time points
    
    # Loop through all EDF files in the directory
    for filename in os.listdir(path):
        if filename.endswith('.edf'):
            input_file = os.path.join(path, filename)
            
            # Process the file and update the DataFrame
            max_length = process_single_edf(input_file, max_length, output_csv)
            print(f"Processed: {filename}")
    
    print(f"Data has been saved to {output_csv}")



In [10]:
# path = "process_noepilepsy_20to24"
path = "yes_under_1m"
output_csv = "yes_even.csv"


process_all_edf_files(path, output_csv)

Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\yes_under_1m\aaaaaanr_s017_t012.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 3327  =      0.000 ...    12.996 secs...
Processed: aaaaaanr_s017_t012.edf
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\yes_under_1m\aaaaaanr_s017_t030.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 6655  =      0.000 ...    25.996 secs...
Processed: aaaaaanr_s017_t030.edf
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\yes_under_1m\aaaaaanr_s017_t031.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 2047  =      0.000 ...     7.996 secs...
Processed: aaaaaanr_s017_t031.edf
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\yes_under_1m\aaaaaanr_s017_t033.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 4351  =      0.000 

In [1]:
import numpy as np
import mne
import pandas as pd
import os

# Function to get the duration of each EDF file
def get_edf_duration(file):
    raw = mne.io.read_raw_edf(file, preload=False)  # Preload=False for faster duration check
    duration = raw.times[-1]  # The last time point gives the duration in seconds
    return duration

# Function to process a single EDF file and append it to the CSV
def process_single_edf(file, max_length, output_csv):
    raw = mne.io.read_raw_edf(file, preload=True)
    
    # Select even-numbered channels (2nd, 4th, 6th, etc.)
    even_channels = raw.get_data()[0::2, :]  # Take even-numbered channels (0-based index)
    
    # Compute average signal
    avg_signal = np.mean(even_channels, axis=0)
    
    # Update max_length if this file has more time points
    if avg_signal.shape[0] > max_length:
        max_length = avg_signal.shape[0]
    
    # Pad shorter rows with 0 and ensure consistency across all rows
    row_data = np.pad(avg_signal, (0, max_length - avg_signal.shape[0]), 'constant', constant_values=0)  # Pad shorter rows with 0
    df_row = pd.DataFrame([row_data], columns=[f"Time_{i}" for i in range(max_length)])
    df_row['Label'] = 'epilepsy'  # Add label column
    
    # Save to CSV file (append) without duplicating existing data
    df_row.to_csv(output_csv, mode='a', header=not os.path.exists(output_csv), index=False)
    
    return max_length

# Main function to process all EDF files in a directory, with longest duration first
def process_all_edf_files(path, output_csv):
    # First, scan all EDF files to find their durations
    edf_files = []
    for filename in os.listdir(path):
        if filename.endswith('.edf'):
            input_file = os.path.join(path, filename)
            duration = get_edf_duration(input_file)  # Get the duration of the EDF file
            edf_files.append((input_file, duration))
    
    # Sort files by duration (descending, longest first)
    edf_files.sort(key=lambda x: x[1], reverse=True)
    
    # Initialize variables
    max_length = 0  # To keep track of the maximum number of time points
    
    # Process each file (starting with the longest one)
    for input_file, duration in edf_files:
        print(f"Processing: {os.path.basename(input_file)} with duration {duration:.2f} seconds")
        
        # Process the file and update the DataFrame
        max_length = process_single_edf(input_file, max_length, output_csv)
    
    print(f"Data has been saved to {output_csv}")

In [2]:
import numpy as np
import mne
import pandas as pd
import os

# Function to get the duration of each EDF file
def get_edf_duration_2(file):
    raw = mne.io.read_raw_edf(file, preload=False)  # Preload=False for faster duration check
    duration = raw.times[-1]  # The last time point gives the duration in seconds
    return duration

# Function to process a single EDF file and append it to the CSV
def process_single_edf_2(file, max_length, output_csv):
    raw = mne.io.read_raw_edf(file, preload=True)
    
    # Select even-numbered channels (2nd, 4th, 6th, etc.)
    even_channels = raw.get_data()[1::2, :]  # Take even-numbered channels (0-based index)
    
    # Compute average signal
    avg_signal = np.mean(even_channels, axis=0)
    
    # Update max_length if this file has more time points
    if avg_signal.shape[0] > max_length:
        max_length = avg_signal.shape[0]
    
    # Pad shorter rows with 0 and ensure consistency across all rows
    row_data = np.pad(avg_signal, (0, max_length - avg_signal.shape[0]), 'constant', constant_values=0)  # Pad shorter rows with 0
    df_row = pd.DataFrame([row_data], columns=[f"Time_{i}" for i in range(max_length)])
    df_row['Label'] = 'epilepsy'  # Add label column
    
    # Save to CSV file (append) without duplicating existing data
    df_row.to_csv(output_csv, mode='a', header=not os.path.exists(output_csv), index=False)
    
    return max_length

# Main function to process all EDF files in a directory, with longest duration first
def process_all_edf_files_2(path, output_csv):
    # First, scan all EDF files to find their durations
    edf_files = []
    for filename in os.listdir(path):
        if filename.endswith('.edf'):
            input_file = os.path.join(path, filename)
            duration = get_edf_duration_2(input_file)  # Get the duration of the EDF file
            edf_files.append((input_file, duration))
    
    # Sort files by duration (descending, longest first)
    edf_files.sort(key=lambda x: x[1], reverse=True)
    
    # Initialize variables
    max_length = 0  # To keep track of the maximum number of time points
    
    # Process each file (starting with the longest one)
    for input_file, duration in edf_files:
        print(f"Processing: {os.path.basename(input_file)} with duration {duration:.2f} seconds")
        
        # Process the file and update the DataFrame
        max_length = process_single_edf_2(input_file, max_length, output_csv)
    
    print(f"Data has been saved to {output_csv}")

In [3]:
path = "yes_under_1m"
# path = "process_noepilepsy_10to13"
# path = "under_1m"
output_csv = "epilepsy_odd.csv" 
process_all_edf_files(path, output_csv)

output_csv2 = "epilepsy_even.csv" 
process_all_edf_files_2(path, output_csv2)

Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\yes_under_1m\aaaaaanr_s017_t012.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\yes_under_1m\aaaaaanr_s017_t030.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\yes_under_1m\aaaaaanr_s017_t031.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\yes_under_1m\aaaaaanr_s017_t033.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\yes_under_1m\aaaaaanr_s018_t003.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\yes_under_1m\aaaaaanr_s018_t014.edf...
EDF file detected
Setting channel

In [15]:
import numpy as np
import mne
import pandas as pd
import os

# Function to get the data from even-numbered channels
def get_even_channel_data(file):
    raw = mne.io.read_raw_edf(file, preload=True)
    even_channels = raw.get_data()[0::2, :]  # Get even-numbered channels (0-based index)
    avg_signal = np.mean(even_channels, axis=0)  # Compute the average signal
    return avg_signal

# First pass: Determine the maximum length across all files
def find_max_length(path):
    max_length = 0
    for filename in os.listdir(path):
        if filename.endswith('.edf'):
            input_file = os.path.join(path, filename)
            avg_signal = get_even_channel_data(input_file)  # Get the average signal
            if avg_signal.shape[0] > max_length:
                max_length = avg_signal.shape[0]  # Update max_length if this file is longer
    return max_length

# Function to process a single EDF file and append it to the CSV
def process_single_edf_2(file, max_length, output_csv, label):
    avg_signal = get_even_channel_data(file)
    
    # Pad shorter signals with 0 and ensure consistency across all rows
    row_data = np.pad(avg_signal, (0, max_length - avg_signal.shape[0]), 'constant', constant_values=0)  # Pad to max_length
    df_row = pd.DataFrame([row_data], columns=[f"Time_{i}" for i in range(max_length)])  # Create a DataFrame
    
    # Add the manually provided label column
    df_row['Label'] = label
    
    # Save to CSV file (append) without duplicating existing data
    df_row.to_csv(output_csv, mode='a', header=not os.path.exists(output_csv), index=False)

# Main function to process all EDF files in a directory, ensuring the same time point length
def process_all_edf_files_2(path, output_csv, label):
    # First, calculate the max length by checking all EDF files
    max_length = find_max_length(path)
    print(f"Maximum time points across all files: {max_length}")
    
    # Process each EDF file
    for filename in os.listdir(path):
        if filename.endswith('.edf'):
            input_file = os.path.join(path, filename)
            print(f"Processing: {os.path.basename(input_file)} with label: {label}")
            
            # Process the file and append data to CSV
            process_single_edf_2(input_file, max_length, output_csv, label)

    print(f"Data has been saved to {output_csv}")

# Example usage:
# You can call this function with either 'epilepsy' or 'no_epilepsy' depending on the files you are processing:
# process_all_edf_files_2('path_to_edf_files', 'output_file.csv', 'epilepsy')
# process_all_edf_files_2('path_to_edf_files', 'output_file.csv', 'no_epilepsy')


In [16]:
path = "no_under_1m"
process_all_edf_files_2(path, 'ni_odd.csv', 'no_epilepsy')


Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\no_under_1m\aaaaaebo_s002_t001.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 5749  =      0.000 ...    22.996 secs...
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\no_under_1m\aaaaaigj_s004_t000.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 6749  =      0.000 ...    26.996 secs...
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\no_under_1m\aaaaaigj_s005_t000.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 6749  =      0.000 ...    26.996 secs...
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\no_under_1m\aaaaalpi_s001_t003.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 3499  =      0.000 ...    13.996 secs...
Extracting EDF parameters from d:\FGW\EDF\v2.0.1_new\no_under_1m\aaaaalyg_s001_t000.