In [74]:
# Code to convert EDF to CSV

import pyedflib
import pandas as pd

# Function to convert EDF to CSV
def edf_to_csv(edf_file, csv_file):
    """Convert one EDF recording to a CSV file with one column per signal.

    Every signal in ``edf_file`` is read in full and stored under its
    signal label; the resulting table is written to ``csv_file`` without
    an index column.

    Parameters
    ----------
    edf_file : str
        Path of the EDF file to read.
    csv_file : str
        Path of the CSV file to write (an existing file is overwritten).

    Notes
    -----
    Columns are keyed by signal label, so if two signals share a label
    only the later one is kept.
    """
    edf_reader = pyedflib.EdfReader(edf_file)
    try:
        channel_names = edf_reader.getSignalLabels()

        # One full-length array per channel, in file order.
        data = {
            channel: edf_reader.readSignal(i)
            for i, channel in enumerate(channel_names)
        }
    finally:
        # Release the file handle even when reading a signal fails.
        edf_reader.close()

    df = pd.DataFrame(data)
    df.to_csv(csv_file, index=False)

# Convert Subject00_1.edf .. Subject35_1.edf into CSV files with the same stem
for i in range(36):
    stem = f'Subject{i:02d}_1'
    edf_file, csv_file = f'{stem}.edf', f'{stem}.csv'
    edf_to_csv(edf_file, csv_file)

In [93]:
# Delete the last 2 columns (they are not among the 19 EEG channels; what they contain has not been identified)

import pandas as pd
import os

# Function to delete the last two columns from a CSV file
def delete_last_two_columns(csv_file):
    """Rewrite ``csv_file`` in place without its two right-most columns.

    The file is loaded with pandas, every column except the final two is
    kept, and the result is written back to the same path without an
    index column.
    """
    trimmed = pd.read_csv(csv_file).iloc[:, :-2]
    trimmed.to_csv(csv_file, index=False)

# Strip the two trailing columns from Subject00_1.csv .. Subject35_1.csv.
# The files are overwritten in place, so every additional run of this loop
# removes two more columns.
for i in range(36):
    csv_file = f'Subject{i:02d}_1.csv'
    delete_last_two_columns(csv_file)

In [None]:
import pandas as pd

# Length of the segment kept from each recording:
# 60 seconds at 500 samples/second = 30000 rows.
rows_to_keep = 60 * 500

# For each of the 36 subjects, copy the final `rows_to_keep` rows of
# Subject<NN>_1.csv into truncated_Subject<NN>_1.csv.
for i in range(36):
    file_name = f'Subject{i:02d}_1.csv'
    output_file_name = f'truncated_{file_name}'

    data = pd.read_csv(file_name)

    # tail() returns the whole frame when it holds fewer rows than requested.
    data_last_60_seconds = data.tail(rows_to_keep)
    data_last_60_seconds.to_csv(output_file_name, index=False)

In [None]:
import pandas as pd

# Column labels applied to the header-less input files, in column order.
header = [
    'Fp1', 'Fp2', 'F3', 'F4', 'F7', 'F8', 'T3', 'T4', 'C3', 'C4',
    'T5', 'T6', 'P3', 'P4', 'O1', 'O2', 'Fz', 'Cz', 'Pz',
]

# How many trailing rows are dropped from every file.
rows_to_cut = 1000

# s00.csv .. s35.csv -> modified_s00.csv .. modified_s35.csv
for i in range(36):
    file_name = f's{i:02d}.csv'
    output_file_name = f'modified_{file_name}'

    # The inputs carry no header row; label the columns after loading.
    data = pd.read_csv(file_name, header=None)
    data.columns = header

    # A negative count makes head() return everything except the last rows.
    data_trimmed = data.head(-rows_to_cut)
    data_trimmed.to_csv(output_file_name, index=False)

    print(f'Processed {file_name}, saved as {output_file_name}')


In [23]:
import pandas as pd
import numpy as np
from scipy.signal import welch
import pywt

# Function to calculate band power from PSD
def bandpower(psd, freqs, band):
    """Integrate a power spectral density over one frequency band.

    Parameters
    ----------
    psd : array-like
        Power spectral density values, one per entry of ``freqs``.
    freqs : array-like
        Frequencies at which ``psd`` was evaluated.
    band : tuple
        ``(fmin, fmax)`` band limits; both ends are inclusive.

    Returns
    -------
    float
        Trapezoidal integral of ``psd`` over the frequencies inside the
        band (0.0 when fewer than two frequencies fall inside it).
    """
    # Accept plain sequences as well as arrays.
    psd = np.asarray(psd)
    freqs = np.asarray(freqs)

    fmin, fmax = band
    in_band = (freqs >= fmin) & (freqs <= fmax)

    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
    # fall back to the old name on releases that lack the new one.
    integrate = getattr(np, 'trapezoid', None) or np.trapz
    return integrate(psd[in_band], freqs[in_band])

# Function to extract features from EEG data
def extract_features(data, sf=500):
    """Compute wavelet-energy and band-power features for one EEG segment.

    Parameters
    ----------
    data : pandas.DataFrame
        Segment with exactly 19 columns. Columns are matched to the
        channel names below by position; the frame's own column labels
        are ignored and left unchanged.
    sf : int, optional
        Sampling frequency in samples per second (default 500).

    Returns
    -------
    pandas.DataFrame
        A single row. For every channel it holds ``<channel>_wavelet_energy``
        followed by ``<channel>_<band>_power`` for the delta, theta, alpha,
        beta and gamma bands.

    Raises
    ------
    ValueError
        If ``data`` does not have one column per channel.
    """
    # Channel names, in the column order expected from the input
    channel_names = ['Fp1', 'Fp2', 'F3', 'F4', 'F7', 'F8', 'T3', 'T4', 'C3', 'C4', 'T5', 'T6', 'P3', 'P4', 'O1', 'O2', 'Fz', 'Cz', 'Pz']

    # Frequency bands as (low, high) limits
    bands = {'delta': (1, 4), 'theta': (4, 8), 'alpha': (8, 13), 'beta': (13, 30), 'gamma': (30, 50)}

    if data.shape[1] != len(channel_names):
        raise ValueError(
            f'Length mismatch: expected {len(channel_names)} channel columns, '
            f'got {data.shape[1]}'
        )

    scales = np.arange(1, 128)
    features = {}

    for position, channel in enumerate(channel_names):
        # Positional access avoids relabelling (and so mutating) the caller's frame.
        signal = data.iloc[:, position].to_numpy()

        # Time-frequency analysis: total energy of the Morlet CWT coefficients
        coeffs, _ = pywt.cwt(signal, scales=scales, wavelet='morl', sampling_period=1 / sf)
        features[f'{channel}_wavelet_energy'] = np.sum(np.square(coeffs))

        # PSD via Welch's method with segments of 2 * sf samples
        freqs, psd = welch(signal, sf, nperseg=int(2 * sf))

        # Band power for each band, integrated from the PSD
        for band, freq_range in bands.items():
            features[f'{channel}_{band}_power'] = bandpower(psd, freqs, freq_range)

    return pd.DataFrame([features])

# Window length in rows: 5 seconds at 500 samples/second.
samples_per_window = 5 * 500

# One single-row feature frame per (participant, window) pair.
all_windows_df = []

# Walk over the truncated recordings of all 36 subjects.
for i in range(36):
    file_name = f'truncated_Subject{i:02d}_1.csv'
    data = pd.read_csv(file_name)

    # Twelve consecutive, non-overlapping windows per recording.
    for j in range(12):
        start_row = j * samples_per_window
        end_row = start_row + samples_per_window
        window_data = data.iloc[start_row:end_row]

        # Tag the feature row with its participant index and 1-based window number.
        window_features = extract_features(window_data).assign(
            Participant=i,
            Window=j + 1,
        )
        all_windows_df.append(window_features)

# Stack every window into a single table with a fresh 0..N-1 index.
all_participants_df = pd.concat(all_windows_df, ignore_index=True)

# Persist the table, then show it in the notebook.
all_participants_df.to_csv('extracted_features_windows_resting.csv', index=False)
display(all_participants_df)

Unnamed: 0,Fp1_wavelet_energy,Fp1_delta_power,Fp1_theta_power,Fp1_alpha_power,Fp1_beta_power,Fp1_gamma_power,Fp2_wavelet_energy,Fp2_delta_power,Fp2_theta_power,Fp2_alpha_power,...,Cz_beta_power,Cz_gamma_power,Pz_wavelet_energy,Pz_delta_power,Pz_theta_power,Pz_alpha_power,Pz_beta_power,Pz_gamma_power,Participant,Window
0,1.931874e+08,21.219146,7.982706,57.157342,11.639508,1.000080,2.615196e+08,28.509268,9.787913,67.926613,...,26.857014,1.582657,3.756728e+08,56.470441,16.403428,71.945753,20.876645,1.153739,0,1
1,1.995699e+08,19.841654,6.147316,43.659613,14.248195,1.074614,3.030335e+08,31.926661,8.399753,58.038298,...,25.238659,1.497023,3.348555e+08,35.289028,13.709126,68.213684,19.181798,0.877830,0,2
2,2.053806e+08,22.911314,12.010299,23.650567,13.991734,0.722587,2.602724e+08,28.988203,12.717833,30.951280,...,25.233624,1.209983,3.513000e+08,25.991442,19.564780,64.764706,23.879005,1.048006,0,3
3,1.830558e+08,14.904485,9.018637,31.810487,10.437537,0.846971,2.087408e+08,19.541213,10.905363,37.641240,...,19.806260,1.552312,5.116833e+08,33.848453,15.821802,52.896035,20.661117,1.175408,0,4
4,1.586093e+08,14.888547,9.283881,28.092781,12.157669,1.310075,2.502540e+08,32.582220,13.391448,35.819872,...,25.457891,1.894348,2.742534e+08,15.614317,8.680865,82.663898,22.555301,0.856952,0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,1.598868e+08,21.441018,11.276718,3.903584,6.944535,0.764807,1.758161e+08,16.366115,12.591301,5.907720,...,10.244830,1.898826,2.380867e+08,31.681387,18.835508,12.752544,11.693456,1.313544,35,8
428,1.605526e+08,13.846419,5.572742,4.528530,5.912857,0.772464,1.260190e+08,13.389226,7.811138,5.178674,...,10.414517,1.943760,1.276728e+08,23.517019,6.468156,8.187758,11.432486,1.328607,35,9
429,1.186367e+08,11.255664,6.717039,5.563373,5.182532,0.928863,1.419908e+08,14.096630,7.153911,8.338639,...,8.440853,1.841814,1.783215e+08,23.644653,12.754271,7.160163,8.674991,1.541832,35,10
430,1.458222e+08,19.914506,8.474761,5.485153,5.084372,1.120291,9.717741e+07,13.161623,6.617756,3.906845,...,8.042470,2.142052,1.339695e+08,20.865120,7.939429,12.752315,7.938179,1.469672,35,11
