In [22]:
#import pandas as pd
import numpy as np
import os
#import glob
import pywt
from scipy.stats import skew, kurtosis, entropy
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['axes.grid'] = False


In [2]:
# Acquisition and windowing settings
sampling_rate = 512.0  # EEG sampling frequency, in Hz
epoch_duration_seconds = 20  # length of one epoch, in seconds
overlap_seconds = 10  # time shared by two consecutive epochs, in seconds

# Express the epoch length and the overlap as whole sample counts
epoch_duration, overlap = (
    int(seconds * sampling_rate)
    for seconds in (epoch_duration_seconds, overlap_seconds)
)

In [3]:
def generate_epoch(data, epoch_len=None, overlap_len=None):
    """Split a recording into fixed-length, overlapping epochs.

    Parameters
    ----------
    data : array_like
        Samples to segment. Slicing happens along the first axis.
    epoch_len : int, optional
        Number of samples per epoch. Defaults to the notebook-level
        ``epoch_duration``.
    overlap_len : int, optional
        Number of samples shared by two consecutive epochs. Defaults to the
        notebook-level ``overlap``.

    Returns
    -------
    numpy.ndarray
        Array whose first axis indexes epochs; each epoch holds ``epoch_len``
        consecutive samples. Trailing samples that do not fill a whole epoch
        are discarded. If ``data`` is shorter than one epoch the result has
        zero epochs but keeps the epoch axis (shape ``(0, epoch_len, ...)``).

    Raises
    ------
    ValueError
        If ``epoch_len`` is not positive or the step between epochs
        (``epoch_len - overlap_len``) is not positive.
    """
    # Fall back to the notebook configuration when no explicit sizes are given
    if epoch_len is None:
        epoch_len = epoch_duration
    if overlap_len is None:
        overlap_len = overlap

    step = epoch_len - overlap_len
    if epoch_len <= 0 or step <= 0:
        raise ValueError(
            "epoch length must be positive and larger than the overlap "
            f"(got epoch_len={epoch_len}, overlap_len={overlap_len})"
        )

    data = np.asarray(data)

    # Number of complete windows that fit; clamp at zero for short recordings
    num_epochs = max(0, (len(data) - epoch_len) // step + 1)
    if num_epochs == 0:
        return np.empty((0, epoch_len) + data.shape[1:], dtype=data.dtype)

    return np.array([data[i * step:i * step + epoch_len] for i in range(num_epochs)])

In [4]:
# Define a function to calculate Hjorth parameters
def hjorth_parameters(epoch):
    """Return the Hjorth activity, mobility and complexity of one epoch.

    Activity is the variance of the signal. Mobility is the square root of
    the first-difference variance divided by the activity. Complexity is the
    square root of the second-difference variance divided by the
    first-difference variance, itself divided by the mobility.

    Mobility is reported as 0 when the activity is 0, and complexity is
    reported as 0 when the mobility is 0.
    """
    activity = np.var(epoch)
    if activity == 0:
        # Flat signal: neither ratio is defined
        return activity, 0, 0

    mobility = np.sqrt(np.var(np.diff(epoch)) / activity)
    if mobility == 0:
        # Constant slope: the complexity ratio would divide by zero
        return activity, mobility, 0

    first_diff = np.diff(epoch)
    slope_mobility = np.sqrt(np.var(np.diff(first_diff)) / np.var(first_diff))
    return activity, mobility, slope_mobility / mobility

# Define a function to calculate Slope Sign Change (SSC)
def slope_sign_change(epoch):
    """Count how often the sign of the first difference changes between
    consecutive samples of the epoch."""
    slope_signs = np.sign(np.diff(epoch))
    # A non-zero step between neighbouring slope signs marks a change
    changed = np.diff(slope_signs) != 0
    return np.sum(changed)

def calculate_crest_factor(epoch):
    """Return the crest factor of an epoch: peak absolute amplitude over RMS.

    The peak is taken on the magnitude of the samples so that an epoch whose
    largest excursion is negative is measured correctly (using the signed
    maximum would under-report it, or even produce a negative ratio).

    Parameters
    ----------
    epoch : array_like
        Samples of one epoch.

    Returns
    -------
    float
        ``max(|epoch|) / rms(epoch)``, or 0.0 when the epoch is empty or its
        RMS is zero.
    """
    # Work in floating point so squaring integer samples cannot overflow
    samples = np.asarray(epoch, dtype=float)
    if samples.size == 0:
        return 0.0

    rms = np.sqrt(np.mean(samples ** 2))
    if rms == 0:
        return 0.0  # all-zero epoch: the ratio is undefined

    return np.max(np.abs(samples)) / rms

def calculate_entropy(epoch):
    """Return the Shannon entropy of the epoch's normalised sample magnitudes.

    ``scipy.stats.entropy`` normalises its input to sum to one and treats it
    as a probability distribution, so the weights must be non-negative.
    Signed samples are therefore converted to magnitudes first; without this
    an epoch mixing positive and negative samples yields ``-inf`` or ``nan``.
    For epochs whose samples all share one sign the result is unchanged.

    Parameters
    ----------
    epoch : array_like
        Samples of one epoch. Samples equal to zero are ignored.

    Returns
    -------
    float
        Entropy (natural logarithm) of the normalised magnitudes, or 0.0 when
        the epoch has no non-zero samples.
    """
    samples = np.asarray(epoch, dtype=float)
    weights = np.abs(samples[samples != 0])  # drop zeros, keep magnitudes
    if weights.size == 0:
        return 0.0
    return entropy(weights)
    
# Define a function to extract time-domain features from EEG epochs
def extract_time_domain_features(epochs):
    """Compute one row of 19 time-domain features for every epoch.

    Parameters
    ----------
    epochs : iterable of 1-D arrays
        The epochs to describe, one array of samples per epoch.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(number of epochs, 19)``. Columns, in order: mean,
        standard deviation, maximum, minimum, median, range, skewness,
        kurtosis, RMS, zero-crossing rate, waveform length, energy, crest
        factor, Hjorth activity, Hjorth mobility, Hjorth complexity, slope
        sign changes, mean absolute value, entropy. With no epochs the
        result is an empty ``(0, 19)`` array, so row-wise reductions such as
        ``.any(axis=1)`` still work.
    """
    n_features = 19  # keep in sync with the row assembled at the end of the loop
    features = []
    for epoch in epochs:
        # Amplitude statistics
        mean_amplitude = np.mean(epoch)
        std_amplitude = np.std(epoch)
        max_amplitude = np.max(epoch)
        min_amplitude = np.min(epoch)
        median_amplitude = np.median(epoch)
        range_amplitude = max_amplitude - min_amplitude

        # Distribution shape (scipy defaults)
        skewness = skew(epoch)
        kurt = kurtosis(epoch)

        rms = np.sqrt(np.mean(epoch**2))  # Root Mean Square

        # Zero-crossing rate: fraction of consecutive sample pairs whose sign
        # differs. Averaging the signed differences instead would telescope to
        # (sign[-1] - sign[0]) / (n - 1) and ignore every crossing in between.
        # A sample that is exactly zero counts as a sign transition.
        zcr = np.mean(np.diff(np.sign(epoch)) != 0)

        waveform_length = np.sum(np.abs(np.diff(epoch)))  # total variation
        energy = np.sum(epoch**2)  # Signal Energy

        crest_factor = calculate_crest_factor(epoch)
        activity, mobility, complexity = hjorth_parameters(epoch)
        ssc = slope_sign_change(epoch)
        mav = np.mean(np.abs(epoch))  # Mean Absolute Value
        entropy_value = calculate_entropy(epoch)

        features.append([mean_amplitude, std_amplitude, max_amplitude, min_amplitude, median_amplitude, range_amplitude,
                         skewness, kurt, rms, zcr, waveform_length, energy, crest_factor,
                         activity, mobility, complexity, ssc, mav, entropy_value])

    if not features:
        # Keep the result two-dimensional even when there is nothing to describe
        return np.empty((0, n_features))

    return np.array(features)

In [5]:
#LOADING EEG DATA AND PROCESSING
def load_data_process(data_path):
    """Load every ``ID#*`` recording in a folder and turn it into feature rows.

    For each regular file whose name starts with ``"ID#"`` the samples are
    read with ``np.loadtxt`` (space-delimited), passed through a level-5
    ``db4`` wavelet decomposition/reconstruction, cut into epochs with
    ``generate_epoch`` and described with ``extract_time_domain_features``.
    Feature rows containing NaN are dropped.

    Parameters
    ----------
    data_path : str
        Directory containing the recordings.

    Returns
    -------
    tuple
        ``(features, total_missing)``: the retained feature rows of all files
        stacked along axis 0, and the number of NaN feature values that were
        encountered before the affected rows were dropped.

    Raises
    ------
    ValueError
        If no matching file yields at least one epoch.
    """
    total_missing = 0
    all_eeg = []

    # Sorted so the row order of the result does not depend on the arbitrary
    # order in which os.listdir returns directory entries.
    for filename in sorted(os.listdir(data_path)):
        if not filename.startswith("ID#"):
            continue

        file_path = os.path.join(data_path, filename)
        if not os.path.isfile(file_path):
            continue

        # Only keep the file open for as long as it takes to read it
        with open(file_path, 'r') as file:
            eeg_data = np.loadtxt(file, delimiter=' ')

        # Wavelet decomposition followed by reconstruction.
        # NOTE(review): every coefficient is passed back unchanged, so this
        # round trip should reproduce the input up to floating-point error.
        # If denoising or band removal was intended, the coefficients need to
        # be thresholded or zeroed before waverec. Confirm the intent.
        coeffs = pywt.wavedec(eeg_data, wavelet='db4', level=5)
        filtered_eeg = pywt.waverec(coeffs, wavelet='db4')
        # waverec can return one extra trailing sample; trim to the input length
        filtered_eeg = filtered_eeg[..., :eeg_data.shape[-1]]

        epochs = generate_epoch(filtered_eeg)
        if len(epochs) == 0:
            # Recording shorter than one epoch: nothing to extract
            continue

        time_domain_feature = extract_time_domain_features(epochs)

        # Count NaN values, then drop every row that contains one.
        # NOTE(review): only NaN is treated as missing; +/-inf values pass
        # through unchanged. Confirm whether they should be removed as well.
        nan_mask = np.isnan(time_domain_feature)
        total_missing = total_missing + nan_mask.sum()
        all_eeg.append(time_domain_feature[~nan_mask.any(axis=1)])

    if not all_eeg:
        raise ValueError(
            f"No 'ID#*' recording with at least one full epoch found in {data_path!r}"
        )

    return np.concatenate(all_eeg, axis=0), total_missing

In [6]:
# Folder holding the baseline recordings
baseline_dir = '/Users/oak/Downloads/Data_Set/Baseline/'
# Run the loading / feature-extraction pipeline over that folder
(concat_baseline_eeg, total_missing) = load_data_process(data_path=baseline_dir)
# Show (retained feature rows, features per row)
concat_baseline_eeg.shape

  skewness = skew(epoch)  # Skewness
  kurt = kurtosis(epoch)  # Kurtosis


(584925, 19)

In [7]:
# Report how many NaN feature values were found in the baseline features
# (the rows containing them were dropped by load_data_process)
print('Total missing values:', total_missing)

Total missing values: 1442


In [8]:
# Write the baseline feature matrix to disk in NumPy .npy format.
# NOTE(review): the folder is spelled 'Processed_egg' - possibly a typo for
# 'Processed_eeg'; confirm against the directory on disk before renaming.
np.save('/Users/oak/Downloads/Data_Set/Processed_egg/Baseline_eeg.npy', concat_baseline_eeg)

In [11]:
# Folder holding the epileptogenesis recordings
epilepto_dir = '/Users/oak/Downloads/Data_Set/Epileptogenesis'
# Run the loading / feature-extraction pipeline over that folder
(concat_epile_eeg, total_missing) = load_data_process(data_path=epilepto_dir)
# Show (retained feature rows, features per row)
concat_epile_eeg.shape

  skewness = skew(epoch)  # Skewness
  kurt = kurtosis(epoch)  # Kurtosis


(834915, 19)

In [12]:
# Report how many NaN feature values were found in the epileptogenesis features
# (the rows containing them were dropped by load_data_process)
print('Total missing values:', total_missing)

Total missing values: 910


In [None]:
# Write the epileptogenesis feature matrix to disk in NumPy .npy format.
# NOTE(review): the folder is spelled 'Processed_egg' - possibly a typo for
# 'Processed_eeg'; confirm against the directory on disk before renaming.
np.save('/Users/oak/Downloads/Data_Set/Processed_egg/Epilepto_eeg.npy', concat_epile_eeg)