In [None]:
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install mne
!pip install numpy
!pip install matplotlib
!pip install pandas

## **Wavelet Coefficients**

In [None]:
import numpy as np
from scipy.stats import skew, kurtosis, entropy
from scipy.signal import welch
from pywt import wavedec

def compute_energy(coefficients):
    if isinstance(coefficients, np.ndarray):
        return np.sum(np.square(np.abs(coefficients))) / len(coefficients)
    elif np.isscalar(coefficients):
        return np.square(np.abs(coefficients))
    else:
        raise ValueError("Unsupported type for coefficients")

def compute_total_energy(approximation_coefficients, detail_coefficients):
    total_energy = 0
    total_energy += compute_energy(approximation_coefficients)
    for detail_coefficient in detail_coefficients:
        total_energy += compute_energy(detail_coefficient)
    return total_energy

def calculate_D_Energy(detail_coefficients):
    total_energy = 0
    for detail_coefficient in detail_coefficients:
        total_energy += compute_energy(detail_coefficient)
    return total_energy


def compute_mean(coefficients):
    return np.mean(coefficients)

def compute_std(coefficients):
    return np.std(coefficients)

def calculate_D_mean(coeffs):
    valid_indices = [i for i in range(1, min(6, len(coeffs)))]
    return np.mean([np.mean(coeffs[i]) for i in valid_indices])


def calculate_A_mean(coeffs):
    return compute_mean(coeffs[0])

def calculate_D_std(coeffs):
    return np.mean([compute_std(coeffs[i]) for i in range(min(6, len(coeffs)))])

def calculate_A_std(coeffs):
    return compute_std(coeffs[0])


def wavelet_feature_extraction(data, type_wav, sampling_frequency, nperseg):
    # Perform wavelet decomposition
    coefficients = wavedec(data, type_wav, level=5)
    # print('coefficients: ',coefficients)

    # Extract features
    total_energy = compute_total_energy(coefficients[0], coefficients[1:])
    cD_Energy=calculate_D_Energy(coefficients[1:])
    cA_Energy=compute_energy(coefficients[0])
    cD_mean = calculate_D_mean(coefficients[1:])
    cA_mean = calculate_A_mean(coefficients[0])
    cD_std = calculate_D_std(coefficients[1:])
    cA_std = calculate_A_std(coefficients[0])

    # Features related to signal statistics
    features = {
        'total_energy': total_energy,
        'cD_Energy': cD_Energy,
        'cA_Energy': cA_Energy,
        'cD_mean': cD_mean,
        'cA_mean': cA_mean,
        'cD_std': cD_std,
        'cA_std': cA_std,

    }

    return features






## **Frequency domain Coeffients**

In [None]:
import scipy.signal
import numpy as np

def get_median_frequency(psd):
    median_frequency = np.median(psd)

    return median_frequency

def get_edge_frequency(psd):
    edge_frequency = np.where(psd >= psd.max() / 2)[0][0]

    return edge_frequency

def compute_power_spectral_density(data, sampling_frequency, nperseg=256):
    _, psd = scipy.signal.welch(data, fs=sampling_frequency, nperseg=nperseg)
    return psd

def butter_bandpass(lowcut, highcut, fs, order=4):
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = scipy.signal.butter(order, [low, high], btype='band')
    return b, a

def butter_bandpass_filter(data, lowcut, highcut, fs, order=4):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = scipy.signal.lfilter(b, a, data)
    return y

def compute_band_power(psd_result, freq_band_indices, fs, nperseg):
    freq_band_power = np.sum(psd_result[freq_band_indices]) * fs / nperseg
    return freq_band_power

def compute_spectral_entropy(psd):
    normalized_psd = psd / np.sum(psd)  # Normalize to obtain probabilities
    spectral_entropy = -np.sum(normalized_psd * np.log2(normalized_psd))
    return spectral_entropy

def extract_frequency_domain_features(signal, sampling_frequency, nperseg=256):
    # Apply Butterworth bandpass filters
    delta_band_signal = butter_bandpass_filter(signal, 0.5, 4, sampling_frequency)
    theta_band_signal = butter_bandpass_filter(signal, 4, 8, sampling_frequency)
    alpha_band_signal = butter_bandpass_filter(signal, 8, 13, sampling_frequency)
    beta_band_signal = butter_bandpass_filter(signal, 13, 30, sampling_frequency)
    gamma_band_signal = butter_bandpass_filter(signal, 30, 40, sampling_frequency)

    # Compute Power Spectral Density for each band
    delta_psd = compute_power_spectral_density(delta_band_signal, sampling_frequency, nperseg=nperseg)
    theta_psd = compute_power_spectral_density(theta_band_signal, sampling_frequency, nperseg=nperseg)
    alpha_psd = compute_power_spectral_density(alpha_band_signal, sampling_frequency, nperseg=nperseg)
    beta_psd = compute_power_spectral_density(beta_band_signal, sampling_frequency, nperseg=nperseg)
    gamma_psd = compute_power_spectral_density(gamma_band_signal, sampling_frequency, nperseg=nperseg)

    # Compute Band Power for each frequency band
    freq_band_indices = [range(int(nperseg * band[0] / sampling_frequency), int(nperseg * band[1] / sampling_frequency)) for band in [(0.5, 4), (4, 8), (8, 13), (13, 30), (30, 40)]]

    delta_band_power = compute_band_power(delta_psd, freq_band_indices[0], sampling_frequency, nperseg)
    theta_band_power = compute_band_power(theta_psd, freq_band_indices[1], sampling_frequency, nperseg)
    alpha_band_power = compute_band_power(alpha_psd, freq_band_indices[2], sampling_frequency, nperseg)
    beta_band_power = compute_band_power(beta_psd, freq_band_indices[3], sampling_frequency, nperseg)
    gamma_band_power = compute_band_power(gamma_psd, freq_band_indices[4], sampling_frequency, nperseg)

    # Compute Spectral Entropy
    spectral_entropy_result = compute_spectral_entropy(np.concatenate([delta_psd, theta_psd, alpha_psd, beta_psd, gamma_psd]))
    # Compute the power spectral density (PSD)
    psd, _ = scipy.signal.welch(signal, fs=sampling_frequency, nperseg=nperseg)



  # # Calculate the edge frequency
  #   # Print the frequency domain features
  #   print(f"Delta power: {delta_band_power}")
  #   print(f"Theta power: {theta_band_power}")
  #   print(f"Alpha power: {alpha_band_power}")
  #   print(f"Beta power: {beta_band_power}")
  #   print(f"Gamma power: {gamma_band_power}")
  #   print(f"spectral_entropy: {spectral_entropy_result}")


    # Return the results
    return {
        "delta_power": delta_band_power,
        "theta_power": theta_band_power,
        "alpha_power": alpha_band_power,
        "beta_power": beta_band_power,
        "gamma_power": gamma_band_power,
        "spectral_entropy": spectral_entropy_result,
    }





# **Time domain Coeffients**

In [None]:
import numpy as np
from scipy.fft import fft
import pywt
import os
from scipy.stats import entropy, skew, kurtosis
def compute_standard_deviation(data):
    return np.std(data)

def compute_skewness(data):
    return skew(data)

def compute_kurtosis(data):
    return kurtosis(data)

def compute_median(data):
    return np.median(data)

def compute_band_power_time(data, sampling_frequency, nperseg):
    _, power_density = welch(data, fs=sampling_frequency, nperseg=nperseg)
    return np.mean(power_density)
def peak_to_peak_voltage(data):
    return np.ptp(data)

def total_signal_area(data):
    return np.sum(np.abs(data))

def decorrelation_time(data):
    autocorrelation = np.correlate(data, data, mode='full')
    zero_crossings = np.where(np.diff(np.sign(autocorrelation)))[0]

    if len(zero_crossings) > 0:
        first_zero_crossing = zero_crossings[0]
        time_points = np.arange(len(autocorrelation))
        decorrelation_time = time_points[first_zero_crossing]
        return decorrelation_time
    else:
        # Return a default value if no zero-crossing is found
        return -1
def extract_time_domain_features(data,sampling_frequency, nperseg):
    features = {
        'std_deviation': compute_standard_deviation(data),
        'skewness': compute_skewness(data),
        'kurtosis': compute_kurtosis(data),
        'median': compute_median(data),
        'band_power': compute_band_power_time(data, sampling_frequency, nperseg),
        'peak_to_peak_voltage': peak_to_peak_voltage(data),
        'total_signal_area': total_signal_area(data),
        'decorrelation_time': decorrelation_time(data)
    }

    # # Print the time domain features
    # print(f"std_deviation: {compute_standard_deviation(data)}")
    # print(f"skewness: {compute_skewness(data)}")
    # print(f"kurtosis: {compute_kurtosis(data)}")
    # print(f"median: {compute_median(data)}")
    # print(f"band_power: {compute_band_power_time(data, sampling_frequency, nperseg)}")
    # print(f"peak_to_peak_voltage: {peak_to_peak_voltage(data)}")
    # print(f"total_signal_area: {total_signal_area(data)}")
    # print(f"decorrelation_time: {decorrelation_time(data)}")
    return features

# **Feature Extraction**

In [None]:
# !pip install numpy==1.25.1
import numpy as np
import scipy.signal as signal
from scipy.fft import fft
import pywt
import os
def extract_features(segment):
    # Number of channels and time points in the segment
    num_channels, num_time_points = segment.shape

    # Initialize a list to store the extracted features
    all_extracted_features = []

    # Iterate over each channel in the segment
    for channel_data in segment:
        extracted_features = []
        wavelet_name = 'db4'
        wavelet_features = wavelet_feature_extraction(channel_data, wavelet_name, 256,256)
        time_domain_features=extract_time_domain_features(channel_data,256,256)
        frequency_features=extract_frequency_domain_features(channel_data, 256)
        extracted_features.extend([wavelet_features,time_domain_features,frequency_features])

        all_extracted_features.append(extracted_features)
    return np.array(all_extracted_features)

# Extract features for each segment without creating the segments array explicitly
def extract_features_without_segments(data, time_window_duration):
    num_channels, num_data_points = data.shape
    data_per_segment = time_window_duration * 256  # Assuming a 256 Hz sampling rate

    # Calculate the number of segments
    num_segments = num_data_points // data_per_segment


    # Extract features for each segment directly
    all_features = []

    for i in range(num_segments):
        print('segment', i)
        features = extract_features(data[:, i * data_per_segment: (i + 1) * data_per_segment])
        all_features.append(features)

    all_features_array = np.array(all_features)
    return all_features



# Example usage:
folder_path = '/content/drive/MyDrive/EEG-Projects/CHB-MIT-2hour-Segments-Non-Seizure'
time_window_duration = 60  # 60 seconds as an example

all_data_features = load_and_process_files(folder_path, time_window_duration)


# **Load Files**

In [None]:
def load_and_process_files(folder_path, time_window_duration):
    # Get a list of all files in the folder
    file_list = [f for f in os.listdir(folder_path) if f.endswith('.npy')]

    all_data_features = []

    for file_name in file_list:
        file_path = os.path.join(folder_path, file_name)

        # Load preictal or interictal data from the file
        data = np.load(file_path)

        # Create overlapping segments for each channel
        features = extract_features_without_segments(data, time_window_duration)
        print('file: ',file_name)
        all_data_features.append(features)

        # Explicitly delete the data variable to release memory
        del data

    return all_data_features

Main

In [None]:
folder_path = '/content/drive/MyDrive/EEG-Projects/CHB-MIT-2hour-Segments-Non-Seizure'
time_window_duration = 60  # 60 seconds as an example

all_data_features =load_and_process_files(folder_path, time_window_duration)


In [None]:
 np.array(all_data_features).shape

(12, 120, 23, 3)

In [None]:
all_data_features[0][0]

array([[{'total_energy': 2.4842485118122007e-07, 'cD_Energy': 4.002323078824115e-08, 'cA_Energy': 2.0840162039297894e-07, 'cD_mean': -1.9781519507972e-06, 'cA_mean': -9.209952927454327e-05, 'cD_std': 6.043819780197209e-05, 'cA_std': 0.0},
        {'std_deviation': 9.049034413335049e-05, 'skewness': -0.042302631924195484, 'kurtosis': 6.964205894428396, 'median': 1.367521367521367e-06, 'band_power': 5.7515721965318415e-11, 'peak_to_peak_voltage': 0.0009318681318681318, 'total_signal_area': 0.7579108180708181, 'decorrelation_time': 51},
        {'delta_power': 5.611607116236649e-09, 'theta_power': 7.694392016773123e-10, 'alpha_power': 7.053922894562868e-11, 'beta_power': 1.4076153478552373e-10, 'gamma_power': 9.8504808050238e-11, 'spectral_entropy': 3.04945032773393}],
       [{'total_energy': 3.2076651248753924e-08, 'cD_Energy': 3.9489490892112675e-09, 'cA_Energy': 2.812770215954265e-08, 'cD_mean': -2.1661028197947935e-07, 'cA_mean': -2.6458022193576522e-05, 'cD_std': 2.0229647012480565e

In [None]:
!git clone https://github.com/oshan35/Epileptic-Seizure-Prediction-with-EEG

Cloning into 'Epileptic-Seizure-Prediction-with-EEG'...
remote: Enumerating objects: 10, done.[K
remote: Counting objects: 100% (10/10), done.[K
remote: Compressing objects: 100% (9/9), done.[K
remote: Total 10 (delta 3), reused 3 (delta 0), pack-reused 0[K
Receiving objects: 100% (10/10), 13.34 KiB | 4.45 MiB/s, done.
Resolving deltas: 100% (3/3), done.


In [18]:
cd /content/drive/MyDrive/Sem_5/Machine Learning/MLProject/Epileptic-Seizure-Prediction-with-EEG


/content/drive/MyDrive/Sem_5/Machine Learning/MLProject/Epileptic-Seizure-Prediction-with-EEG


In [24]:
!git commit -m 'feature extraction added'

[main 447d279] feature extraction added
 1 file changed, 1 insertion(+)
 create mode 100644 Feature_Extraction.ipynb


In [29]:
!git remote remove origin
!git remote add origin https://Nehara831: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@github.com/oshan35/Epileptic-Seizure-Prediction-with-EEG.git
!git push -u origin master


error: src refspec master does not match any
[31merror: failed to push some refs to 'https://github.com/oshan35/Epileptic-Seizure-Prediction-with-EEG.git'
[m

In [32]:
!git remote remove origin
!git remote add origin https://Nehara831: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@github.com/oshan35/Epileptic-Seizure-Prediction-with-EEG.git
!git pull origin master


fatal: couldn't find remote ref master


In [21]:
!git config --global user.email "tnehara831@gmail.com"
!git config --global user.name "Nehara831"

In [23]:
!git add .

In [16]:
import shutil

# Source and destination paths
source_path = '/content/drive/MyDrive/Sem_5/Machine Learning/MLProject/Feature_Extraction.ipynb'
destination_path = '/content/drive/MyDrive/Sem_5/Machine Learning/MLProject/Epileptic-Seizure-Prediction-with-EEG'

# Move the file
shutil.move(source_path, destination_path)


'/content/drive/MyDrive/Sem_5/Machine Learning/MLProject/Epileptic-Seizure-Prediction-with-EEG/Feature_Extraction.ipynb'

In [13]:
mv /content/drive/MyDrive/Colab Notebooks/Feature_Extraction.ipynb
/content/drive/MyDrive/Sem_5/Machine Learning/MLProject/Epileptic-Seizure-Prediction-with-EEG

SyntaxError: ignored

In [None]:
!git add .
!git commit -m "Feature extraction pipeline added"

Author identity unknown

*** Please tell me who you are.

Run

  git config --global user.email "you@example.com"
  git config --global user.name "Your Name"

to set your account's default identity.
Omit --global to set the identity only in this repository.

fatal: unable to auto-detect email address (got 'root@d191a9099d6f.(none)')


In [None]:
!git config --global user.email "tnehara831@gmail.com"
!git config --global user.name "Nehara831"


In [None]:
!git add .
!git commit -m "Feature extraction pipeline added"

On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean


In [None]:
!git pull




Already up to date.


In [None]:
!git push https://github.com/oshan35/Epileptic-Seizure-Prediction-with-EEG


fatal: could not read Username for 'https://github.com': No such device or address
