# Make dataset to train ML models

In [1]:
from sz_utils import data_handler

## 1. Take the 5 minutes before the first seizure in each file

In [3]:
# Walk through the pipeline for the first patient only (patients[0]).
# 1. Get the list of available patients
patients = data_handler.get_patients()

# 2. Get the EDF file names recorded for patients[0]
edf_files = data_handler.get_patient_edf(patients[0])

# Preview the first five file names
edf_files[:5]

['chb01_01.edf',
 'chb01_02.edf',
 'chb01_03.edf',
 'chb01_04.edf',
 'chb01_05.edf']

In [5]:
# 3. Load the seizure annotations for patients[0]: one row per EDF file with
#    its file_name, number_of_seizures and start_end_times
seizures = data_handler.get_seizure_data(patients[0])

# Preview the first rows of the annotation table
seizures.head()

Unnamed: 0,file_name,number_of_seizures,start_end_times
0,chb01_01.edf,0,
1,chb01_02.edf,0,
2,chb01_03.edf,1,"[(2996, 3036)]"
3,chb01_04.edf,1,"[(1467, 1494)]"
4,chb01_05.edf,0,


In [7]:
# 4. Partition the annotation table by whether the file contains any seizure
seizures_with = seizures.loc[seizures["number_of_seizures"].gt(0)]
seizures_without = seizures.loc[seizures["number_of_seizures"].eq(0)]

In [22]:
# 5. Parse the seizure (start, end) times, in seconds, for each file with seizures
from ast import literal_eval

# start_end_times is stored as text such as "[(2996, 3036)]". Iterating the column
# directly keeps the row order of seizures_with["file_name"] and avoids re-filtering
# the whole table by file name for every row.
seizure_times = [literal_eval(times) for times in seizures_with["start_end_times"]]

seizure_times

[[(2996, 3036)],
 [(1467, 1494)],
 [(1732, 1772)],
 [(1015, 1066)],
 [(1720, 1810)],
 [(327, 420)],
 [(1862, 1963)]]

In [17]:
# 6. File names of the recordings that contain seizures, taken from the same
#    rows (and in the same order) that seizure_times was built from
edf_files_with_seizures = seizures_with["file_name"].values
edf_files_with_seizures

array(['chb01_03.edf', 'chb01_04.edf', 'chb01_15.edf', 'chb01_16.edf',
       'chb01_18.edf', 'chb01_21.edf', 'chb01_26.edf'], dtype=object)

In [33]:
# 7. For each file with seizures, take the 5 minutes (256*60*5 samples) of signal
# that precede the first seizure in that file
seizure_samples = []
preictal_len = 256*60*5 # 256 Hz * 60 seconds * 5 minutes

for edf_file, seizure_time in zip(edf_files_with_seizures, seizure_times):
    edf_data = data_handler.get_edf_data(patients[0], edf_file)
    # seizure_time is a list[tuple[int, int]] of (start, end) seconds; only the
    # first seizure of the file is used. Convert its start to a sample index.
    first_seizure_start = seizure_time[0][0] * 256 # 256 Hz
    # Clamp at 0: if the seizure starts less than 5 minutes into the recording,
    # a negative start index would make the slice wrap around to the end of the data.
    preictal_start = max(first_seizure_start - preictal_len, 0)
    seizure_samples.append(edf_data[preictal_start:first_seizure_start])

Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_03.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_04.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_15.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_16.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_18.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_21.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_26.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


In [34]:
# Number of preictal samples (one per file with seizures) and the
# (time samples, columns) shape of the first one
len(seizure_samples), seizure_samples[0].shape

(7, (76800, 24))

In [35]:
# 8. Cut every preictal sample into consecutive 5-second windows
import numpy as np

samples_per_window = 256*5 # 256 Hz * 5 seconds
seizure_windows = [
    np.array_split(seizure_sample, seizure_sample.shape[0] // samples_per_window)
    for seizure_sample in seizure_samples
]

# (files, windows per file, shape of one window)
len(seizure_windows), len(seizure_windows[0]), seizure_windows[0][0].shape

(7, 60, (1280, 24))

## 2. Take an equal number of 5-minute samples from EDF files without seizures

In [36]:
# 9. No seizure samples
# Collect the file names of all recordings that contain no seizure
edf_files_without_seizures = seizures_without["file_name"].values
edf_files_without_seizures

array(['chb01_01.edf', 'chb01_02.edf', 'chb01_05.edf', 'chb01_06.edf',
       'chb01_07.edf', 'chb01_08.edf', 'chb01_09.edf', 'chb01_10.edf',
       'chb01_11.edf', 'chb01_12.edf', 'chb01_13.edf', 'chb01_14.edf',
       'chb01_17.edf', 'chb01_19.edf', 'chb01_20.edf', 'chb01_22.edf',
       'chb01_23.edf', 'chb01_24.edf', 'chb01_25.edf', 'chb01_27.edf',
       'chb01_29.edf', 'chb01_30.edf', 'chb01_31.edf', 'chb01_32.edf',
       'chb01_33.edf', 'chb01_34.edf', 'chb01_36.edf', 'chb01_37.edf',
       'chb01_38.edf', 'chb01_39.edf', 'chb01_40.edf', 'chb01_41.edf',
       'chb01_42.edf', 'chb01_43.edf', 'chb01_46.edf'], dtype=object)

In [37]:
# 10. Randomly pick as many seizure-free files as there are preictal samples
import random

random.seed(42)
# Sample from the full seizure-free list rather than from this cell's own previous
# output, so that re-running the cell always yields the same selection.
edf_files_without_seizures = random.sample(
    list(seizures_without["file_name"].values), len(seizure_samples)
)
edf_files_without_seizures

['chb01_10.edf',
 'chb01_02.edf',
 'chb01_24.edf',
 'chb01_22.edf',
 'chb01_46.edf',
 'chb01_07.edf',
 'chb01_32.edf']

In [38]:
# 11. For each selected seizure-free file, take the 5 minutes (256*60*5 samples)
# that end at the middle of the recording
no_seizure_samples = []

for edf_file in edf_files_without_seizures:
    edf_data = data_handler.get_edf_data(patients[0], edf_file)
    middle_time = edf_data.shape[0] // 2
    # Clamp at 0: for a recording shorter than 10 minutes the start index would be
    # negative and the slice would wrap around to the end of the data.
    segment_start = max(middle_time - 256*60*5, 0)
    no_seizure_samples.append(edf_data[segment_start:middle_time])

Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_10.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_02.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_24.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_22.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_46.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_07.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_32.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


In [39]:
# 12. Cut every seizure-free sample into consecutive 5-second windows
samples_per_window = 256*5 # 256 Hz * 5 seconds
no_seizure_windows = [
    np.array_split(no_seizure_sample, no_seizure_sample.shape[0] // samples_per_window)
    for no_seizure_sample in no_seizure_samples
]

# (files, windows per file, shape of one window)
len(no_seizure_windows), len(no_seizure_windows[0]), no_seizure_windows[0][0].shape

(7, 60, (1280, 24))

## 3. Chunk the data

In [40]:
# 13. From (n, a, (b, c)) to (n*a, b, c)
seizure_windows = np.asarray(seizure_windows)
# Merge all leading axes into one. The result equals concatenating along axis 0,
# but an already-flattened (n*a, b, c) array is left unchanged, so re-running
# this cell does not collapse the windows any further.
seizure_windows = seizure_windows.reshape(-1, *seizure_windows.shape[-2:])
seizure_windows.shape

(420, 1280, 24)

In [41]:
# 14. Same for no_seizure_windows: from (n, a, (b, c)) to (n*a, b, c)
no_seizure_windows = np.asarray(no_seizure_windows)
# Merge all leading axes into one. The result equals concatenating along axis 0,
# but an already-flattened (n*a, b, c) array is left unchanged, so re-running
# this cell does not collapse the windows any further.
no_seizure_windows = no_seizure_windows.reshape(-1, *no_seizure_windows.shape[-2:])
no_seizure_windows.shape

(420, 1280, 24)

# Wrap the steps above in a function

In [12]:
import numpy as np

import random
from typing import Tuple
from sz_utils import data_handler
from ast import literal_eval


# make function
def get_patient_windows(
    patient,
    sampling_rate: int = 256,
    preictal_seconds: int = 5 * 60,
    window_seconds: int = 5,
    seed: int = 42,
) -> Tuple[np.ndarray, np.ndarray]:
    """Get preictal and not preictal samples.

    For every EDF file of the patient that contains a seizure, the
    ``preictal_seconds`` of signal preceding the *first* seizure of that file
    are cut into consecutive windows of ``window_seconds``. The same number of
    seizure-free files is then drawn at random and, for each of them, the
    segment of the same length ending at the middle of the recording is
    windowed in the same way.

    If less than ``preictal_seconds`` of signal is available (seizure close to
    the start of the file, or a short seizure-free recording), the available
    part is used and only whole windows are kept, so the two classes may then
    differ slightly in size.

    :param patient: patient id name (eg. chb01)
    :type patient: str
    :param sampling_rate: sampling frequency of the recordings in Hz
    :type sampling_rate: int
    :param preictal_seconds: length of the segment taken from each file, in seconds
    :type preictal_seconds: int
    :param window_seconds: length of one window, in seconds
    :type window_seconds: int
    :param seed: seed of the private random generator used to pick the
        seizure-free files
    :type seed: int
    :raises ValueError: if any of the numeric parameters is not positive
    :return: preictal and not preictal samples, each of shape
        (windows, sampling_rate * window_seconds, 22)
    :rtype: Tuple[np.ndarray, np.ndarray]
    """
    import warnings

    features = ['FP1-F7', 'F7-T7', 'T7-P7', 'P7-O1', 'FP1-F3', 'F3-C3', 'C3-P3', 'P3-O1', 
            'FP2-F4', 'F4-C4', 'C4-P4', 'P4-O2', 'FP2-F8', 'F8-T8', 'T8-P8-0', 'P8-O2', 
            'FZ-CZ', 'CZ-PZ', 'P7-T7', 'T7-FT9', 'FT9-FT10', 'FT10-T8']

    if sampling_rate <= 0 or preictal_seconds <= 0 or window_seconds <= 0:
        raise ValueError(
            "sampling_rate, preictal_seconds and window_seconds must be positive"
        )
    segment_len = sampling_rate * preictal_seconds
    window_len = sampling_rate * window_seconds

    def cut_windows(edf_file, stop=None):
        """Load one file and window the segment that ends at sample ``stop``.

        ``stop=None`` means "the middle of the recording".
        """
        # The loaded data is indexed by column names and sliced by row below,
        # exactly as before (presumably a pandas DataFrame - confirm in sz_utils).
        edf_data = data_handler.get_edf_data(patient, edf_file)
        edf_data = edf_data[features]
        if stop is None:
            stop = edf_data.shape[0] // 2
        # Clamp at 0: a negative start index would wrap around to the end of the data.
        start = max(stop - segment_len, 0)
        segment = np.asarray(edf_data[start:stop])
        # Keep whole windows only, dropping any remainder from the *front* so the
        # kept windows stay aligned with the end of the segment.
        n_windows = segment.shape[0] // window_len
        segment = segment[segment.shape[0] - n_windows * window_len:]
        return segment.reshape(n_windows, window_len, segment.shape[1])

    def stack(chunks):
        """From a list of (a, b, c) arrays to one (n*a, b, c) array."""
        if not chunks:
            return np.empty((0, window_len, len(features)))
        return np.concatenate(chunks, axis=0)

    # 1. Get info about seizures and split the files into with / without seizures
    seizures = data_handler.get_seizure_data(patient)
    seizures_with = seizures[seizures["number_of_seizures"] > 0]
    seizures_without = seizures[seizures["number_of_seizures"] == 0]

    # 2. Preictal windows: the segment right before the first seizure of each file
    seizure_windows = []
    for edf_file, times in zip(seizures_with["file_name"], seizures_with["start_end_times"]):
        # start_end_times is stored as text such as "[(2996, 3036)]" (seconds)
        if isinstance(times, str):
            times = literal_eval(times)
        first_seizure_start = int(times[0][0] * sampling_rate)
        seizure_windows.append(cut_windows(edf_file, first_seizure_start))

    # 3. Randomly pick as many seizure-free files as there are files with seizures.
    # A private generator gives the same selection as seeding the global one,
    # without resetting the caller's random state.
    candidates = list(seizures_without["file_name"].values)
    n_files = len(seizure_windows)
    if n_files > len(candidates):
        warnings.warn(
            f"{patient}: only {len(candidates)} seizure-free files for "
            f"{n_files} files with seizures; the classes will be imbalanced"
        )
        n_files = len(candidates)
    chosen_files = random.Random(seed).sample(candidates, n_files)

    # 4. Not preictal windows: the segment ending at the middle of each chosen file
    no_seizure_windows = [cut_windows(edf_file) for edf_file in chosen_files]

    return stack(seizure_windows), stack(no_seizure_windows)

In [13]:
# Build the preictal (s) and not preictal (ns) windows for patient chb01
s, ns = get_patient_windows("chb01")

Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_03.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_04.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_15.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_16.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_18.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_21.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_26.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_10.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_02.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_24.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_22.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_46.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_07.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


Extracting EDF parameters from /mnt/e/Documents/projects/seizure-prediction/data/chb-mit-scalp-eeg-database-1.0.0/chb01/chb01_32.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  mne_data = mne.io.read_raw_edf(edf_path)


In [14]:
# Both arrays are (windows, samples per window, selected channels)
s.shape, ns.shape

((420, 1280, 22), (420, 1280, 22))