In [61]:
from glob import glob
import os
import mne
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [62]:
# Get the data set path file
all_file_path = glob('../dataverse_files/*.edf')
print(len(all_file_path))
print(all_file_path[0])

28
../dataverse_files\h01.edf


In [63]:
# Separate healthy and schozofrenic patients eeg data
healthy_file_path = [i for i in all_file_path if 'h' in i.split('\\')[1]]
patient_file_path = [i for i in all_file_path if 's' in i.split('\\')[1]]

In [64]:
# Function to read the data from .edf files using mne

def read_data(file_path, low_freq=0.5, high_freq=45, duration=5, overlap=1):
    data = mne.io.read_raw_edf(file_path, preload=True)
    data.set_eeg_reference()
    data.filter(l_freq=low_freq, h_freq=high_freq)
    epochs = mne.make_fixed_length_epochs(data, duration=duration, overlap=overlap)
    array = epochs.get_data()
    return array

In [65]:
sample_data = read_data(healthy_file_path[0])
sample_data.shape   # Nb of epochs (time windows speficied by duration in the function above), channels (nb of electrodes in the eeg), lenght of the signal (nb of total points)

Extracting EDF parameters from c:\Projects\EEG Classification\dataverse_files\h01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 231249  =      0.000 ...   924.996 secs...
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1651 samples (6.604 s)

Not setting metadata
231 matching events found
No baseline correction applied
0 projection items

[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.0s


(231, 19, 1250)

In [66]:
%%capture

control_epochs_array = [read_data(i) for i in healthy_file_path]
patient_epochs_array = [read_data(i) for i in healthy_file_path]

# each array will have the data of 14 subjects

In [67]:
# labeling healthy data as "0" and schizofrenic patient data as "1" for each epoch on each subject

control_epochs_labels = [len(i)*[0] for i in control_epochs_array]
patient_epochs_labels = [len(i)*[1] for i in patient_epochs_array]

len(control_epochs_labels), len(patient_epochs_labels)

(14, 14)

In [68]:
data_list = control_epochs_array + patient_epochs_array    # all data
label_list = control_epochs_labels + patient_epochs_labels  # all labels

In [69]:
# Identifying data per subject
group_list = [[i]*len(j) for i,j in enumerate(data_list)]
print(len(group_list))

28


In [None]:
# Transforms into array of eeg signals stacked

data_array = np.vstack(data_list)
label_array = np.hstack(label_list)
group_array = np.hstack(group_list)
print(data_array.shape, label_array.shape, group_array.shape)   # (epochs, electrodes, total points), (labels - 0 for healthy, 1 for patient), (subjects)

(6502, 19, 1250) (6502,) (6502,)
