# Hyperbilirubinemia (exchange blood transfusion) dataset (36 subjects)

Reference datasets:
- healthy dataset
- hyper-blood-change-notchange-sample

This dataset contains no annotations. We would therefore like to:

1. Check the EEG signal quality through time-frequency analysis (hyperbilirubinemia vs. healthy dataset).
2. Compare exchange transfusion vs. no exchange transfusion through time-frequency analysis.
3. Predict the timing of the different sleep stages.
4. Compare the sleep quality with that of the healthy dataset.

## load the dataset

In [1]:
import os
import copy
import pylab
import numpy as np
import pandas as pd
import matplotlib
#matplotlib.use("TkAgg")
import matplotlib.pyplot as plt
import mne 
import seaborn as sns
from multiprocessing import Pool
import threading

mne.set_log_level('ERROR')

In [2]:
def readPath(path):
    """Collect the paths of all ``.edf`` files found below ``path``.

    The directory tree is walked in sorted order, so the returned list is
    the same on every run and every file system. Later cells select
    recordings by their position in this list, so the order matters.

    Parameters
    ----------
    path : str
        Root directory to search recursively.

    Returns
    -------
    list of str
        Paths (``root`` joined with the file name) of every file whose name
        ends with ``.edf``: files of a directory first, in sorted order,
        then its sub-directories in sorted order. Empty if nothing matches
        or ``path`` does not exist.
    """
    file_path = []
    for root, dirs, files in os.walk(path):
        # Sorting ``dirs`` in place makes os.walk descend in a fixed order.
        dirs.sort()
        file_path.extend(
            os.path.join(root, name)
            for name in sorted(files)
            if name.endswith('.edf')
        )

    return file_path


In [3]:
def loadFile(path, exclude_channels=True, 
                             crop_wake_mins=30):
    """Load one raw .edf recording and tag it with its subject id.

    Parameters
    ----------
    path : str
        Path to the .edf file containing the raw data.
    exclude_channels : bool
        If True, the ECG and leg-EMG channels are skipped while reading
        (speeds up loading). If False, every channel is read and the
        ECG/EMG channels present in the file get their channel type set.
    crop_wake_mins : float
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    mne.io.Raw :
        Raw object with the 'EEG ' prefix removed from the channel names and
        ``info['subject_info']['id']`` set to the file name without its
        extension (a str, e.g. '113').
    """
    # Auxiliary (non-EEG) channels and the MNE channel type of each one.
    aux_channel_types = {
        'ECG': 'ecg',
        'EMG Left_Leg': 'emg',
        'EMG Right_Leg': 'emg',
    }

    exclude = list(aux_channel_types) if exclude_channels else ()

    # Read the raw data and annotations.
    raw = mne.io.read_raw_edf(path, exclude=exclude)

    if not exclude_channels:
        # set_channel_types needs a {channel name: type} dict and rejects
        # names missing from the recording, so pass only what is present.
        present = {name: kind for name, kind in aux_channel_types.items()
                   if name in raw.ch_names}
        if present:
            raw.set_channel_types(present)

    # Rename EEG channels: drop the 'EEG ' prefix ('EEG Fp1-AV' -> 'Fp1-AV').
    ch_names = {i: i.replace('EEG ', '') 
                for i in raw.ch_names if 'EEG' in i}
    mne.rename_channels(raw.info, ch_names)

    # Store the subject id, taken from the file name (e.g. '113.edf' -> '113').
    basename = os.path.basename(path)
    subj_nb = os.path.splitext(basename)[0]
    raw.info['subject_info'] = {'id': subj_nb}

    return raw

In [4]:
# Dataset folders. The trailing number is presumably the number of recordings
# in each folder (36 matches len(raws) below) -- TODO confirm for the others.
# Only path_3 is active; the other datasets are commented out.
# path_1 = './healthy-2023' #24
# path_2 = './healthy-2024' #23
path_3 = './hyper-bloodchange-2024' #36
# path_4 = './hyper-blood-change-notchange-sample' #6

In [5]:
# Collect the .edf file paths of each dataset (only res_3 is active).
# res_1 = readPath(path_1) #healthy - from 2023
# res_2 = readPath(path_2) # healthy - 2024
res_3 = readPath(path_3) # EEG of hyperbilirubinemia exchange-transfusion cases
# res_4 = readPath(path_4) # hyperbilirubinemia (3 with exchange transfusion, 3 without)

#start with the healthy dataset
# res_healthy = res_1 + res_2

In [6]:
# Load every recording of the exchange-transfusion dataset, one Raw per file.
# The other datasets would be loaded the same way; they are disabled here.
# raws_healthy = [loadFile(f) for f in res_healthy]
raws = list(map(loadFile, res_3))
# raws_4 = [loadFile(f) for f in res_4]


In [7]:
# Number of recordings that were loaded.
len(raws)

36

#### Check the data quality, pick only a few recordings

Check the length of each recording, check how many channels each recording has. <br>
Remove the following files: <br>
10 in total:
- 9 channels: 200, 221, 222, 223
- very short length: 201, 202, 203, 204, 205, 206

##### Now we have 26 recordings.

In [8]:
# Subject ids removed after the quality check described above.
exclude_id = [
    200, 221, 222, 223,            # 9 channels
    201, 202, 203, 204, 205, 206,  # very short recordings
]
# Offset of each excluded id from 200.
exclude_num = list(map(lambda subject_id: subject_id - 200, exclude_id))
exclude_num

[0, 21, 22, 23, 1, 2, 3, 4, 5, 6]

In [9]:
# Keep only the recordings whose subject id is not in exclude_id.
# The id is read from info['subject_info']['id'] (set by loadFile from the
# file name, as a str), so the result does not depend on the order in which
# the files were found or on the ids being consecutive from 200.
excluded_ids = {str(subject_id) for subject_id in exclude_id}
raws_subset = [
    raw for raw in raws
    if raw.info['subject_info']['id'] not in excluded_ids
]
len(raws_subset)    

26

In [10]:
# Display the retained recordings (file name, channels x samples, duration).
raws_subset

[<RawEDF | 207.edf, 12 x 18803500 (37607.0 s), ~14 kB, data not loaded>,
 <RawEDF | 208.edf, 12 x 24201000 (48402.0 s), ~14 kB, data not loaded>,
 <RawEDF | 209.edf, 12 x 9331500 (18663.0 s), ~14 kB, data not loaded>,
 <RawEDF | 210.edf, 12 x 7309500 (14619.0 s), ~14 kB, data not loaded>,
 <RawEDF | 211.edf, 12 x 24315000 (48630.0 s), ~14 kB, data not loaded>,
 <RawEDF | 212.edf, 12 x 22587500 (45175.0 s), ~14 kB, data not loaded>,
 <RawEDF | 213.edf, 9 x 8326000 (16652.0 s), ~12 kB, data not loaded>,
 <RawEDF | 214.edf, 12 x 23199500 (46399.0 s), ~14 kB, data not loaded>,
 <RawEDF | 215.edf, 12 x 22196000 (44392.0 s), ~14 kB, data not loaded>,
 <RawEDF | 216.edf, 12 x 23241000 (46482.0 s), ~14 kB, data not loaded>,
 <RawEDF | 217.edf, 12 x 9648000 (19296.0 s), ~14 kB, data not loaded>,
 <RawEDF | 218.edf, 12 x 22276000 (44552.0 s), ~14 kB, data not loaded>,
 <RawEDF | 219.edf, 12 x 11635000 (23270.0 s), ~14 kB, data not loaded>,
 <RawEDF | 220.edf, 12 x 8073500 (16147.0 s), ~14 kB, da

Quick test: extract fixed-length epochs, which does not require any events.

In [11]:
# epochs = mne.make_fixed_length_epochs(raws[0], duration=30, preload=False)
# epochs.plot_image(picks=1)

In [12]:
# #No events in this dataset
# count = -1
# for raw in raws:
#     count +=1
#     print(count)
#     events, _ = mne.events_from_annotations(
#         raw, 
#         chunk_duration=30) 
#     print(events)

In [13]:
# i = -1
# for raw in raws:
#     i += 1
#     print("subject: ",i)
#     events,_ = mne.events_from_annotations(raw,chunk_duration=30)
#     print(len(events))

## 这个数据集读不出来标注，可是使用EDFBrowser查看，有些文件是有标注的，需要确认是否有标注。<br>

With `chunk_duration=30`, the annotations cannot be read as events. Possibly the annotations only mark the start time of each stage, so none of them has a duration of more than 30 s.

## Time frequency analysis

objective: to generate PSD plots -- per subject, per sleep stage <br>
method:
1. filter (keep < 30 HZ)
2. extract epochs
3. compute PSD for epochs of different sleep stages

- (exchange vs not exchange blood)
- vs healthy

In [14]:
def filter(raw, l_freq=None, h_freq=30):
    """Load a recording into memory and filter it in place.

    With the defaults (no lower edge, 30 Hz upper edge) this is a 30 Hz
    low-pass filter.

    NOTE: this name shadows Python's builtin ``filter``; it is kept because
    other cells call it under this name.

    Parameters
    ----------
    raw : mne.io.Raw
        Recording to filter; it is modified in place.
    l_freq : float | None
        Lower pass-band edge in Hz, or None for no high-pass filtering.
    h_freq : float | None
        Upper pass-band edge in Hz, or None for no low-pass filtering.
    """
    # load_data() is required first: the data must be in memory to be filtered.
    raw.load_data().filter(l_freq, h_freq)  # filtering happens in-place  

Filter the data.

In [15]:
# Low-pass filter every retained recording in place (filter() uses a 30 Hz
# upper edge), which also removes the power line noise.
for raw in raws_subset:
    filter(raw)

Generate an object, `epochs_all`: the collection of epochs, with one entry per subject.

### Epoch Image Map -- Frequency analysis: per subject x electrode positions

Based on the international 10-20 system of electrode placement and the electrodes we have: <br>
Our electrodes can be arranged into: <br>
- Frontal lobe: FP1, FP2 (picks: 0,1)
- Parietal lobe: C3, Cz, C4, P3, Pz, P4 (picks: 2,10,3,4,11,5)
- Temporal lobe: T7, T8 (picks: 8,9)
- Occipital lobe: O1, O2 (picks: 6,7)

Next, we can plot epochs of each sleep stage as an image map: <br>
1. each row of pixels in the image representing a single epoch
2. the horizontal axis representing time
3. each pixel's color representing the signal value at that time sample for that epoch

We saved the image maps.

Plot image maps and save.

In [16]:
# mne.make_fixed_length_epochs(raws[3], 30, preload=False)

In [17]:
# def EpochImageMap(raw, chunk_duration=30):
#     epoch = mne.make_fixed_length_epochs(raw, chunk_duration, preload=False)
#     picks = epoch.picks
#     images = epoch.plot_image(picks=picks, group_by=dict(Frontal=[0,1], Parietal=[2,10,3,4,11,5], Temporal=[8,9], Occipital=[6,7]),combine='median')
    
#     file_number = list(epoch.info['subject_info'].values())[0]
#     location = ['frontal', 'parietal', 'temporal', 'occipital']
#     count=-1
#     for image in images:
#         count+=1
#         im_name = './analysis images/hyper(36)/' + file_number +'-'+location[count] + '.jpg' #add image name
#         image.savefig(im_name)
#         plt.close('all')
#         plt.close(image)
        
#     print('successful')

In [18]:
# raws_subset

In [19]:
# for raw in raws_subset:
#     EpochImageMap(raw)

### Connectivity analysis

EEG信号按频谱不同可划分为四种基本类型：<br>

- α波：频率分布为8-13HZ，主要包含两个波段，μ1（8-10HZ）和μ2（10-13HZ），振幅约为20-100μV，在枕叶及顶叶后部α波最显著。
- β波：频率在（13-30HZ）包含两个波段β1（13-20HZ），β2（20-30HZ），振幅约为5-20μV，主要出现在额叶。
- θ波：频率在4-7HZ，振幅约为100-150μV，在困倦时出现，是中枢神经系统抑制的表现。
- δ波：频率在0.5-3.5HZ，振幅约为0-200μV，只出现在睡眠，深度麻醉，缺氧或大脑病变时出现。

- https://mne.tools/0.23/auto_tutorials/epochs/60_make_fixed_length_epochs.html

## Frequency analysis


In [20]:
# Cut the first loaded recording into consecutive 30 s epochs (data not preloaded).
# NOTE(review): raws[0] is position 0, which the exclusion cell drops from
# raws_subset, so this recording was also not low-pass filtered -- confirm
# that raws_subset[0] was not intended.
epochs = mne.make_fixed_length_epochs(raws[0], duration=30, preload=False)

In [21]:
# Display the summary of the fixed-length epochs.
epochs

0,1
Number of events,488
Events,1: 488
Time range,0.000 – 29.998 s
Baseline,off


In [None]:
# Plot the power spectral density of the "AS1" epochs for every subject.
# NOTE(review): epochs_all is not assigned in any cell above this one; the only
# visible assignment is in the threaded cell below -- presumably that cell has
# to run first (TODO confirm).
# NOTE(review): "AS1" is presumably a sleep-stage event label; the fixed-length
# epochs shown above only have the event id "1", so this selection assumes
# epochs built from annotated data -- confirm.
for epoch in epochs_all:
    # The subject id is the first value stored in info['subject_info'].
    file_number = list(epoch.info['subject_info'].values())[0]
    print('Subject: ', file_number)
    # picks are channel indices; which electrodes 1, 2, 4 and 11 refer to
    # depends on the channel order of the recording -- TODO confirm.
    epoch["AS1"].compute_psd(picks=[1,2,4,11]).plot()
    plt.show()

In [None]:
# Extract epochs for every recording, one thread per recording, and collect
# the per-recording results in epochs_all.
# NOTE(review): raws_4 is only assigned in a commented-out line of the loading
# cell, and MyThread / extract_epochs are not defined in the visible part of
# this notebook -- confirm where they come from before running this cell.
if __name__ == '__main__' :
    Threads = []
    epochs_all = []
    i = 0
    for raw in raws_4:
        # NOTE(review): args is a single Raw here, not a tuple -- presumably
        # MyThread passes it straight to func; verify against MyThread.
        t = MyThread(func=extract_epochs,args=raw,name ='Thread'+ str(i))
        i+=1
        Threads.append(t)
        t.start()
    # Wait for each thread in start order, so results keep the order of raws_4.
    for t in Threads:
        t.join()
        epochs_all.append(t.getResult()) #epochs_all would be the dataset of all epochs