In [1]:
import re, datetime, operator, logging, sys
import numpy as np
from collections import namedtuple


In [2]:
import os
import numpy as np

import argparse
import glob
import math
import ntpath

import shutil
import urllib
# import urllib2

from datetime import datetime
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
from mne.io import concatenate_raws, read_raw_edf
#import dhedfreader
import xml.etree.ElementTree as ET

In [10]:
cd shhs1

/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1


In [11]:
ls

copy.py                   shhs1-201254.edf  shhs1-203433.edf  shhs1-204529.edf
selected_shhs1_files.txt  shhs1-201270.edf  shhs1-203460.edf  shhs1-204543.edf
shhs1-200010.edf          shhs1-201316.edf  shhs1-203478.edf  shhs1-204550.edf
shhs1-200017.edf          shhs1-201329.edf  shhs1-203479.edf  shhs1-204553.edf
shhs1-200039.edf          shhs1-201359.edf  shhs1-203514.edf  shhs1-204554.edf
shhs1-200052.edf          shhs1-201371.edf  shhs1-203523.edf  shhs1-204576.edf
shhs1-200097.edf          shhs1-201515.edf  shhs1-203533.edf  shhs1-204611.edf
shhs1-200122.edf          shhs1-201552.edf  shhs1-203546.edf  shhs1-204641.edf
shhs1-200139.edf          shhs1-201560.edf  shhs1-203547.edf  shhs1-204666.edf
shhs1-200152.edf          shhs1-201566.edf  shhs1-203580.edf  shhs1-204684.edf
shhs1-200166.edf          shhs1-201581.edf  shhs1-203614.edf  shhs1-204688.edf
shhs1-200174.edf          shhs1-201586.edf  shhs1-203622.edf  shhs1-204706.edf
shhs1-200178.edf          shhs1-201637.e

In [3]:


# Label of the signal that carries EDF+ annotations (TALs) instead of samples.
EVENT_CHANNEL = 'EDF Annotations'
# Module-level logger; load_edf() emits the parsed header at DEBUG level.
log = logging.getLogger(__name__)


class EDFEndOfData(Exception):
    '''Raised when a data record cannot be read completely (end of file).'''

def tal(tal_str):
    '''Parse an EDF+ TAL (time-stamped annotation list) stream.

    Parameters
    ----------
    tal_str : str or bytes
        Content of one 'EDF Annotations' signal within a data record. Bytes
        (what a file opened in binary mode returns) are decoded as UTF-8;
        undecodable bytes are replaced rather than raising.

    Returns
    -------
    list of (onset, duration, annotations) tuples
        onset : float, seconds.
        duration : float, seconds; 0. when the TAL carries no duration.
        annotations : list of str; empty for a bare time-keeping TAL.
    '''
    if isinstance(tal_str, (bytes, bytearray)):
        tal_str = bytes(tal_str).decode('utf-8', 'replace')

    # Raw strings: the regex engine itself interprets \d and the \xNN
    # separators (0x15 before a duration, 0x14 between annotations,
    # 0x14 0x00 terminating a TAL).
    exp = (r'(?P<onset>[+\-]\d+(?:\.\d*)?)'
           r'(?:\x15(?P<duration>\d+(?:\.\d*)?))?'
           r'(\x14(?P<annotation>[^\x00]*))?'
           r'(?:\x14\x00)')

    def parse(match):
        fields = match.groupdict()
        annotation = fields['annotation']
        return (
            float(fields['onset']),
            float(fields['duration']) if fields['duration'] else 0.,
            annotation.split('\x14') if annotation else [])

    return [parse(m) for m in re.finditer(exp, tal_str)]


def edf_header(f):
    '''Read the fixed-width EDF/EDF+ header from the start of a file.

    Parameters
    ----------
    f : file-like object
        Positioned at offset 0. May be opened in binary or text mode; binary
        reads are decoded so every textual field is returned as str.

    Returns
    -------
    dict with the keys
        'local_subject_id', 'local_recording_id', 'date_time' (str),
        'EDF+', 'contiguous' (bool), 'n_records', 'n_channels' (int),
        'record_length' (float, seconds), 'label', 'transducer_type',
        'units', 'prefiltering' (lists of str), 'physical_min',
        'physical_max', 'digital_min', 'digital_max' (NumPy arrays) and
        'n_samples_per_record' (list of int).

    Raises
    ------
    AssertionError
        If the file is not at offset 0 or the version field is not '0'.
    ValueError
        If a numeric or date field cannot be parsed.
    '''
    # Imported locally: the notebook binds the module-level name `datetime`
    # to the class (`from datetime import datetime`), which would make
    # `datetime.datetime(...)` fail.
    from datetime import datetime as _datetime

    def read_text(nbytes):
        raw = f.read(nbytes)
        if isinstance(raw, bytes):
            # latin-1 maps every byte, so a stray non-ASCII byte cannot raise
            raw = raw.decode('latin-1')
        return raw

    h = {}
    assert f.tell() == 0  # check file position
    assert read_text(8) == '0       '

    # recording info
    h['local_subject_id'] = read_text(80).strip()
    h['local_recording_id'] = read_text(80).strip()

    # parse timestamp (dd.mm.yy and hh.mm.ss)
    (day, month, year) = [int(x) for x in re.findall(r'(\d+)', read_text(8))]
    (hour, minute, sec) = [int(x) for x in re.findall(r'(\d+)', read_text(8))]
    # EDF stores a two-digit year with 1985 as pivot: 85-99 -> 1985-1999,
    # 00-84 -> 2000-2084.
    year += 1900 if year >= 85 else 2000
    h['date_time'] = str(_datetime(year, month, day, hour, minute, sec))

    # misc
    header_nbytes = int(read_text(8))
    subtype = read_text(44)[:5]
    h['EDF+'] = subtype in ['EDF+C', 'EDF+D']
    h['contiguous'] = subtype != 'EDF+D'
    h['n_records'] = int(read_text(8))
    h['record_length'] = float(read_text(8))  # in seconds
    nchannels = h['n_channels'] = int(read_text(4))

    # read channel info (each field is stored for all channels in turn)
    channels = range(nchannels)
    h['label'] = [read_text(16).strip() for _ in channels]
    h['transducer_type'] = [read_text(80).strip() for _ in channels]
    h['units'] = [read_text(8).strip() for _ in channels]
    h['physical_min'] = np.asarray([float(read_text(8)) for _ in channels])
    h['physical_max'] = np.asarray([float(read_text(8)) for _ in channels])
    h['digital_min'] = np.asarray([float(read_text(8)) for _ in channels])
    h['digital_max'] = np.asarray([float(read_text(8)) for _ in channels])
    h['prefiltering'] = [read_text(80).strip() for _ in channels]
    h['n_samples_per_record'] = [int(read_text(8)) for _ in channels]
    f.read(32 * nchannels)  # reserved

    # The consistency check below was already disabled in the original code
    # (reason not recorded), so header_nbytes is parsed but not enforced.
    #assert f.tell() == header_nbytes
    return h


class BaseEDFReader:
    '''Record-by-record reader for EDF/EDF+ files.

    Typical use: construct with an open binary file, call read_header()
    once, then iterate over records().
    '''

    def __init__(self, file):
        # file-like object positioned at the start of the EDF file
        self.file = file


    def read_header(self):
        '''Parse the header and cache the digital-to-physical scaling.

        Sets self.header, self.dig_min, self.phys_min and self.gain.
        Raises AssertionError if a channel has a non-positive physical or
        digital range.
        '''
        self.header = h = edf_header(self.file)

        # calculate ranges for rescaling
        self.dig_min = h['digital_min']
        self.phys_min = h['physical_min']
        phys_range = h['physical_max'] - h['physical_min']
        dig_range = h['digital_max'] - h['digital_min']
        assert np.all(phys_range > 0)
        assert np.all(dig_range > 0)
        self.gain = phys_range / dig_range


    def read_raw_record(self):
        '''Read one data record and return a list with the raw bytes of each
        signal (2 bytes per sample).

        Raises EDFEndOfData when the file ends before the record is complete.
        '''
        result = []
        for nsamp in self.header['n_samples_per_record']:
            nbytes = nsamp * 2
            samples = self.file.read(nbytes)
            if len(samples) != nbytes:
                raise EDFEndOfData
            result.append(samples)
        return result


    def convert_record(self, raw_record):
        '''Convert a raw record to a (time, signals, events) tuple based on
        information in the header.

        time is the onset of the first TAL in the annotation signal, or NaN
        when the record has no annotation signal or it holds no TAL.
        signals is a list with one array of physical values per ordinary
        signal; events is a list with the remaining TAL tuples.
        '''
        h = self.header
        dig_min, phys_min, gain = self.dig_min, self.phys_min, self.gain
        time = float('nan')
        signals = []
        events = []
        for (i, samples) in enumerate(raw_record):
            label = h['label'][i]
            if isinstance(label, bytes):
                # header read in binary mode without decoding
                label = label.decode('latin-1')
            if label == EVENT_CHANNEL:
                ann = tal(samples)
                if ann:  # an annotation signal without any TAL keeps time = NaN
                    time = ann[0][0]
                    events.extend(ann[1:])
            else:
                # 2-byte little-endian integers; frombuffer replaces the
                # deprecated binary mode of np.fromstring, and astype copies,
                # so the result is writable.
                dig = np.frombuffer(samples, dtype='<i2').astype(np.float32)
                phys = (dig - dig_min[i]) * gain[i] + phys_min[i]
                signals.append(phys)

        return time, signals, events


    def read_record(self):
        '''Read the next record and return it converted, see convert_record.'''
        return self.convert_record(self.read_raw_record())


    def records(self):
        '''
        Record generator: yields converted records until the data runs out.
        '''
        try:
            while True:
                yield self.read_record()
        except EDFEndOfData:
            # normal termination: no further complete record in the file
            pass


def load_edf(edffile):
    '''Load an EDF+ file.

    Very basic reader for EDF and EDF+ files. While BaseEDFReader does support
    exotic features like non-homogeneous sample rates and loading only parts of
    the stream, load_edf expects a single fixed sample rate for all channels and
    tries to load the whole file.

    Parameters
    ----------
    edffile : file-like object or string
        A path (opened here in binary mode) or an already open binary file.

    Returns
    -------
    Named tuple with the fields:
      X : NumPy array with shape p by n.
        Raw recording of n samples in p dimensions.
      sample_rate : float
        The sample rate of the recording. Note that mixed sample-rates are not
        supported.
      chan_lab : list of length p with strings
        The labels of the sensors used to record X.
      time : NumPy array with length n
        The time offset in the recording for each sample.
      annotations : a list with tuples
        EDF+ annotations are stored in (start, duration, description) tuples.
        start : float
          Indicates the start of the event in seconds.
        duration : float
          Indicates the duration of the event in seconds.
        description : list with strings
          Contains (multiple?) descriptions of the annotation event.

    Raises
    ------
    AssertionError
        If the signal channels do not share one sample rate.
    ValueError
        If the file contains no complete data record.
    '''
    # `basestring` only exists on Python 2; accept str and bytes paths.
    if isinstance(edffile, (str, bytes)):
        with open(edffile, 'rb') as f:
            return load_edf(f)  # convert filename to file

    reader = BaseEDFReader(edffile)
    reader.read_header()

    h = reader.header
    # Logger looked up here so the function does not depend on a
    # module-level `log` being defined.
    logging.getLogger(__name__).debug('EDF header: %s', h)

    # get sample rate info
    nsamp = np.unique(
        [n for (l, n) in zip(h['label'], h['n_samples_per_record'])
         if l != EVENT_CHANNEL])
    assert nsamp.size == 1, 'Multiple sample rates not supported!'
    samples_per_record = int(nsamp[0])
    sample_rate = float(samples_per_record) / h['record_length']

    records = list(reader.records())
    if not records:
        raise ValueError('EDF file contains no complete data record')
    rectime, X, annotations = zip(*records)
    X = np.hstack(X)
    # flatten the per-record annotation lists (the builtin `reduce` is gone
    # on Python 3)
    annotations = [event for rec_events in annotations for event in rec_events]
    chan_lab = [lab for lab in h['label'] if lab != EVENT_CHANNEL]

    # create timestamps
    if h['contiguous']:
        time = np.arange(X.shape[1]) / sample_rate
    else:
        # discontiguous file: offset the in-record sample times by the onset
        # stored with each record; linspace needs a plain int sample count
        reclen = h['record_length']
        within_rec_time = np.linspace(0, reclen, samples_per_record,
                                      endpoint=False)
        time = np.hstack([t + within_rec_time for t in rectime])

    tup = namedtuple('EDF', 'X sample_rate chan_lab time annotations')
    return tup(X, sample_rate, chan_lab, time, annotations)

In [6]:
EPOCH_SEC_SIZE = 30  # duration of one scored epoch, in seconds

# data on GNODE 25 DATE: 06-12-21 (ALL 329 files of SHHS1)


data_dir = '/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1'
ann_dir = '/scratch/SLEEP_data/shhs/polysomnography/annotations-events-profusion/shhs1'
output_dir = '/scratch/SLEEP_data/shhs/output'
select_ch = 'EEG C4-A1'  # EEG(sec): C3-A2, EEG: C4-A1

csv_path = '/scratch/SLEEP_data/selected_shhs1_files.txt'
# makedirs also creates missing parent directories and tolerates an
# already existing output directory.
os.makedirs(output_dir, exist_ok=True)

# One recording id per line; the id is the file name without extension.
ids = pd.read_csv(csv_path, header=None)
ids = ids[0].values.tolist()

# Build each (EDF, annotation) pair from the same id and sort the pairs
# together, so a recording can never be matched with another recording's
# annotations (two independently sorted lists only pair up by coincidence
# of the naming scheme).
file_pairs = sorted(
    (os.path.join(data_dir, i + ".edf"),
     os.path.join(ann_dir, i + "-profusion.xml"))
    for i in ids)
edf_fnames = np.asarray([edf for edf, _ in file_pairs])
ann_fnames = np.asarray([ann for _, ann in file_pairs])

# Channels are picked by substring match on the first word of select_ch
# ('EEG'); in the recordings processed so far this keeps 'EEG' and 'EEG(sec)'.
ch_type = select_ch.split(" ")[0]

# Wake time kept before the first and after the last non-wake epoch.
w_edge_mins = 30
edge_epochs = w_edge_mins * 60 // EPOCH_SEC_SIZE

# Profusion stage codes are remapped so that stage 4 merges into 3 and
# REM (5) becomes 4; all other codes are kept.
STAGE_REMAP = {4: 3, 5: 4}

for edf_fname, ann_fname in zip(edf_fnames, ann_fnames):
    filename = os.path.basename(edf_fname).replace(".edf", ".npz")
    out_path = os.path.join(output_dir, filename)
    if os.path.exists(out_path):
        continue  # already converted by a previous run
    print(edf_fname)

    raw = read_raw_edf(edf_fname, preload=True, stim_channel=None, verbose=None)
    sampling_rate = raw.info['sfreq']
    picked_chs = sorted([s for s in raw.info["ch_names"] if ch_type in s])
    print(picked_chs)
    # NOTE(review): the sampling rate is passed as the `scalings` argument;
    # confirm this is intended rather than a unit-scaling dict.
    raw_ch_df = raw.to_data_frame(scalings=sampling_rate)[picked_chs]
    print(raw_ch_df.shape)

    # Read the per-epoch sleep stages from the annotation file.
    # NOTE(review): child index 4 of the XML root is assumed to hold one
    # element per epoch with the stage code as text - confirm against the
    # Profusion schema.
    t = ET.parse(ann_fname)
    r = t.getroot()
    stages = [int(node.text) for node in r[4]]

    # some files may contain labels > 5 BUT not the selected ones.
    if any(lbl > 5 for lbl in stages):
        print( "============================== Faulty file ==================")
        continue

    labels = np.asarray([STAGE_REMAP.get(lbl, lbl) for lbl in stages])

    # Only the sample values are used below; the DataFrame index is irrelevant.
    raw_ch = raw_ch_df.values
    print(raw_ch.shape)

    # Verify that we can split into 30-s epochs
    samples_per_epoch = EPOCH_SEC_SIZE * sampling_rate
    if len(raw_ch) % samples_per_epoch != 0:
        raise Exception("Something wrong")
    # sfreq is a float, so convert the epoch count to int before splitting
    n_epochs = int(len(raw_ch) // samples_per_epoch)

    # Get epochs and their corresponding labels
    x = np.asarray(np.split(raw_ch, n_epochs)).astype(np.float32)
    y = labels.astype(np.int32)

    print(x.shape)
    print(y.shape)
    assert len(x) == len(y)

    # Select on sleep periods: keep from edge_epochs before the first
    # non-wake epoch to edge_epochs after the last one, clamped to the
    # recording.
    nw_idx = np.where(y != 0)[0]
    if nw_idx.size == 0:
        # every epoch is scored as wake (0): there is no sleep period to cut
        print("No sleep epochs found, skipping file")
        continue
    start_idx = max(nw_idx[0] - edge_epochs, 0)
    end_idx = min(nw_idx[-1] + edge_epochs, len(y) - 1)
    select_idx = np.arange(start_idx, end_idx + 1)
    print("Data before selection: {}, {}".format(x.shape, y.shape))
    x = x[select_idx]
    y = y[select_idx]
    print("Data after selection: {}, {}".format(x.shape, y.shape))

    # Saving as numpy files
    save_dict = {
        "x": x,
        "y": y,
        "fs": sampling_rate
    }
    np.savez(out_path, **save_dict)
    print(" ---------- Done this file ---------")

/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200010.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200010.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4064999  =      0.000 ... 32519.992 secs...
['EEG', 'EEG(sec)']
(4065000, 2)
(4065000, 2)
(1084, 3750, 2)
(1084,)
Data before selection: (1084, 3750, 2), (1084,)
Data after selection: (1030, 3750, 2), (1030,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200017.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200017.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EE

(4001250, 2)
(4001250, 2)
(1067, 3750, 2)
(1067,)
Data before selection: (1067, 3750, 2), (1067,)
Data after selection: (1030, 3750, 2), (1030,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200215.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200215.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3626249  =      0.000 ... 29009.992 secs...
['EEG', 'EEG(sec)']
(3626250, 2)
(3626250, 2)
(967, 3750, 2)
(967,)
Data before selection: (967, 3750, 2), (967,)
Data after selection: (962, 3750, 2), (962,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200258.edf
Extracting EDF parameters from /scratch/SLEEP_data/s

Creating raw.info structure...
Reading 0 ... 3734999  =      0.000 ... 29879.992 secs...
['EEG', 'EEG(sec)']
(3735000, 2)
(3735000, 2)
(996, 3750, 2)
(996,)
Data before selection: (996, 3750, 2), (996,)
Data after selection: (980, 3750, 2), (980,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200467.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200467.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3982499  =      0.000 ... 31859.992 secs...
['EEG', 'EEG(sec)']
(3982500, 2)
(3982500, 2)
(1062, 3750, 2)
(1062,)
Data before selection: (1062, 3750, 2), (1062,)
Data after selection: (919, 3750, 2), (919,)
 ---------- Done this file ---------
/scratch/SLEEP

Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4008749  =      0.000 ... 32069.992 secs...
['EEG', 'EEG(sec)']
(4008750, 2)
(4008750, 2)
(1069, 3750, 2)
(1069,)
Data before selection: (1069, 3750, 2), (1069,)
Data after selection: (1069, 3750, 2), (1069,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200701.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200701.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3536249  =      0.000 ... 28289.992 secs...
['EEG', 'EEG(sec)']


EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4008749  =      0.000 ... 32069.992 secs...
['EEG', 'EEG(sec)']
(4008750, 2)
(4008750, 2)
(1069, 3750, 2)
(1069,)
Data before selection: (1069, 3750, 2), (1069,)
Data after selection: (1019, 3750, 2), (1019,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200834.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200834.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3933749  =  

EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3262499  =      0.000 ... 26099.992 secs...
['EEG', 'EEG(sec)']
(3262500, 2)
(3262500, 2)
(870, 3750, 2)
(870,)
Data before selection: (870, 3750, 2), (870,)
Data after selection: (807, 3750, 2), (807,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200920.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-200920.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4008749  =      0.

EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3596249  =      0.000 ... 28769.992 secs...
['EEG', 'EEG(sec)']
(3596250, 2)
(3596250, 2)
(959, 3750, 2)
(959,)
Data before selection: (959, 3750, 2), (959,)
Data after selection: (891, 3750, 2), (891,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-201102.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-201102.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3596249  =      0.

EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3948749  =      0.000 ... 31589.992 secs...
['EEG', 'EEG(sec)']
(3948750, 2)
(3948750, 2)
(1053, 3750, 2)
(1053,)
Data before selection: (1053, 3750, 2), (1053,)
Data after selection: (1053, 3750, 2), (1053,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-201329.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-201329.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ..

 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-201725.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-201725.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4049999  =      0.000 ... 32399.992 secs...
['EEG', 'EEG(sec)']
(4050000, 2)
(4050000, 2)
(1080, 3750, 2)
(1080,)
Data before selection: (1080, 3750, 2), (1080,)
Data after selection: (982, 3750, 2), (982,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-201748.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-201748.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the followi

['EEG', 'EEG(sec)']
(3952500, 2)
(3952500, 2)
(1054, 3750, 2)
(1054,)
Data before selection: (1054, 3750, 2), (1054,)
Data after selection: (1054, 3750, 2), (1054,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-202139.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-202139.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR
Creating raw.info structure...
Reading 0 ... 3708749  =      0.000 ... 29669.992 secs...
['EEG', 'EEG(sec)']
(3708750, 2)
(3708750, 2)
(989, 3750, 2)
(989,)
Data before selection: (989, 3750, 2), (989,)
Data after selection: (936, 3750, 2), (936,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-202160.edf
Extracting EDF parameters from 

Creating raw.info structure...
Reading 0 ... 3821249  =      0.000 ... 30569.992 secs...
['EEG', 'EEG(sec)']
(3821250, 2)
(3821250, 2)
(1019, 3750, 2)
(1019,)
Data before selection: (1019, 3750, 2), (1019,)
Data after selection: (971, 3750, 2), (971,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-202424.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-202424.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, NEWAIR, stat
Creating raw.info structure...
Reading 0 ... 3821249  =      0.000 ... 30569.992 secs...
['EEG', 'EEG(sec)']
(3821250, 2)
(3821250, 2)
(1019, 3750, 2)
(1019,)
Data before selection: (1019, 3750, 2), (1019,)
Data after selection: (967, 3750, 2), (967,)
 ---------- Done this file ---------


Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4072499  =      0.000 ... 32579.992 secs...
['EEG', 'EEG(sec)']
(4072500, 2)
(4072500, 2)
(1086, 3750, 2)
(1086,)
Data before selection: (1086, 3750, 2), (1086,)
Data after selection: (1016, 3750, 2), (1016,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-202865.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-202865.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4072499  =      0.000 ... 32579.992 secs...
['EEG', 'EEG(sec)']
(4072500, 2)
(4072

EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, Air, stat
Creating raw.info structure...
Reading 0 ... 3941249  =      0.000 ... 31529.992 secs...
['EEG', 'EEG(sec)']
(3941250, 2)
(3941250, 2)
(1051, 3750, 2)
(1051,)
Data before selection: (1051, 3750, 2), (1051,)
Data after selection: (969, 3750, 2), (969,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-203168.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-203168.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AUX, stat
Creating raw.info structure...
Reading 0 ... 

EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, Air, stat
Creating raw.info structure...
Reading 0 ... 3941249  =      0.000 ... 31529.992 secs...
['EEG', 'EEG(sec)']
(3941250, 2)
(3941250, 2)
(1051, 3750, 2)
(1051,)
Data before selection: (1051, 3750, 2), (1051,)
Data after selection: (1051, 3750, 2), (1051,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-203324.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-203324.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, A/F, stat
Creating raw.info structure...
Reading 0 ... 3948749 

 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-203478.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-203478.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, Air, stat
Creating raw.info structure...
Reading 0 ... 3948749  =      0.000 ... 31589.992 secs...
['EEG', 'EEG(sec)']
(3948750, 2)
(3948750, 2)
(1053, 3750, 2)
(1053,)
Data before selection: (1053, 3750, 2), (1053,)
Data after selection: (1034, 3750, 2), (1034,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-203479.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-203479.edf...
EDF file detected
Setting channel info structure...
Could not determine channel t

Creating raw.info structure...
Reading 0 ... 3941249  =      0.000 ... 31529.992 secs...
['EEG', 'EEG(sec)']
(3941250, 2)
(3941250, 2)
(1051, 3750, 2)
(1051,)
Data before selection: (1051, 3750, 2), (1051,)
Data after selection: (1006, 3750, 2), (1006,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-203687.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-203687.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, Air, stat
Creating raw.info structure...
Reading 0 ... 3948749  =      0.000 ... 31589.992 secs...
['EEG', 'EEG(sec)']
(3948750, 2)
(3948750, 2)
(1053, 3750, 2)
(1053,)
Data before selection: (1053, 3750, 2), (1053,)
Data after selection: (945, 3750, 2), (945,)
 ---------- Done this file ---------
/

EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, Air, stat
Creating raw.info structure...
Reading 0 ... 3941249  =      0.000 ... 31529.992 secs...
['EEG', 'EEG(sec)']
(3941250, 2)
(3941250, 2)
(1051, 3750, 2)
(1051,)
Data before selection: (1051, 3750, 2), (1051,)
Data after selection: (978, 3750, 2), (978,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-203870.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-203870.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, Air, stat
Creating raw.info structure...
Reading 0 ... 

EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4008749  =      0.000 ... 32069.992 secs...
['EEG', 'EEG(sec)']
(4008750, 2)
(4008750, 2)
(1069, 3750, 2)
(1069,)
Data before selection: (1069, 3750, 2), (1069,)
Data after selection: (953, 3750, 2), (953,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204041.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204041.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3596249  =      0.

 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204111.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204111.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3821249  =      0.000 ... 30569.992 secs...
['EEG', 'EEG(sec)']
(3821250, 2)
(3821250, 2)
(1019, 3750, 2)
(1019,)
Data before selection: (1019, 3750, 2), (1019,)
Data after selection: (992, 3750, 2), (992,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204115.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204115.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the 

['EEG', 'EEG(sec)']
(4001250, 2)
(4001250, 2)
(1067, 3750, 2)
(1067,)
Data before selection: (1067, 3750, 2), (1067,)
Data after selection: (985, 3750, 2), (985,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204234.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204234.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW-0, RES-0, RES-1, POSITION, LIGHT, AIRFLOW-1, stat
Creating raw.info structure...
Reading 0 ... 3371249  =      0.000 ... 26969.992 secs...
['EEG', 'EEG(sec)']
(3371250, 2)
(3371250, 2)
(899, 3750, 2)
(899,)
Data before selection: (899, 3750, 2), (899,)
Data after selection: (899, 3750, 2), (899,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204282.edf
Extracting EDF para

Creating raw.info structure...
Reading 0 ... 3933749  =      0.000 ... 31469.992 secs...
['EEG', 'EEG(sec)']
(3933750, 2)
(3933750, 2)
(1049, 3750, 2)
(1049,)
Data before selection: (1049, 3750, 2), (1049,)
Data after selection: (1014, 3750, 2), (1014,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204405.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204405.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3821249  =      0.000 ... 30569.992 secs...
['EEG', 'EEG(sec)']
(3821250, 2)
(3821250, 2)
(1019, 3750, 2)
(1019,)
Data before selection: (1019, 3750, 2), (1019,)
Data after selection: (923, 3750, 2), (923,)
 ---------- Done this file ---------
/scratch/S

Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4008749  =      0.000 ... 32069.992 secs...
['EEG', 'EEG(sec)']
(4008750, 2)
(4008750, 2)
(1069, 3750, 2)
(1069,)
Data before selection: (1069, 3750, 2), (1069,)
Data after selection: (900, 3750, 2), (900,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204550.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204550.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4008749  =      0.000 ... 32069.992 secs...
['EEG', 'EEG(sec)']
(40087

EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3596249  =      0.000 ... 28769.992 secs...
['EEG', 'EEG(sec)']
(3596250, 2)
(3596250, 2)
(959, 3750, 2)
(959,)
Data before selection: (959, 3750, 2), (959,)
Data after selection: (907, 3750, 2), (907,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204724.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204724.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3671249  =      0.000 

EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4001249  =      0.000 ... 32009.992 secs...
['EEG', 'EEG(sec)']
(4001250, 2)
(4001250, 2)
(1067, 3750, 2)
(1067,)
Data before selection: (1067, 3750, 2), (1067,)
Data after selection: (998, 3750, 2), (998,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204783.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204783.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3596249  =      0.

EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4008749  =      0.000 ... 32069.992 secs...
['EEG', 'EEG(sec)']
(4008750, 2)
(4008750, 2)
(1069, 3750, 2)
(1069,)
Data before selection: (1069, 3750, 2), (1069,)
Data after selection: (1012, 3750, 2), (1012,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204844.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204844.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4008749  =      

EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3371249  =      0.000 ... 26969.992 secs...
['EEG', 'EEG(sec)']
(3371250, 2)
(3371250, 2)
(899, 3750, 2)
(899,)
Data before selection: (899, 3750, 2), (899,)
Data after selection: (884, 3750, 2), (884,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204916.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-204916.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 4008749  =      0.000 

EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR
Creating raw.info structure...
Reading 0 ... 3596249  =      0.000 ... 28769.992 secs...
['EEG', 'EEG(sec)']
(3596250, 2)
(3596250, 2)
(959, 3750, 2)
(959,)
Data before selection: (959, 3750, 2), (959,)
Data after selection: (959, 3750, 2), (959,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-205178.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-205178.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3948749  =

 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-205502.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-205502.edf...
EDF file detected
Setting channel info structure...
Could not determine channel type of the following channels, they will be set as EEG:
SaO2, H.R., EEG(sec), ECG, EMG, EOG(L), EOG(R), EEG, SOUND, AIRFLOW, RES-0, RES-1, POSITION, LIGHT, AIR, stat
Creating raw.info structure...
Reading 0 ... 3821249  =      0.000 ... 30569.992 secs...
['EEG', 'EEG(sec)']
(3821250, 2)
(3821250, 2)
(1019, 3750, 2)
(1019,)
Data before selection: (1019, 3750, 2), (1019,)
Data after selection: (947, 3750, 2), (947,)
 ---------- Done this file ---------
/scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-205516.edf
Extracting EDF parameters from /scratch/SLEEP_data/shhs/polysomnography/edfs/shhs1/shhs1-205516.edf...
EDF file detected
Setting channel info structure...
Could not determine channel typ

In [3]:
import os
import torch
import numpy as np
import argparse

# Build the pretext / train / validation tensor files from the per-subject
# .npz files found in `data_dir`. Each .npz is read for its "x" and "y" arrays;
# per the preprocessing log above, "x" appears to be (epochs, 3750, 2) and "y"
# (epochs,) -- TODO confirm against the writer cell.

seed = 123
np.random.seed(seed)  # fixes which files np.random.choice assigns to each split

N_PRETEXT_FILES = 264    # change to match the number of available subjects
N_TRAINING_FILES = 31    # change; every file left over becomes validation

# The directory used to come from an argparse "--dir" option
# (default "/scratch/SLEEP_data"); it is hard-coded for notebook use.
dire = '/scratch/SLEEP_data/shhs'
data_dir = os.path.join(dire, "output")    # on gnode27 = "numpy_subjects"
output_dir = dire

files = os.listdir(data_dir)
files = np.array([os.path.join(data_dir, i) for i in files])
files.sort()  # deterministic order so the seeded draws are reproducible


def _load_subjects(paths, show_index=False):
    """Load the "x" and "y" arrays of every .npz in `paths` and stack them.

    Args:
        paths: sequence of .npz file paths; each must contain "x" and "y".
        show_index: when True, also print a 1-based running counter before
            each file name.

    Returns:
        (samples, labels): "x" arrays stacked with np.vstack and "y" arrays
        flattened and concatenated (the same result np.append produces).

    Raises:
        ValueError: if `paths` is empty.
    """
    if len(paths) == 0:
        raise ValueError("no .npz files were given for this split")

    x_parts, y_parts = [], []
    # Every path is read exactly once. Seeding the stack with paths[0] and then
    # looping from index 0 again would duplicate the first subject's epochs.
    for idx, path in enumerate(paths, start=1):
        if show_index:
            print(idx)
        print(os.path.basename(path))
        # One open per file; the context manager closes the archive handle.
        with np.load(path) as npz:
            x_parts.append(npz["x"])
            y_parts.append(npz["y"])

    # Stack once at the end instead of re-copying the accumulated array on
    # every iteration.
    samples = np.vstack(x_parts)
    labels = np.concatenate([np.ravel(y) for y in y_parts])
    return samples, labels


def _export_split(paths, out_dir, out_name, show_index=False):
    """Load `paths`, then save {"samples", "labels"} tensors to out_dir/out_name.

    The samples array has its last two axes swapped (transpose(0, 2, 1))
    before being wrapped as a tensor. Doing the work inside a function lets
    the large arrays be released once the file has been written.
    """
    samples, labels = _load_subjects(paths, show_index=show_index)
    data_save = dict()
    data_save["samples"] = torch.from_numpy(samples.transpose(0, 2, 1))
    data_save["labels"] = torch.from_numpy(labels)
    torch.save(data_save, os.path.join(out_dir, out_name))


######## pretext files ##########
pretext_files = list(np.random.choice(files, N_PRETEXT_FILES, replace=False))

print("pretext files: ", len(pretext_files))
_export_split(pretext_files, output_dir, "pretext.pt", show_index=True)

######## training files ##########
# Drawn only from files not already used for pretext.
remaining_after_pretext = sorted(set(files) - set(pretext_files))
training_files = list(np.random.choice(remaining_after_pretext, N_TRAINING_FILES, replace=False))

print("\n =========================================== \n")
print("training files: ", len(training_files))
_export_split(training_files, output_dir, "train.pt")

######## validation files ##########
# Everything not used for pretext or training (the original note says 32 files were left).
validation_files = sorted(set(files) - set(pretext_files) - set(training_files))

print("\n =========================================== \n")
print("validation files: ", len(validation_files))
_export_split(validation_files, output_dir, "val.pt")

pretext files:  264
1
shhs1-200214.npz
2
shhs1-203358.npz
3
shhs1-200834.npz
4
shhs1-204187.npz
5
shhs1-204802.npz
6
shhs1-202723.npz
7
shhs1-200884.npz
8
shhs1-200408.npz
9
shhs1-204879.npz
10
shhs1-203533.npz
11
shhs1-200886.npz
12
shhs1-204094.npz
13
shhs1-204737.npz
14
shhs1-205780.npz
15
shhs1-201725.npz
16
shhs1-203997.npz
17
shhs1-204985.npz
18
shhs1-205213.npz
19
shhs1-202267.npz
20
shhs1-200780.npz
21
shhs1-203171.npz
22
shhs1-204027.npz
23
shhs1-204149.npz
24
shhs1-200606.npz
25
shhs1-204318.npz
26
shhs1-203884.npz
27
shhs1-201706.npz
28
shhs1-204405.npz
29
shhs1-205516.npz
30
shhs1-204047.npz
31
shhs1-203168.npz
32
shhs1-204107.npz
33
shhs1-205083.npz
34
shhs1-201237.npz
35
shhs1-204009.npz
36
shhs1-201581.npz
37
shhs1-205429.npz
38
shhs1-203185.npz
39
shhs1-204724.npz
40
shhs1-203651.npz
41
shhs1-204550.npz
42
shhs1-203734.npz
43
shhs1-200496.npz
44
shhs1-200215.npz
45
shhs1-203580.npz
46
shhs1-204473.npz
47
shhs1-200569.npz
48
shhs1-204434.npz
49
shhs1-204062.npz
50
shhs1-

In [7]:
# Switch the kernel's working directory to the "output" sub-directory
# (relative to wherever the kernel currently is).
%cd output

/scratch/sleep/output


['SaO2',
 'H.R.',
 'EEG2',
 'ECG',
 'EMG',
 'EOG(L)',
 'EOG(R)',
 'EEG',
 'AIRFLOW',
 'RES-0',
 'RES-1',
 'POSITION',
 'LIGHT',
 'AIR',
 'stat']

In [21]:
# List the contents of the current working directory.
%ls

copy.py                   shhs1-201254.edf  shhs1-203433.edf  shhs1-204529.edf
selected_shhs1_files.txt  shhs1-201270.edf  shhs1-203460.edf  shhs1-204543.edf
shhs1-200010.edf          shhs1-201316.edf  shhs1-203478.edf  shhs1-204550.edf
shhs1-200017.edf          shhs1-201329.edf  shhs1-203479.edf  shhs1-204553.edf
shhs1-200039.edf          shhs1-201359.edf  shhs1-203514.edf  shhs1-204554.edf
shhs1-200052.edf          shhs1-201371.edf  shhs1-203523.edf  shhs1-204576.edf
shhs1-200097.edf          shhs1-201515.edf  shhs1-203533.edf  shhs1-204611.edf
shhs1-200122.edf          shhs1-201552.edf  shhs1-203546.edf  shhs1-204641.edf
shhs1-200139.edf          shhs1-201560.edf  shhs1-203547.edf  shhs1-204666.edf
shhs1-200152.edf          shhs1-201566.edf  shhs1-203580.edf  shhs1-204684.edf
shhs1-200166.edf          shhs1-201581.edf  shhs1-203614.edf  shhs1-204688.edf
shhs1-200174.edf          shhs1-201586.edf  shhs1-203622.edf  shhs1-204706.edf
shhs1-200178.edf          shhs1-201637.e