### Process
Two choices:
1. Loop through the files and merge them into larger files that are closer to the HMP data format (xarray files)
OR:
1. Create a DataLoader that loops through the remaining files, loads each one, checks whether it fits (has the correct channels), and then trains on that file
This approach would mean that unfit files are re-checked on every pass, instead of doing one extra pass over the data up front and saving only the files that fit. Alternatively, we could store the paths of all fitting files as a subset; then we wouldn't have to re-check them.

In [22]:
import glob
from pathlib import Path
import pyedflib
from hmpai.pytorch.generators import TUEGDataset

In [28]:
# Store the path of every .edf file in a text file, one per line, so that
# later steps do not have to walk the folder structure again
path = Path("../../data/tueg")
files = path.rglob("*.edf")

with open('edf_files.txt', 'w') as listing:
    listing.writelines(str(edf_path) + "\n" for edf_path in files)

In [38]:
# Only look for files with a sampling frequency of FREQ
FREQ = 250
with open('edf_files.txt', 'r') as listing:
    # One path per line, as written by the file-listing step
    files = listing.readlines()

for file in files:
    file = file.rstrip("\n")
    print(file)

    # Open the reader in a context manager so it is closed on every exit
    # path (skip via `continue`, normal completion, or an interrupt/error),
    # instead of leaving one open handle behind per visited file.
    with pyedflib.EdfReader(file) as edf:
        # Sampling frequency of the first signal in the file
        file_freq = edf.getSampleFrequency(0)

        # Skip if not FREQ sampling frequency
        # Open question: is AR always 256 instead of 250?
        if file_freq != FREQ:
            continue

        print(edf.getSignalLabels())

../../data/tueg/tuh_eeg/v2.0.1/edf/000/aaaaaaaa/s001_2015/01_tcp_ar/aaaaaaaa_s001_t000.edf
../../data/tueg/tuh_eeg/v2.0.1/edf/000/aaaaaaab/s001_2002/02_tcp_le/aaaaaaab_s001_t000.edf
['EEG FP1-LE', 'EEG FP2-LE', 'EEG F3-LE', 'EEG F4-LE', 'EEG C3-LE', 'EEG C4-LE', 'EEG A1-LE', 'EEG A2-LE', 'EEG P3-LE', 'EEG P4-LE', 'EEG O1-LE', 'EEG O2-LE', 'EEG F7-LE', 'EEG F8-LE', 'EEG T3-LE', 'EEG T4-LE', 'EEG T5-LE', 'EEG T6-LE', 'EEG FZ-LE', 'EEG CZ-LE', 'EEG PZ-LE', 'EEG OZ-LE', 'EEG PG1-LE', 'EEG PG2-LE', 'EEG EKG-LE', 'EEG SP2-LE', 'EEG SP1-LE', 'EEG RLC-LE', 'EEG LUC-LE', 'EEG 30-LE', 'EEG T1-LE', 'EEG T2-LE', 'PHOTIC PH']
../../data/tueg/tuh_eeg/v2.0.1/edf/000/aaaaaaab/s001_2002/02_tcp_le/aaaaaaab_s001_t001.edf
['EEG FP1-LE', 'EEG FP2-LE', 'EEG F3-LE', 'EEG F4-LE', 'EEG C3-LE', 'EEG C4-LE', 'EEG A1-LE', 'EEG A2-LE', 'EEG P3-LE', 'EEG P4-LE', 'EEG O1-LE', 'EEG O2-LE', 'EEG F7-LE', 'EEG F8-LE', 'EEG T3-LE', 'EEG T4-LE', 'EEG T5-LE', 'EEG T6-LE', 'EEG FZ-LE', 'EEG CZ-LE', 'EEG PZ-LE', 'EEG OZ-LE',

KeyboardInterrupt: 