This notebook is a pipeline for analyzing and visualizing a time-series dataset. 
The goal is for this pipeline to be comprehensive so that we can gain more dataset-level insights.
## Step 1: load raw data and preprocess

In [9]:
import pathlib, mne, pandas as pd

# Scan every EDF file under the NCHSDB directory, collect one record per
# (file, channel) pair from the file headers, and save the channel inventory.
edf_dir = pathlib.Path("/scratch/besp/shared_data/nchsdb/sleep_data")
all_channels = set()   # every distinct channel name seen in any file
rows = []              # one record per (file, channel) pair

num_of_file_checked = 0
for edf_path in edf_dir.glob("**/*.edf"):
    num_of_file_checked += 1
    # preload=False so the signal data is not loaded into memory.
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose="error")

    # Private MNE attribute; the header fields below are read from it.
    hdr = raw._raw_extras[0]
    rec_len_sec = hdr["record_length"]
    n_samps_list = hdr["n_samps"]

    for idx, ch in enumerate(raw.info["chs"]):
        ch_name = ch["ch_name"]
        all_channels.add(ch_name)

        # Sampling rate = samples per data record / record length.
        # NOTE(review): assumes hdr["n_samps"] is ordered like raw.info["chs"] — confirm.
        sfreq = n_samps_list[idx] / rec_len_sec
        # NOTE(review): confirm the per-channel dicts in raw.info["chs"] carry
        # "lowpass"/"highpass" keys; if not, both columns always hold "—".
        rows.append(
            dict(
                file=edf_path.name,
                channel=ch_name,
                sfreq_hz=sfreq,
                phys_unit=ch.get("unit", "—"),
                lowpass=ch.get("lowpass", "—"),
                highpass=ch.get("highpass", "—"),
            )
        )
    # Progress marker every 100 files.
    if num_of_file_checked % 100 == 0:
        print(num_of_file_checked)

channel_list = sorted(all_channels)
pd.Series(channel_list, name="channel").to_csv(
    edf_dir / "nchsdb_all_channels.csv", index=False
)

# Always rebind `df` from this cell's rows (explicit columns keep the frame
# well-formed even when empty), so an empty scan can neither raise NameError
# nor silently reuse a `df` left over from another cell.
df = pd.DataFrame(
    rows,
    columns=["file", "channel", "sfreq_hz", "phys_unit", "lowpass", "highpass"],
)
if df.empty:
    print(f"No EDF channels found under {edf_dir}; skipping the per-channel report.")
else:
    df.to_csv(edf_dir / "nchsdb_channels_details.csv", index=False)
    print(df['channel'].unique())
    print(df['channel'].value_counts())
    df.to_csv("nchsdb.csv")

# Channel names printed by a previous run of this cell (kept for reference):
# ['Patient Event' 'EOG LOC-M2' 'EOG ROC-M1' 'EMG Chin1-Chin2' 'EEG F3-M2'
#  'EEG F4-M1' 'EEG C3-M2' 'EEG C4-M1' 'EEG O1-M2' 'EEG O2-M1' 'EEG CZ-O1'
#  'EMG LLeg-RLeg' 'ECG EKG2-EKG' 'Snore' 'Resp PTAF' 'Resp Airflow'
#  'Resp Thoracic' 'Resp Abdominal' 'SpO2' 'Rate' 'EtCO2' 'Capno'
#  'Resp Rate' 'C-flow' 'Tidal Vol' 'Pressure' 'EMG CHIN1-CHIN2' 'EEG Cz-O1'
#  'EMG LLEG+-LLEG-' 'EMG RLEG+-RLEG-' 'ECG LA-RA' 'Snore_DR' 'Flow_DR'
#  'Resp Flow' 'XFlow' 'Resp Chest' 'Resp Abdomen' 'OSAT' 'TcCO2' 'C-Flow'
#  'C-Pressure' 'EEG Chin1-Chin2' 'EEG LOC-M2' 'EMG Chin2-Chin1'
#  'EMG Chin3-Chin2' 'EMG CHIN1-CHIN3' 'EMG LAT1-LAT2' 'EMG RAT1-RAT2'
#  'ECG ECGL-ECGR' 'SNORE_DR' 'PTAF' 'Resp FLOW-Ref' 'EMG LLEG-RLEG'
#  'EMG Chin1-Chin3' 'EEG ROC-M1' 'EEG O1' 'EEG O2' 'EEG C3' 'EEG C4'
#  'EEG F3' 'EEG F4' 'EEG E1' 'EEG E2' 'EEG M1' 'EEG M2' 'EEG Chin1'
#  'EEG Chin2' 'EEG Chin3' 'EEG EKG1' 'EEG EKG2' 'EEG RLeg1' 'EEG RLeg2'
#  'EEG LLeg1' 'EEG LLeg2' 'EEG 20' 'EEG 21' 'EEG 22' 'EEG 23' 'EEG 24'
#  'EEG 25' 'EEG 26' 'EEG 27' 'EEG 28' 'EEG 29' 'EEG 30' 'EEG 31' 'EEG 32'
#  'EEG 33' 'EEG Spare' 'EEG Therm' 'EEG Snore' 'EEG Press' 'EEG Chest'
#  'EEG Abd' 'EEG 40' 'EEG Chin3-Chin2' 'EEG EKG2-EKG' 'EEG ROC-M2'
#  'EEG F4-M2' 'EEG C4-M2' 'EEG Chin1-Chin3' 'EEG EKG-RLeg' 'Position'
#  'EMG LLEG-LLEG2' 'EMG RLEG-RLEG2' 'Resp Airflow+-Re' 'Chin1' 'Chin2'
#  'Fp1' 'Fp2' 'F7' 'F8' 'F3' 'F4' 'T3' 'T4' 'C3' 'C4' 'T5' 'T6' 'P3' 'P4'
#  'O1' 'O2' 'FZ' 'CZ' 'PZ' 'M1' 'M2' 'RLeg' 'LLeg' 'ROC' 'LOC' 'FPZ' 'OZ'
#  'Chin3' 'EKG2' 'EKG' 'Airflow' 'Thoracic' 'Abdominal' '38' '39' '40'
#  'DC8' 'DC3' 'DC4' 'PPG' 'Pleth' 'OSat' 'PR']

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
['Patient Event' 'EOG LOC-M2' 'EOG ROC-M1' 'EMG Chin1-Chin2' 'EEG F3-M2'
 'EEG F4-M1' 'EEG C3-M2' 'EEG C4-M1' 'EEG O1-M2' 'EEG O2-M1' 'EEG CZ-O1'
 'EMG LLeg-RLeg' 'ECG EKG2-EKG' 'Snore' 'Resp PTAF' 'Resp Airflow'
 'Resp Thoracic' 'Resp Abdominal' 'SpO2' 'Rate' 'EtCO2' 'Capno'
 'Resp Rate' 'C-flow' 'Tidal Vol' 'Pressure' 'EMG CHIN1-CHIN2' 'EEG Cz-O1'
 'EMG LLEG+-LLEG-' 'EMG RLEG+-RLEG-' 'ECG LA-RA' 'Snore_DR' 'Flow_DR'
 'Resp Flow' 'XFlow' 'Resp Chest' 'Resp Abdomen' 'OSAT' 'TcCO2' 'C-Flow'
 'C-Pressure' 'EEG Chin1-Chin2' 'EEG LOC-M2' 'EMG Chin2-Chin1'
 'EMG Chin3-Chin2' 'EMG CHIN1-CHIN3' 'EMG LAT1-LAT2' 'EMG RAT1-RAT2'
 'ECG ECGL-ECGR' 'SNORE_DR' 'PTAF' 'Resp FLOW-Ref' 'EMG LLEG-RLEG'
 'EMG Chin1-Chin3' 'EEG ROC-M1' 'EEG O1' 'EEG O2' 'EEG C3' 'EEG C4'
 'EEG F3' 'EEG F4' 'EEG E1' 'EEG E2' 'EEG M1' 'EEG M

"\n['Patient Event' 'EOG LOC-M2' 'EOG ROC-M1' 'EMG Chin1-Chin2' 'EEG F3-M2'\n 'EEG F4-M1' 'EEG C3-M2' 'EEG C4-M1' 'EEG O1-M2' 'EEG O2-M1' 'EEG CZ-O1'\n 'EMG LLeg-RLeg' 'ECG EKG2-EKG' 'Snore' 'Resp PTAF' 'Resp Airflow'\n 'Resp Thoracic' 'Resp Abdominal' 'SpO2' 'Rate' 'EtCO2' 'Capno'\n 'Resp Rate' 'C-flow' 'Tidal Vol' 'Pressure' 'EMG CHIN1-CHIN2' 'EEG Cz-O1'\n 'EMG LLEG+-LLEG-' 'EMG RLEG+-RLEG-' 'ECG LA-RA' 'Snore_DR' 'Flow_DR'\n 'Resp Flow' 'XFlow' 'Resp Chest' 'Resp Abdomen' 'OSAT' 'TcCO2' 'C-Flow'\n 'C-Pressure' 'EEG Chin1-Chin2' 'EEG LOC-M2' 'EMG Chin2-Chin1'\n 'EMG Chin3-Chin2' 'EMG CHIN1-CHIN3' 'EMG LAT1-LAT2' 'EMG RAT1-RAT2'\n 'ECG ECGL-ECGR' 'SNORE_DR' 'PTAF' 'Resp FLOW-Ref' 'EMG LLEG-RLEG'\n 'EMG Chin1-Chin3' 'EEG ROC-M1' 'EEG O1' 'EEG O2' 'EEG C3' 'EEG C4'\n 'EEG F3' 'EEG F4' 'EEG E1' 'EEG E2' 'EEG M1' 'EEG M2' 'EEG Chin1'\n 'EEG Chin2' 'EEG Chin3' 'EEG EKG1' 'EEG EKG2' 'EEG RLeg1' 'EEG RLeg2'\n 'EEG LLeg1' 'EEG LLeg2' 'EEG 20' 'EEG 21' 'EEG 22' 'EEG 23' 'EEG 24'\n 'EEG 25' 'EE

In [7]:
import pathlib, mne, pandas as pd

# Scan every EDF file under the SHHS directory, collect one record per
# (file, channel) pair from the file headers, and save the channel inventory.
# NOTE(review): the CSVs written into edf_dir keep the "nchsdb_" prefix even
# though this cell scans SHHS; names left unchanged so existing paths stay
# valid — consider renaming.
edf_dir = pathlib.Path("/scratch/besp/shared_data/shhs/polysomnography/edfs")
all_channels = set()   # every distinct channel name seen in any file
rows = []              # one record per (file, channel) pair

num_of_file_checked = 0
for edf_path in edf_dir.glob("**/*.edf"):
    num_of_file_checked += 1
    # preload=False so the signal data is not loaded into memory.
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose="error")

    # Private MNE attribute; the header fields below are read from it.
    hdr = raw._raw_extras[0]
    rec_len_sec = hdr["record_length"]
    n_samps_list = hdr["n_samps"]

    for idx, ch in enumerate(raw.info["chs"]):
        ch_name = ch["ch_name"]
        all_channels.add(ch_name)

        # Sampling rate = samples per data record / record length.
        # NOTE(review): assumes hdr["n_samps"] is ordered like raw.info["chs"] — confirm.
        sfreq = n_samps_list[idx] / rec_len_sec
        # NOTE(review): confirm the per-channel dicts in raw.info["chs"] carry
        # "lowpass"/"highpass" keys; if not, both columns always hold "—".
        rows.append(
            dict(
                file=edf_path.name,
                channel=ch_name,
                sfreq_hz=sfreq,
                phys_unit=ch.get("unit", "—"),
                lowpass=ch.get("lowpass", "—"),
                highpass=ch.get("highpass", "—"),
            )
        )
    # Progress marker every 100 files.
    if num_of_file_checked % 100 == 0:
        print(num_of_file_checked)

channel_list = sorted(all_channels)
pd.Series(channel_list, name="channel").to_csv(
    edf_dir / "nchsdb_all_channels.csv", index=False
)

# Always rebind `df` from this cell's rows (explicit columns keep the frame
# well-formed even when empty), so an empty scan can neither raise NameError
# nor silently reuse a `df` left over from another cell.
df = pd.DataFrame(
    rows,
    columns=["file", "channel", "sfreq_hz", "phys_unit", "lowpass", "highpass"],
)
if df.empty:
    print(f"No EDF channels found under {edf_dir}; skipping the per-channel report.")
else:
    df.to_csv(edf_dir / "nchsdb_channels_details.csv", index=False)
    print(df['channel'].unique())

# Channel names recorded from a previous run of this cell (kept for reference):
# ['SaO2' 'H.R.' 'EEG(sec)' 'ECG' 'EMG' 'EOG(L)' 'EOG(R)' 'EEG' 'AIRFLOW'
#  'THOR RES' 'ABDO RES' 'POSITION' 'LIGHT' 'NEW AIR' 'OX stat' 'SOUND'
#  'New Air' 'NEWAIR' 'EEG 2' 'New A/F' 'AIRFLOW-0' 'AIRFLOW-1' 'EEG2' 'AUX'
#  'EPMS' 'epms' 'EEG sec' 'CPAP' 'EEG(SEC)' 'LEG(L)' 'LEG(R)' 'NASAL'
#  'New AIR' 'new air' 'PR' 'OX STAT']

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8100
8200
8300
8400


In [None]:
import pathlib, mne, pandas as pd

# Scan every EDF file under the CHAT directory (all sub-folders), collect one
# record per (file, channel) pair from the file headers, and save the inventory.
# NOTE(review): the CSVs written into edf_dir keep the "nchsdb_" prefix even
# though this cell scans CHAT, and other cells in this notebook that use the
# same edf_dir write to the same two paths, so each run overwrites the last.
# Names left unchanged so existing paths stay valid — consider renaming.
edf_dir = pathlib.Path("/scratch/besp/shared_data/chat/polysomnography/edfs")
all_channels = set()   # every distinct channel name seen in any file
rows = []              # one record per (file, channel) pair

num_of_file_checked = 0
for edf_path in edf_dir.glob("**/*.edf"):
    num_of_file_checked += 1
    # preload=False so the signal data is not loaded into memory.
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose="error")

    # Private MNE attribute; the header fields below are read from it.
    hdr = raw._raw_extras[0]
    rec_len_sec = hdr["record_length"]
    n_samps_list = hdr["n_samps"]

    for idx, ch in enumerate(raw.info["chs"]):
        ch_name = ch["ch_name"]
        all_channels.add(ch_name)

        # Sampling rate = samples per data record / record length.
        # NOTE(review): assumes hdr["n_samps"] is ordered like raw.info["chs"] — confirm.
        sfreq = n_samps_list[idx] / rec_len_sec
        # NOTE(review): confirm the per-channel dicts in raw.info["chs"] carry
        # "lowpass"/"highpass" keys; if not, both columns always hold "—".
        rows.append(
            dict(
                file=edf_path.name,
                channel=ch_name,
                sfreq_hz=sfreq,
                phys_unit=ch.get("unit", "—"),
                lowpass=ch.get("lowpass", "—"),
                highpass=ch.get("highpass", "—"),
            )
        )
    # Progress marker every 100 files.
    if num_of_file_checked % 100 == 0:
        print(num_of_file_checked)

channel_list = sorted(all_channels)
pd.Series(channel_list, name="channel").to_csv(
    edf_dir / "nchsdb_all_channels.csv", index=False
)

# Always rebind `df` from this cell's rows (explicit columns keep the frame
# well-formed even when empty), so an empty scan can neither raise NameError
# nor silently reuse a `df` left over from another cell.
df = pd.DataFrame(
    rows,
    columns=["file", "channel", "sfreq_hz", "phys_unit", "lowpass", "highpass"],
)
if df.empty:
    print(f"No EDF channels found under {edf_dir}; skipping the per-channel report.")
else:
    df.to_csv(edf_dir / "nchsdb_channels_details.csv", index=False)
    print(df['channel'].unique())
    print(df['channel'].value_counts())
    df.to_csv("chat.csv")

100
200
300
400
500
600
700
800
900
1000
1100


In [11]:
import pathlib, mne, pandas as pd

# Scan every EDF file under the MrOS directory, collect one record per
# (file, channel) pair from the file headers, and save the channel inventory.
# NOTE(review): the CSVs written into edf_dir keep the "nchsdb_" prefix even
# though this cell scans MrOS; names left unchanged so existing paths stay
# valid — consider renaming.
edf_dir = pathlib.Path("/scratch/besp/shared_data/mros/polysomnography/edfs")
all_channels = set()   # every distinct channel name seen in any file
rows = []              # one record per (file, channel) pair

num_of_file_checked = 0
for edf_path in edf_dir.glob("**/*.edf"):
    num_of_file_checked += 1
    # preload=False so the signal data is not loaded into memory.
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose="error")

    # Private MNE attribute; the header fields below are read from it.
    hdr = raw._raw_extras[0]
    rec_len_sec = hdr["record_length"]
    n_samps_list = hdr["n_samps"]

    for idx, ch in enumerate(raw.info["chs"]):
        ch_name = ch["ch_name"]
        all_channels.add(ch_name)

        # Sampling rate = samples per data record / record length.
        # NOTE(review): assumes hdr["n_samps"] is ordered like raw.info["chs"] — confirm.
        sfreq = n_samps_list[idx] / rec_len_sec
        # NOTE(review): confirm the per-channel dicts in raw.info["chs"] carry
        # "lowpass"/"highpass" keys; if not, both columns always hold "—".
        rows.append(
            dict(
                file=edf_path.name,
                channel=ch_name,
                sfreq_hz=sfreq,
                phys_unit=ch.get("unit", "—"),
                lowpass=ch.get("lowpass", "—"),
                highpass=ch.get("highpass", "—"),
            )
        )
    # Progress marker every 100 files.
    if num_of_file_checked % 100 == 0:
        print(num_of_file_checked)

channel_list = sorted(all_channels)
pd.Series(channel_list, name="channel").to_csv(
    edf_dir / "nchsdb_all_channels.csv", index=False
)

# Always rebind `df` from this cell's rows (explicit columns keep the frame
# well-formed even when empty), so an empty scan can neither raise NameError
# nor silently reuse a `df` left over from another cell.
df = pd.DataFrame(
    rows,
    columns=["file", "channel", "sfreq_hz", "phys_unit", "lowpass", "highpass"],
)
if df.empty:
    print(f"No EDF channels found under {edf_dir}; skipping the per-channel report.")
else:
    df.to_csv(edf_dir / "nchsdb_channels_details.csv", index=False)
    print(df['channel'].unique())
    print(df['channel'].value_counts())

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
['Position' 'Leg L' 'Leg R' 'C3' 'C4' 'A1' 'A2' 'ROC' 'LOC' 'ECG L'
 'ECG R' 'L Chin' 'R Chin' 'Airflow' 'SUM' 'Thoracic' 'Abdominal' 'STAT'
 'HR' 'SaO2' 'Cannula Flow' 'DHR' 'L Chin-R Chin' 'C3-A2' 'C4-A1'
 'ECG L-ECG R' 'LegL' 'LegR' 'M1' 'M2' 'E2' 'E1' 'ECGL' 'ECGR' 'LChin'
 'RChin' 'Chest' 'ABD' 'SpO2' 'CannulaFlow' 'CH37' 'CH36']


In [12]:
import pathlib, mne, pandas as pd

# Scan every EDF file under the MESA directory, collect one record per
# (file, channel) pair from the file headers, and save the channel inventory.
# NOTE(review): the CSVs written into edf_dir keep the "nchsdb_" prefix even
# though this cell scans MESA; names left unchanged so existing paths stay
# valid — consider renaming.
edf_dir = pathlib.Path("/scratch/besp/shared_data/mesa/polysomnography/edfs")
all_channels = set()   # every distinct channel name seen in any file
rows = []              # one record per (file, channel) pair

num_of_file_checked = 0
for edf_path in edf_dir.glob("**/*.edf"):
    num_of_file_checked += 1
    # preload=False so the signal data is not loaded into memory.
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose="error")

    # Private MNE attribute; the header fields below are read from it.
    hdr = raw._raw_extras[0]
    rec_len_sec = hdr["record_length"]
    n_samps_list = hdr["n_samps"]

    for idx, ch in enumerate(raw.info["chs"]):
        ch_name = ch["ch_name"]
        all_channels.add(ch_name)

        # Sampling rate = samples per data record / record length.
        # NOTE(review): assumes hdr["n_samps"] is ordered like raw.info["chs"] — confirm.
        sfreq = n_samps_list[idx] / rec_len_sec
        # NOTE(review): confirm the per-channel dicts in raw.info["chs"] carry
        # "lowpass"/"highpass" keys; if not, both columns always hold "—".
        rows.append(
            dict(
                file=edf_path.name,
                channel=ch_name,
                sfreq_hz=sfreq,
                phys_unit=ch.get("unit", "—"),
                lowpass=ch.get("lowpass", "—"),
                highpass=ch.get("highpass", "—"),
            )
        )
    # Progress marker every 100 files.
    if num_of_file_checked % 100 == 0:
        print(num_of_file_checked)

channel_list = sorted(all_channels)
pd.Series(channel_list, name="channel").to_csv(
    edf_dir / "nchsdb_all_channels.csv", index=False
)

# Always rebind `df` from this cell's rows (explicit columns keep the frame
# well-formed even when empty), so an empty scan can neither raise NameError
# nor silently reuse a `df` left over from another cell.
df = pd.DataFrame(
    rows,
    columns=["file", "channel", "sfreq_hz", "phys_unit", "lowpass", "highpass"],
)
if df.empty:
    print(f"No EDF channels found under {edf_dir}; skipping the per-channel report.")
else:
    df.to_csv(edf_dir / "nchsdb_channels_details.csv", index=False)
    print(df['channel'].unique())

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
['EKG' 'EOG-L' 'EOG-R' 'EMG' 'EEG1' 'EEG2' 'EEG3' 'Pres' 'Flow' 'Snore'
 'Thor' 'Abdo' 'Leg' 'Therm' 'Pos' 'EKG_Off' 'EOG-L_Off' 'EOG-R_Off'
 'EMG_Off' 'EEG1_Off' 'EEG2_Off' 'EEG3_Off' 'Pleth' 'OxStatus' 'SpO2' 'HR'
 'DHR' 'PTT' 'Aux_AC']


In [1]:
import pathlib, mne, pandas as pd

# Scan every EDF file under the CCSHS directory, collect one record per
# (file, channel) pair from the file headers, and save the channel inventory.
# NOTE(review): the CSVs written into edf_dir keep the "nchsdb_" prefix even
# though this cell scans CCSHS; names left unchanged so existing paths stay
# valid — consider renaming.
edf_dir = pathlib.Path("/scratch/besp/shared_data/ccshs/polysomnography/edfs")
all_channels = set()   # every distinct channel name seen in any file
rows = []              # one record per (file, channel) pair

num_of_file_checked = 0
for edf_path in edf_dir.glob("**/*.edf"):
    num_of_file_checked += 1
    # preload=False so the signal data is not loaded into memory.
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose="error")

    # Private MNE attribute; the header fields below are read from it.
    hdr = raw._raw_extras[0]
    rec_len_sec = hdr["record_length"]
    n_samps_list = hdr["n_samps"]

    for idx, ch in enumerate(raw.info["chs"]):
        ch_name = ch["ch_name"]
        all_channels.add(ch_name)

        # Sampling rate = samples per data record / record length.
        # NOTE(review): assumes hdr["n_samps"] is ordered like raw.info["chs"] — confirm.
        sfreq = n_samps_list[idx] / rec_len_sec
        # NOTE(review): confirm the per-channel dicts in raw.info["chs"] carry
        # "lowpass"/"highpass" keys; if not, both columns always hold "—".
        rows.append(
            dict(
                file=edf_path.name,
                channel=ch_name,
                sfreq_hz=sfreq,
                phys_unit=ch.get("unit", "—"),
                lowpass=ch.get("lowpass", "—"),
                highpass=ch.get("highpass", "—"),
            )
        )
    # Progress marker every 100 files.
    if num_of_file_checked % 100 == 0:
        print(num_of_file_checked)

channel_list = sorted(all_channels)
pd.Series(channel_list, name="channel").to_csv(
    edf_dir / "nchsdb_all_channels.csv", index=False
)

# Always rebind `df` from this cell's rows (explicit columns keep the frame
# well-formed even when empty), so an empty scan can neither raise NameError
# nor silently reuse a `df` left over from another cell.
df = pd.DataFrame(
    rows,
    columns=["file", "channel", "sfreq_hz", "phys_unit", "lowpass", "highpass"],
)
if df.empty:
    print(f"No EDF channels found under {edf_dir}; skipping the per-channel report.")
else:
    df.to_csv(edf_dir / "nchsdb_channels_details.csv", index=False)
    print(df['channel'].unique())

100
200
300
400
500
['C3' 'C4' 'A1' 'A2' 'LOC' 'ROC' 'ECG2' 'ECG1' 'LEFT LEG1' 'LEFT LEG2'
 'RIGHT LEG1' 'RIGHT LEG2' 'EMG1' 'EMG2' 'EMG3' 'AIRFLOW' 'THOR EFFORT'
 'ABDO EFFORT' 'SNORE' 'SUM' 'POSITION' 'OX STATUS' 'PULSE' 'SpO2'
 'NASAL PRES' 'PlethWV' 'Light' 'HRate' 'L Leg' 'R Leg' 'PTT' 'position'
 'DHR' 'HRate-0' 'HRate-1']


In [2]:
import pathlib, mne, pandas as pd

# Scan every EDF file under the CFS directory, collect one record per
# (file, channel) pair from the file headers, and save the channel inventory.
# NOTE(review): the CSVs written into edf_dir keep the "nchsdb_" prefix even
# though this cell scans CFS; names left unchanged so existing paths stay
# valid — consider renaming.
edf_dir = pathlib.Path("/scratch/besp/shared_data/cfs/polysomnography/edfs")
all_channels = set()   # every distinct channel name seen in any file
rows = []              # one record per (file, channel) pair

num_of_file_checked = 0
for edf_path in edf_dir.glob("**/*.edf"):
    num_of_file_checked += 1
    # preload=False so the signal data is not loaded into memory.
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose="error")

    # Private MNE attribute; the header fields below are read from it.
    hdr = raw._raw_extras[0]
    rec_len_sec = hdr["record_length"]
    n_samps_list = hdr["n_samps"]

    for idx, ch in enumerate(raw.info["chs"]):
        ch_name = ch["ch_name"]
        all_channels.add(ch_name)

        # Sampling rate = samples per data record / record length.
        # NOTE(review): assumes hdr["n_samps"] is ordered like raw.info["chs"] — confirm.
        sfreq = n_samps_list[idx] / rec_len_sec
        # NOTE(review): confirm the per-channel dicts in raw.info["chs"] carry
        # "lowpass"/"highpass" keys; if not, both columns always hold "—".
        rows.append(
            dict(
                file=edf_path.name,
                channel=ch_name,
                sfreq_hz=sfreq,
                phys_unit=ch.get("unit", "—"),
                lowpass=ch.get("lowpass", "—"),
                highpass=ch.get("highpass", "—"),
            )
        )
    # Progress marker every 100 files.
    if num_of_file_checked % 100 == 0:
        print(num_of_file_checked)

channel_list = sorted(all_channels)
pd.Series(channel_list, name="channel").to_csv(
    edf_dir / "nchsdb_all_channels.csv", index=False
)

# Always rebind `df` from this cell's rows (explicit columns keep the frame
# well-formed even when empty), so an empty scan can neither raise NameError
# nor silently reuse a `df` left over from another cell.
df = pd.DataFrame(
    rows,
    columns=["file", "channel", "sfreq_hz", "phys_unit", "lowpass", "highpass"],
)
if df.empty:
    print(f"No EDF channels found under {edf_dir}; skipping the per-channel report.")
else:
    df.to_csv(edf_dir / "nchsdb_channels_details.csv", index=False)
    print(df['channel'].unique())

100
200
300
400
500
600
700
['C3' 'C4' 'M1' 'M2' 'LOC' 'ROC' 'ECG2' 'ECG1' 'EMG1' 'EMG2' 'EMG3'
 'L Leg' 'R Leg' 'AIRFLOW' 'THOR EFFORT' 'ABDO EFFORT' 'SNORE' 'SUM'
 'POSITION' 'OX STATUS' 'PULSE' 'SpO2' 'NASAL PRES' 'PlethWV' 'Light'
 'HRate' 'SaO2' 'Masimo' 'PAP FLOW']


In [3]:
import pathlib, mne, pandas as pd

# Scan every EDF file under the SOF directory, collect one record per
# (file, channel) pair from the file headers, and save the channel inventory.
# NOTE(review): the CSVs written into edf_dir keep the "nchsdb_" prefix even
# though this cell scans SOF; names left unchanged so existing paths stay
# valid — consider renaming.
edf_dir = pathlib.Path("/scratch/besp/shared_data/sof/polysomnography/edfs")
all_channels = set()   # every distinct channel name seen in any file
rows = []              # one record per (file, channel) pair

num_of_file_checked = 0
for edf_path in edf_dir.glob("**/*.edf"):
    num_of_file_checked += 1
    # preload=False so the signal data is not loaded into memory.
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose="error")

    # Private MNE attribute; the header fields below are read from it.
    hdr = raw._raw_extras[0]
    rec_len_sec = hdr["record_length"]
    n_samps_list = hdr["n_samps"]

    for idx, ch in enumerate(raw.info["chs"]):
        ch_name = ch["ch_name"]
        all_channels.add(ch_name)

        # Sampling rate = samples per data record / record length.
        # NOTE(review): assumes hdr["n_samps"] is ordered like raw.info["chs"] — confirm.
        sfreq = n_samps_list[idx] / rec_len_sec
        # NOTE(review): confirm the per-channel dicts in raw.info["chs"] carry
        # "lowpass"/"highpass" keys; if not, both columns always hold "—".
        rows.append(
            dict(
                file=edf_path.name,
                channel=ch_name,
                sfreq_hz=sfreq,
                phys_unit=ch.get("unit", "—"),
                lowpass=ch.get("lowpass", "—"),
                highpass=ch.get("highpass", "—"),
            )
        )
    # Progress marker every 100 files.
    if num_of_file_checked % 100 == 0:
        print(num_of_file_checked)

channel_list = sorted(all_channels)
pd.Series(channel_list, name="channel").to_csv(
    edf_dir / "nchsdb_all_channels.csv", index=False
)

# Always rebind `df` from this cell's rows (explicit columns keep the frame
# well-formed even when empty), so an empty scan can neither raise NameError
# nor silently reuse a `df` left over from another cell.
df = pd.DataFrame(
    rows,
    columns=["file", "channel", "sfreq_hz", "phys_unit", "lowpass", "highpass"],
)
if df.empty:
    print(f"No EDF channels found under {edf_dir}; skipping the per-channel report.")
else:
    df.to_csv(edf_dir / "nchsdb_channels_details.csv", index=False)
    print(df['channel'].unique())

100
200
300
400
['Position' 'Leg/L' 'Leg/R' 'C3' 'C4' 'A1' 'A2' 'ROC' 'LOC' 'ECG1' 'ECG2'
 'L Chin' 'R Chin' 'Airflow' 'Thoracic' 'Abdominal' 'STAT' 'HR' 'SAO2'
 'Cannula Flow' 'EMG/L' 'EMG/R' 'Nasal Pressure' 'CH30' 'NASAL PRESSURE'
 'O1' 'O2' 'DHR']


In [8]:
import pathlib, mne, pandas as pd

# Scan every EDF file under the WSC directory, collect one record per
# (file, channel) pair from the file headers, and save the channel inventory.
# NOTE(review): the CSVs written into edf_dir keep the "nchsdb_" prefix even
# though this cell scans WSC; names left unchanged so existing paths stay
# valid — consider renaming.
edf_dir = pathlib.Path("/scratch/besp/shared_data/wsc/polysomnography")
all_channels = set()   # every distinct channel name seen in any file
rows = []              # one record per (file, channel) pair

num_of_file_checked = 0
for edf_path in edf_dir.glob("**/*.edf"):
    num_of_file_checked += 1
    # preload=False so the signal data is not loaded into memory.
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose="error")

    # Private MNE attribute; the header fields below are read from it.
    hdr = raw._raw_extras[0]
    rec_len_sec = hdr["record_length"]
    n_samps_list = hdr["n_samps"]

    for idx, ch in enumerate(raw.info["chs"]):
        ch_name = ch["ch_name"]
        all_channels.add(ch_name)

        # Sampling rate = samples per data record / record length.
        # NOTE(review): assumes hdr["n_samps"] is ordered like raw.info["chs"] — confirm.
        sfreq = n_samps_list[idx] / rec_len_sec
        # NOTE(review): confirm the per-channel dicts in raw.info["chs"] carry
        # "lowpass"/"highpass" keys; if not, both columns always hold "—".
        rows.append(
            dict(
                file=edf_path.name,
                channel=ch_name,
                sfreq_hz=sfreq,
                phys_unit=ch.get("unit", "—"),
                lowpass=ch.get("lowpass", "—"),
                highpass=ch.get("highpass", "—"),
            )
        )
    # Progress marker every 100 files.
    if num_of_file_checked % 100 == 0:
        print(num_of_file_checked)

channel_list = sorted(all_channels)
pd.Series(channel_list, name="channel").to_csv(
    edf_dir / "nchsdb_all_channels.csv", index=False
)

# Always rebind `df` from this cell's rows (explicit columns keep the frame
# well-formed even when empty), so an empty scan can neither raise NameError
# nor silently reuse a `df` left over from another cell.
df = pd.DataFrame(
    rows,
    columns=["file", "channel", "sfreq_hz", "phys_unit", "lowpass", "highpass"],
)
if df.empty:
    print(f"No EDF channels found under {edf_dir}; skipping the per-channel report.")
else:
    df.to_csv(edf_dir / "nchsdb_channels_details.csv", index=False)
    print(df['channel'].unique())


100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
['E1' 'E2' 'C3_M2' 'O1_M2' 'F3_M2' 'cchin_l' 'lleg_r' 'ECG' 'flow'
 'nas_pres' 'thorax' 'abdomen' 'sum' 'spo2' 'chin' 'snore' 'nasalflow'
 'oralflow' 'position' 'F4_AVG' 'Fz_AVG' 'Cz_AVG' 'C3_AVG' 'Pz_AVG'
 'O1_AVG' 'cchin_r' 'Fz_M2' 'Cz_M2' 'Pz_M2' 'O2_M2' 'F3_AVG' 'F4_M1'
 'pap_flow' 'pap_pres' 'F4_M2' 'rchin_l' 'C4_M1' 'C4_M2' 'rleg1_2'
 'lleg1_2' 'F3_M1' 'C3_M1' 'O1_M1' 'O2_M1' 'C4_AVG' 'Fz_M1' 'Cz_M1'
 'Pz_Cz']


In [4]:
import pathlib, mne, pandas as pd

# Scan the EDF files directly inside the CHAT "nonrandomized" sub-folder,
# collect one record per (file, channel) pair from the file headers, and save
# the channel inventory.
# NOTE(review): the CSVs written into edf_dir keep the "nchsdb_" prefix even
# though this cell scans CHAT, and other cells in this notebook that use the
# same edf_dir write to the same two paths, so each run overwrites the last.
# Names left unchanged so existing paths stay valid — consider renaming.
edf_dir = pathlib.Path("/scratch/besp/shared_data/chat/polysomnography/edfs")
all_channels = set()   # every distinct channel name seen in any file
rows = []              # one record per (file, channel) pair

num_of_file_checked = 0
for edf_path in edf_dir.glob("nonrandomized/*.edf"):
    num_of_file_checked += 1
    # preload=False so the signal data is not loaded into memory.
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose="error")

    # Private MNE attribute; the header fields below are read from it.
    hdr = raw._raw_extras[0]
    rec_len_sec = hdr["record_length"]
    n_samps_list = hdr["n_samps"]

    for idx, ch in enumerate(raw.info["chs"]):
        ch_name = ch["ch_name"]
        all_channels.add(ch_name)

        # Sampling rate = samples per data record / record length.
        # NOTE(review): assumes hdr["n_samps"] is ordered like raw.info["chs"] — confirm.
        sfreq = n_samps_list[idx] / rec_len_sec
        # NOTE(review): confirm the per-channel dicts in raw.info["chs"] carry
        # "lowpass"/"highpass" keys; if not, both columns always hold "—".
        rows.append(
            dict(
                file=edf_path.name,
                channel=ch_name,
                sfreq_hz=sfreq,
                phys_unit=ch.get("unit", "—"),
                lowpass=ch.get("lowpass", "—"),
                highpass=ch.get("highpass", "—"),
            )
        )
    # Progress marker every 100 files.
    if num_of_file_checked % 100 == 0:
        print(num_of_file_checked)

channel_list = sorted(all_channels)
pd.Series(channel_list, name="channel").to_csv(
    edf_dir / "nchsdb_all_channels.csv", index=False
)

# Always rebind `df` from this cell's rows (explicit columns keep the frame
# well-formed even when empty), so an empty scan can neither raise NameError
# nor silently reuse a `df` left over from another cell.
df = pd.DataFrame(
    rows,
    columns=["file", "channel", "sfreq_hz", "phys_unit", "lowpass", "highpass"],
)
if df.empty:
    print(f"No EDF channels found under {edf_dir}; skipping the per-channel report.")
else:
    df.to_csv(edf_dir / "nchsdb_channels_details.csv", index=False)
    print(df['channel'].unique())

100
200
300
400
500
600
700
['REF X1' 'E1' 'E2' 'F3' 'F4' 'C3' 'C4' 'T3' 'T4' 'O1' 'O2' 'M1' 'M2'
 'Lchin' 'Rchin' 'Cchin' 'CHIN' 'ECG1' 'ECG2' 'ECG3' 'ECG' 'Chest' 'ABD'
 'SUM' 'Airflow' 'CannulaFlow' 'Position' 'Snore' 'Rleg' 'Lleg' 'SAO2'
 'Pleth' 'Pulse' 'TCCO2' 'PaO2' 'EtCO2' 'Cap' 'Gravity X' 'Gravity Y'
 'CPAP Pressure' 'CPAP Flow' 'CPAP Leak' 'CPAP Volume' 'SpO2 BB' 'DHR'
 'Lleg1' 'Lleg2' 'Rleg1' 'Rleg2' 'LChin' 'RChin' 'Sum' 'OxSTAT' 'C-Pres'
 'Light' 'ManPos' 'SAO2Nellcor' 'PulseNellcor' 'PlethNellcor' 'ETCO2' 'RR'
 'SaO2' 'SNORE 2' 'RLEG' 'LLEG' 'BciEtCO2' 'BciCap' 'DHR-0' 'DHR-1'
 'SenTec CO2' 'Event' 'F7' 'F8' 'RAT' 'LAT' 'DC1' 'Leak' 'Pap' 'DC4'
 'CFlow' 'notused' 'Hr' 'X1 DC5' 'X1 DC6' 'X1 DC11' 'X1 DC12' 'FZ'
 'Masimo' 'PlethMasimo' 'X4-Y4' 'X8-Y8' 'Fz' 'Fp1' 'Fp2' 'Fpz' 'P3' 'P4'
 'T5' 'T6' 'DC2' 'DC3' 'CPress' 'EXTSAT' 'Braebon Body P' 'DC2-DCRef'
 'DC5-DCRef' 'DC6-DCRef' 'DC7-DCRef' 'OXER-DCRef' 'PHOT-NoRef' 'Snore 2'
 'LLeg' 'RLeg' 'Compumedics Body' 'CHEST' 'SNORE'

In [5]:
import pathlib, mne, pandas as pd

# Scan the EDF files directly inside the CHAT "baseline" sub-folder, collect
# one record per (file, channel) pair from the file headers, and save the
# channel inventory.
# NOTE(review): the CSVs written into edf_dir keep the "nchsdb_" prefix even
# though this cell scans CHAT, and other cells in this notebook that use the
# same edf_dir write to the same two paths, so each run overwrites the last.
# Names left unchanged so existing paths stay valid — consider renaming.
edf_dir = pathlib.Path("/scratch/besp/shared_data/chat/polysomnography/edfs")
all_channels = set()   # every distinct channel name seen in any file
rows = []              # one record per (file, channel) pair

num_of_file_checked = 0
for edf_path in edf_dir.glob("baseline/*.edf"):
    num_of_file_checked += 1
    # preload=False so the signal data is not loaded into memory.
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose="error")

    # Private MNE attribute; the header fields below are read from it.
    hdr = raw._raw_extras[0]
    rec_len_sec = hdr["record_length"]
    n_samps_list = hdr["n_samps"]

    for idx, ch in enumerate(raw.info["chs"]):
        ch_name = ch["ch_name"]
        all_channels.add(ch_name)

        # Sampling rate = samples per data record / record length.
        # NOTE(review): assumes hdr["n_samps"] is ordered like raw.info["chs"] — confirm.
        sfreq = n_samps_list[idx] / rec_len_sec
        # NOTE(review): confirm the per-channel dicts in raw.info["chs"] carry
        # "lowpass"/"highpass" keys; if not, both columns always hold "—".
        rows.append(
            dict(
                file=edf_path.name,
                channel=ch_name,
                sfreq_hz=sfreq,
                phys_unit=ch.get("unit", "—"),
                lowpass=ch.get("lowpass", "—"),
                highpass=ch.get("highpass", "—"),
            )
        )
    # Progress marker every 100 files.
    if num_of_file_checked % 100 == 0:
        print(num_of_file_checked)

channel_list = sorted(all_channels)
pd.Series(channel_list, name="channel").to_csv(
    edf_dir / "nchsdb_all_channels.csv", index=False
)

# Always rebind `df` from this cell's rows (explicit columns keep the frame
# well-formed even when empty), so an empty scan can neither raise NameError
# nor silently reuse a `df` left over from another cell.
df = pd.DataFrame(
    rows,
    columns=["file", "channel", "sfreq_hz", "phys_unit", "lowpass", "highpass"],
)
if df.empty:
    print(f"No EDF channels found under {edf_dir}; skipping the per-channel report.")
else:
    df.to_csv(edf_dir / "nchsdb_channels_details.csv", index=False)
    print(df['channel'].unique())

100
200
300
400
['REF X1' 'E1' 'E2' 'F3' 'F4' 'C3' 'C4' 'T3' 'T4' 'O1' 'O2' 'M1' 'M2'
 'Lchin' 'Rchin' 'Cchin' 'ECG1' 'ECG2' 'ECG3' 'Chest' 'ABD' 'SUM'
 'Airflow' 'Position' 'Snore' 'RLEG' 'LLEG' 'SAO2' 'SpO2 BB' 'Pleth'
 'Pulse' 'CannulaFlow' 'TCCO2' 'PaO2' 'EtCO2' 'Cap' 'Gravity X'
 'Gravity Y' 'CPAP Pressure' 'CPAP Flow' 'CPAP Leak' 'CPAP Volume'
 'SenTec CO2' 'DHR' 'Rleg' 'Lleg' 'ECG' 'CHIN' 'Lleg1' 'Lleg2' 'Rleg1'
 'Rleg2' 'LChin' 'RChin' 'Sum' 'OxSTAT' 'Light' 'ManPos' 'Event' 'F7' 'F8'
 'Fz' 'Fp1' 'Fp2' 'Fpz' 'P3' 'P4' 'T5' 'T6' 'POSdc1' 'BPOSdc2' 'ORAL'
 'DC4' 'CFLOW' 'LEAK' 'PAP' 'SAO2nonin' 'SAO2masimo' 'PulseMasimo' 'Oral'
 'LAT' 'DC1' 'Leak' 'Pap' 'EtC02' 'NotUsed' 'CHEST' 'SNORE' 'ETCO2' 'CAP'
 'FZ' 'CChin' 'Masimo' 'PlethMasimo' 'DC2' 'DC3' 'CPress' 'CFlow' 'EXTSAT'
 'RAT' 'notused' 'X4-Y4' 'Braebon Body P' 'Compumedics Body' 'SNORE 2'
 'C-Pres' 'ProTechPositione' 'BciEtCO2' 'BciCap' 'XFlow' 'XSum' 'PWF'
 'HEARTRATE' 'NPAF' 'Pulsemasimo' 'DHR-0' 'DHR-1' 'REF' 'Gravity-0'


In [6]:
import pathlib, mne, pandas as pd

# Scan the EDF files directly inside the CHAT "followup" sub-folder, collect
# one record per (file, channel) pair from the file headers, and save the
# channel inventory.
# NOTE(review): the CSVs written into edf_dir keep the "nchsdb_" prefix even
# though this cell scans CHAT, and other cells in this notebook that use the
# same edf_dir write to the same two paths, so each run overwrites the last.
# Names left unchanged so existing paths stay valid — consider renaming.
edf_dir = pathlib.Path("/scratch/besp/shared_data/chat/polysomnography/edfs")
all_channels = set()   # every distinct channel name seen in any file
rows = []              # one record per (file, channel) pair

num_of_file_checked = 0
for edf_path in edf_dir.glob("followup/*.edf"):
    num_of_file_checked += 1
    # preload=False so the signal data is not loaded into memory.
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose="error")

    # Private MNE attribute; the header fields below are read from it.
    hdr = raw._raw_extras[0]
    rec_len_sec = hdr["record_length"]
    n_samps_list = hdr["n_samps"]

    for idx, ch in enumerate(raw.info["chs"]):
        ch_name = ch["ch_name"]
        all_channels.add(ch_name)

        # Sampling rate = samples per data record / record length.
        # NOTE(review): assumes hdr["n_samps"] is ordered like raw.info["chs"] — confirm.
        sfreq = n_samps_list[idx] / rec_len_sec
        # NOTE(review): confirm the per-channel dicts in raw.info["chs"] carry
        # "lowpass"/"highpass" keys; if not, both columns always hold "—".
        rows.append(
            dict(
                file=edf_path.name,
                channel=ch_name,
                sfreq_hz=sfreq,
                phys_unit=ch.get("unit", "—"),
                lowpass=ch.get("lowpass", "—"),
                highpass=ch.get("highpass", "—"),
            )
        )
    # Progress marker every 100 files.
    if num_of_file_checked % 100 == 0:
        print(num_of_file_checked)

channel_list = sorted(all_channels)
pd.Series(channel_list, name="channel").to_csv(
    edf_dir / "nchsdb_all_channels.csv", index=False
)

# Always rebind `df` from this cell's rows (explicit columns keep the frame
# well-formed even when empty), so an empty scan can neither raise NameError
# nor silently reuse a `df` left over from another cell.
df = pd.DataFrame(
    rows,
    columns=["file", "channel", "sfreq_hz", "phys_unit", "lowpass", "highpass"],
)
if df.empty:
    print(f"No EDF channels found under {edf_dir}; skipping the per-channel report.")
else:
    df.to_csv(edf_dir / "nchsdb_channels_details.csv", index=False)
    print(df['channel'].unique())

100
200
300
400
['E1' 'E2' 'ECG1' 'ECG2' 'F7' 'F3' 'FZ' 'F4' 'ECG3' 'M1' 'T3' 'C3' 'Rchin'
 'C4' 'T4' 'M2' 'Rleg1' 'Rleg2' 'Lchin' 'Lleg1' 'Lleg2' 'O1' 'Cchin' 'O2'
 'Snore' 'Airflow' 'CannulaFlow' 'Chest' 'ABD' 'SUM' 'SAO2' 'Pulse'
 'Masimo' 'EtCO2' 'PlethMasimo' 'Cap' 'DHR' 'REF X1' 'CHEST' 'Position'
 'SNORE' 'RLEG' 'LLEG' 'Pleth' 'ETCO2' 'CAP' 'ECG' 'CHIN' 'LChin' 'RChin'
 'Sum' 'OxSTAT' 'C-Pres' 'Event' 'F8' 'Fz' 'Fp1' 'Fp2' 'Fpz' 'P3' 'P4'
 'T5' 'T6' 'POSdc1' 'BPOSdc2' 'DC3' 'DC4' 'CFLOW' 'PAP' 'LEAK' 'SAO2nonin'
 'SAO2masimo' 'CChin' 'LLeg1' 'LLeg2' 'RLeg1' 'RLeg2' 'ORAL' 'Pulsemasimo'
 'Light' 'ManPos' 'Lleg' 'Rleg' 'TCCO2' 'PaO2' 'Gravity X' 'Gravity Y'
 'ProTechPositione' 'BciEtCO2' 'BciCap' 'Braebon Body P' 'SA02masimo'
 'pulsemasimo' 'DC2' 'CPress' 'CFlow' 'cap' 'EXTSAT' 'X4-Y4' 'PlethMasino'
 'SpO2 BB' 'CPAP Pressure' 'CPAP Flow' 'CPAP Leak' 'CPAP Volume'
 'SenTec CO2' 'X8-Y8' 'Cz' 'Pz' 'POS' 'RR' 'Plethmasimo' 'DC10' 'REF'
 'Gravity-0' 'Gravity-1' 'Braebon' 'PulseMasimo' 