In [11]:
import os
import pyedflib
import scipy.io
import numpy as np
import h5py

In [12]:
# Root folder of the ANPHY-Sleep dataset. It can be redirected by setting the
# ANPHY_SLEEP_DIR environment variable before starting the kernel, so the
# notebook is not tied to a single machine; the original location remains the
# default when the variable is not set.
base_path = os.environ.get(
    "ANPHY_SLEEP_DIR",
    "/Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep",
)
# Folder and EDF recording of the first subject (EPCTL01).
subject_folder = os.path.join(base_path, "EPCTL01")
edf_file = os.path.join(subject_folder, "EPCTL01.edf")
# Folder that load_artifact_matrices scans for .mat files.
artifact_path = os.path.join(base_path, "Artifact matrix")

In [13]:
# Load EEG metadata from EDF file
def load_edf_metadata(edf_path):
    """Print and return the header metadata of an EDF recording.

    Parameters
    ----------
    edf_path : str
        Path of the EDF file, opened with ``pyedflib.EdfReader``.

    Returns
    -------
    dict
        The values that are printed, under the keys ``"n_channels"``,
        ``"signal_labels"``, ``"sample_rates"`` (one entry per channel) and
        ``"duration"``. Earlier versions returned ``None``; callers that
        ignore the return value are unaffected.

    Notes
    -----
    Errors raised by ``pyedflib.EdfReader`` while opening or reading the file
    are not caught here.
    """
    # Read everything while the file is open; the reader is closed on exit
    # from the ``with`` block.
    with pyedflib.EdfReader(edf_path) as f:
        n_channels = f.signals_in_file
        signal_labels = f.getSignalLabels()
        sample_rates = [f.getSampleFrequency(i) for i in range(n_channels)]
        duration = f.file_duration

    print(f"Metadata for {edf_path}:")
    print(f"- Number of Channels: {n_channels}")
    print(f"- Signal Labels: {signal_labels}")
    print(f"- Sampling Rates: {sample_rates}")
    print(f"- Recording Duration: {duration} sec")

    return {
        "n_channels": n_channels,
        "signal_labels": signal_labels,
        "sample_rates": sample_rates,
        "duration": duration,
    }

# Load Artifact Matrices using h5py for v7.3 MATLAB files
def load_artifact_matrices(artifact_folder):
    """Read every ``.mat`` file in a folder into nested dictionaries.

    Each file whose name ends in ``.mat`` is opened read-only with
    ``h5py.File``; every top-level key of the file is converted with
    ``np.array``.

    Parameters
    ----------
    artifact_folder : str
        Folder to scan (non-recursively) for ``.mat`` files.

    Returns
    -------
    dict
        ``{file_name: {key: numpy array}}``. A file that raises while being
        opened or converted is reported on stdout and left out of the result.
    """
    artifact_data = {}
    mat_names = [name for name in os.listdir(artifact_folder) if name.endswith(".mat")]

    for mat_name in mat_names:
        mat_path = os.path.join(artifact_folder, mat_name)

        try:
            with h5py.File(mat_path, 'r') as handle:
                print(f"\nLoaded artifact matrix: {mat_name}")
                print("Keys in this file:", list(handle.keys()))

                # Convert all keys first, so a failure part-way through does
                # not leave a half-filled entry in the result.
                contents = {}
                for key in handle.keys():
                    contents[key] = np.array(handle[key])
                artifact_data[mat_name] = contents

        except Exception as err:
            # Best effort: report the problem and carry on with the next file.
            print(f"Error loading {mat_name}: {err}")

    return artifact_data

In [16]:
# Print the EDF metadata of every subject, EPCTL01 through EPCTL29.
for i in range(1, 30):
    subj = f"EPCTL{i:02d}"  # zero-padded two-digit subject number
    subject_folder = os.path.join(base_path, subj)
    edf_file = os.path.join(subject_folder, f"{subj}.edf")
    print(f"\nProcessing subject: {subj}")
    load_edf_metadata(edf_file)


Processing subject: EPCTL01
Metadata for /Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep/EPCTL01/EPCTL01.edf:
- Number of Channels: 93
- Signal Labels: ['Fp1-Ref', 'Fp2-Ref', 'F3-Ref', 'F4-Ref', 'C3-Ref', 'C4-Ref', 'P3-Ref', 'P4-Ref', 'O1-Ref', 'O2-Ref', 'F7-Ref', 'F8-Ref', 'T3-Ref', 'T4-Ref', 'T5-Ref', 'T6-Ref', 'FZ-Ref', 'CZ-Ref', 'PZ-Ref', 'SO1-Ref', 'SO2-Ref', 'F9-Ref', 'F10-Ref', 'ZY1', 'ZY2', 'T9-Ref', 'T10-Ref', 'P9-Ref', 'P10-Ref', 'AF7-Ref', 'AF3-Ref', 'F11', 'F5-Ref', 'F1-Ref', 'FT11', 'FT9-Ref', 'FT7-Ref', 'FC5-Ref', 'FC3-Ref', 'FC1-Ref', 'FCZ-Ref', 'C5-Ref', 'C1-Ref', 'TP11', 'TP9-Ref', 'TP7-Ref', 'CP3-Ref', 'CP1-Ref', 'P11', 'P5-Ref', 'P1-Ref', 'PO7-Ref', 'PO3-Ref', 'POZ-Ref', 'OZ-Ref', 'FPZ-Ref', 'AFZ-Ref', 'AF4-Ref', 'AF8-Ref', 'F2-Ref', 'F6-Ref', 'F12', 'FC2-Ref', 'FC4-Ref', 'FC6-Ref', 'FT8-Ref', 'FT10-Ref', 'FT12', 'C6-Ref', 'C2-Ref', 'CPZ-Ref', 'CP2-Ref', 'CP4-Ref', 'CP6-Ref', 'TP8-Ref', 'TP10-Ref', 'TP12', 'P2-Ref', 'P6-Ref', 'P12', 'PO4

In [None]:
# Read every .mat file in the artifact folder; the result is a dict keyed by file name.
artifact_data = load_artifact_matrices(artifact_path)

Metadata for /Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep/EPCTL01/EPCTL01.edf:
- Number of Channels: 93
- Signal Labels: ['Fp1-Ref', 'Fp2-Ref', 'F3-Ref', 'F4-Ref', 'C3-Ref', 'C4-Ref', 'P3-Ref', 'P4-Ref', 'O1-Ref', 'O2-Ref', 'F7-Ref', 'F8-Ref', 'T3-Ref', 'T4-Ref', 'T5-Ref', 'T6-Ref', 'FZ-Ref', 'CZ-Ref', 'PZ-Ref', 'SO1-Ref', 'SO2-Ref', 'F9-Ref', 'F10-Ref', 'ZY1', 'ZY2', 'T9-Ref', 'T10-Ref', 'P9-Ref', 'P10-Ref', 'AF7-Ref', 'AF3-Ref', 'F11', 'F5-Ref', 'F1-Ref', 'FT11', 'FT9-Ref', 'FT7-Ref', 'FC5-Ref', 'FC3-Ref', 'FC1-Ref', 'FCZ-Ref', 'C5-Ref', 'C1-Ref', 'TP11', 'TP9-Ref', 'TP7-Ref', 'CP3-Ref', 'CP1-Ref', 'P11', 'P5-Ref', 'P1-Ref', 'PO7-Ref', 'PO3-Ref', 'POZ-Ref', 'OZ-Ref', 'FPZ-Ref', 'AFZ-Ref', 'AF4-Ref', 'AF8-Ref', 'F2-Ref', 'F6-Ref', 'F12', 'FC2-Ref', 'FC4-Ref', 'FC6-Ref', 'FT8-Ref', 'FT10-Ref', 'FT12', 'C6-Ref', 'C2-Ref', 'CPZ-Ref', 'CP2-Ref', 'CP4-Ref', 'CP6-Ref', 'TP8-Ref', 'TP10-Ref', 'TP12', 'P2-Ref', 'P6-Ref', 'P12', 'PO4-Ref', 'PO8-Ref', 'ChEMG1', '

In [None]:
# Folder with the artifact .mat files, inside the dataset root
# (same value as assigned in the path-configuration cell near the top).
artifact_path = os.path.join(base_path, "Artifact matrix")

In [None]:
import os
import pyedflib
import numpy as np
import pandas as pd
import h5py

# Root folder of the ANPHY-Sleep dataset. Set the ANPHY_SLEEP_DIR environment
# variable before starting the kernel to use a copy stored elsewhere; the
# original location remains the default when the variable is not set.
base_path = os.environ.get(
    "ANPHY_SLEEP_DIR",
    "/Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep",
)

# Load subject details CSV file
details_csv = os.path.join(base_path, "Details information for healthy subjects.csv")
details = pd.read_csv(details_csv)
# Show only the first rows as a quick sanity check of the table.
print("Loaded details:\n", details.head())

def load_edf_metadata(edf_path):
    """Print and return the header metadata of an EDF recording.

    NOTE(review): a function with this name is also defined in an earlier
    cell; this definition replaces it once the cell runs.

    Parameters
    ----------
    edf_path : str
        Path of the EDF file, opened with ``pyedflib.EdfReader``.

    Returns
    -------
    dict
        The values that are printed, under the keys ``"n_channels"``,
        ``"signal_labels"``, ``"sample_rates"`` (one entry per channel) and
        ``"duration"``. Earlier versions returned ``None``; callers that
        ignore the return value are unaffected.

    Notes
    -----
    Errors raised by ``pyedflib.EdfReader`` while opening or reading the file
    are not caught here.
    """
    # Collect the values while the file is open; the reader is closed on exit
    # from the ``with`` block.
    with pyedflib.EdfReader(edf_path) as f:
        n_channels = f.signals_in_file
        metadata = {
            "n_channels": n_channels,
            "signal_labels": f.getSignalLabels(),
            "sample_rates": [f.getSampleFrequency(i) for i in range(n_channels)],
            "duration": f.file_duration,
        }

    print(f"Metadata for {edf_path}:")
    print(f"  Channels: {metadata['n_channels']}")
    print(f"  Labels: {metadata['signal_labels']}")
    print(f"  Sampling Rates (Hz): {metadata['sample_rates']}")
    print(f"  Duration (sec): {metadata['duration']}")
    return metadata

def load_artifact_matrices(artifact_folder):
    """Collect the contents of all ``.mat`` files found in ``artifact_folder``.

    Files are opened read-only through ``h5py.File`` and each top-level key
    is turned into a NumPy array via ``np.array``.

    Parameters
    ----------
    artifact_folder : str
        Folder whose direct entries ending in ``.mat`` are read.

    Returns
    -------
    dict
        Mapping of file name to ``{key: numpy array}``. Files that raise
        during loading are reported on stdout and skipped.
    """
    artifact_data = {}
    for entry in os.listdir(artifact_folder):
        if not entry.endswith(".mat"):
            continue
        full_path = os.path.join(artifact_folder, entry)
        try:
            with h5py.File(full_path, 'r') as h5_file:
                print(f"\nLoaded artifact matrix: {entry}")
                print("  Keys:", list(h5_file.keys()))
                # Build the per-file dict completely before storing it.
                arrays = {}
                for key in h5_file.keys():
                    arrays[key] = np.array(h5_file[key])
                artifact_data[entry] = arrays
        except Exception as exc:
            # Best effort: report and move on to the next file.
            print(f"Error loading {entry}: {exc}")
    return artifact_data

def load_txt_file(txt_path):
    """Read a text file into a NumPy array of strings.

    The file is parsed by ``np.loadtxt`` with ``dtype=str`` and otherwise
    default settings, so it is assumed to be whitespace-delimited.

    Parameters
    ----------
    txt_path : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        The parsed fields as strings.
    """
    return np.loadtxt(txt_path, dtype=str)

In [None]:
# Loop over subject folders (EPCTL01 to EPCTL29): print each subject's EDF
# metadata, then load and print its text annotation file if one exists.
for i in range(1, 30):
    subj = f"EPCTL{i:02d}"  # zero-padded two-digit subject number
    subject_folder = os.path.join(base_path, subj)

    # Load EDF metadata
    edf_file = os.path.join(subject_folder, f"{subj}.edf")
    print(f"\nProcessing subject: {subj}")
    load_edf_metadata(edf_file)

    # Load text file annotations (sleep staging)
    # os.listdir returns names in arbitrary order; sort them so that the file
    # picked below is the same on every run when a folder has several .txt files.
    txt_files = sorted(f for f in os.listdir(subject_folder) if f.endswith(".txt"))
    if txt_files:
        txt_path = os.path.join(subject_folder, txt_files[0])
        txt_data = load_txt_file(txt_path)
        print(f"Text file data for subject {subj}:\n", txt_data)
    else:
        print(f"No text file found for subject {subj}")

In [None]:
# Read every .mat file in the artifact folder; the result is a dict keyed by file name.
# artifact_path is not set in this cell; it comes from an earlier cell.
artifact_data = load_artifact_matrices(artifact_path)