In [1]:
import os
import pandas as pd
import mne
import numpy as np
from config.config import DataConfig

In [2]:
def convert_into_csv(text_file):
    """Convert a headerless sleep-stage text file into a labeled CSV.

    The input is read as tab-separated data with no header row; its three
    columns are named ``Stage``, ``Time`` and ``SamplingRate``. The result is
    written to the input path with ``.txt`` replaced by ``_labeled.csv``.
    The output stays tab-separated, which is how ``annotate_edf_with_stages``
    reads it back.

    Args:
        text_file (str): Path to the ``.txt`` label file.

    Raises:
        ValueError: If ``text_file`` contains no ``.txt``. The derived output
            path would then equal the input path and the source file would
            be overwritten.
    """
    # process_subject derives the label path with the same replace() call,
    # so this naming must stay in sync with it.
    output_file = text_file.replace('.txt', '_labeled.csv')
    if output_file == text_file:
        raise ValueError(
            f"Expected a path containing '.txt', got: {text_file}"
        )

    data = pd.read_csv(text_file, sep='\t', header=None,
                       names=['Stage', 'Time', 'SamplingRate'])

    # dirname is '' for a bare filename and os.makedirs('') raises
    # FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    data.to_csv(output_file, sep='\t', index=False)
    print(f"Created CSV: {output_file}")

In [10]:
# Run convert_into_csv on the 'txt' path of every subject listed in
# DataConfig.SUBJECTS.
for subject_id in DataConfig.SUBJECTS:
    paths = DataConfig.get_subject_paths(subject_id)
    print(f"\nProcessing subject: {subject_id}")
    try:
        convert_into_csv(paths['txt'])
    except Exception as e:
        # Report the failure and continue, so one failing subject does not
        # stop the remaining conversions.
        print(f"Error processing {subject_id}: {str(e)}")


Processing subject: EPCTL01
Created CSV: /Users/kimberly/Documents/STAT4830/STAT-4830-GOALZ-project/Anphy Dataset/EPCTL01/EPCTL01_labeled.csv

Processing subject: EPCTL02
Created CSV: /Users/kimberly/Documents/STAT4830/STAT-4830-GOALZ-project/Anphy Dataset/EPCTL02/EPCTL02_labeled.csv

Processing subject: EPCTL03
Created CSV: /Users/kimberly/Documents/STAT4830/STAT-4830-GOALZ-project/Anphy Dataset/EPCTL03/EPCTL03_labeled.csv

Processing subject: EPCTL04
Created CSV: /Users/kimberly/Documents/STAT4830/STAT-4830-GOALZ-project/Anphy Dataset/EPCTL04/EPCTL04_labeled.csv

Processing subject: EPCTL05
Created CSV: /Users/kimberly/Documents/STAT4830/STAT-4830-GOALZ-project/Anphy Dataset/EPCTL05/EPCTL05_labeled.csv

Processing subject: EPCTL06
Created CSV: /Users/kimberly/Documents/STAT4830/STAT-4830-GOALZ-project/Anphy Dataset/EPCTL06/EPCTL06_labeled.csv

Processing subject: EPCTL07
Created CSV: /Users/kimberly/Documents/STAT4830/STAT-4830-GOALZ-project/Anphy Dataset/EPCTL07/EPCTL07_labeled.csv

In [3]:

def annotate_edf_with_stages(edf_file, label_csv, epoch_duration=30):
    """
    Annotate EDF file with sleep stages from CSV labels and export the result

    The recording is loaded into memory, one annotation is created per row of
    the label file, and the annotated recording is exported as EDF to
    ``<edf_file without extension>_annotated.edf``.

    Args:
        edf_file (str): Path to the EDF file
        label_csv (str): Path to the tab-separated label file; it must have
            ``Time`` and ``Stage`` columns
        epoch_duration (float): Duration given to every annotation, in
            seconds. Defaults to 30.

    Returns:
        mne.io.Raw: Annotated EDF data, or None if any step failed (the error
        is printed rather than raised)
    """
    try:
        # Read the EDF file; preload=True loads the full signal into memory
        raw = mne.io.read_raw_edf(edf_file, preload=True)
        # Read the labels
        labels = pd.read_csv(label_csv, sep='\t')

        # One annotation per label row.
        # NOTE(review): 'Time' is used directly as the onset, which MNE
        # interprets as seconds — confirm the label files use seconds.
        onset = labels['Time'].values
        duration = np.repeat(epoch_duration, len(labels))
        description = labels['Stage'].values

        # Create MNE annotations
        annotations = mne.Annotations(onset=onset,
                                      duration=duration,
                                      description=description)

        # Add annotations to the raw data
        raw.set_annotations(annotations)

        # Build the output name from the extension-stripped path so it can
        # never equal the input path (e.g. for an upper-case '.EDF' suffix),
        # which would overwrite the source recording.
        root, _ = os.path.splitext(edf_file)
        output_file = f"{root}_annotated.edf"

        # Raw.save() only writes FIF and rejects non-.fif filenames, so use
        # the exporter to produce an actual EDF file.
        # NOTE(review): EDF export relies on an optional MNE backend package;
        # confirm it is installed in the project environment.
        raw.export(output_file, fmt='edf', overwrite=True)

        print(f"Successfully annotated: {os.path.basename(edf_file)}")
        return raw

    except Exception as e:
        print(f"Error processing {os.path.basename(edf_file)}: {str(e)}")
        return None

def process_subject(subject_id):
    """Annotate one subject's recording, locating its files via DataConfig.

    The label file path is the subject's 'txt' path with '.txt' replaced by
    '_labeled.csv'. When both that file and the EDF exist, the result of
    annotate_edf_with_stages is returned; otherwise a short report of which
    file is missing is printed and None is returned.
    """
    subject_paths = DataConfig.get_subject_paths(subject_id)
    label_csv = subject_paths['txt'].replace('.txt', '_labeled.csv')
    edf_path = subject_paths['edf']

    edf_found = os.path.exists(edf_path)
    csv_found = os.path.exists(label_csv)

    # Guard clause: bail out with a diagnostic when either input is missing.
    if not (edf_found and csv_found):
        print(f"Files not found for subject {subject_id}")
        print(f"EDF exists: {edf_found}")
        print(f"CSV exists: {csv_found}")
        return None

    return annotate_edf_with_stages(edf_path, label_csv)

In [None]:
# Process all subjects: call process_subject for every entry in
# DataConfig.SUBJECTS.
# `raw` is rebound on each pass, so after the loop it holds only the last
# subject's result (None when process_subject reported a problem).
for subject_id in DataConfig.SUBJECTS:
    print(f"\nProcessing subject: {subject_id}")
    raw = process_subject(subject_id)


Processing subject: EPCTL01
Extracting EDF parameters from /Users/kimberly/Documents/STAT4830/STAT-4830-GOALZ-project/Anphy Dataset/EPCTL01/EPCTL01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 28737999  =      0.000 ... 28737.999 secs...
