In [None]:
import mne
import numpy as np
import pandas as pd
import yasa
import matplotlib.pyplot as plt
import os
from pathlib import Path

In [None]:
# Locate and load the SN001 recording, then plot the spectrogram of the
# C4-M1 EEG channel. Later cells reuse `raw`, `c4_data` and `sf`, so fail
# loudly here instead of leaving them undefined.
data_path = Path("./data/recordings/")
sn001_file = list(data_path.glob("**/SN001.edf"))
if not sn001_file:
    raise FileNotFoundError(f"No SN001.edf found under {data_path}")
raw = mne.io.read_raw_edf(sn001_file[0], preload=True)

if 'EEG C4-M1' not in raw.ch_names:
    raise ValueError(
        f"Channel 'EEG C4-M1' not found in {sn001_file[0]}; "
        f"available channels: {raw.ch_names}"
    )

c4_data = raw.get_data(picks=['EEG C4-M1']).squeeze()
sf = raw.info['sfreq']

# plot_spectrogram creates its own figure, so resize the figure it returns
# rather than opening a separate (empty) one with plt.figure().
fig = yasa.plot_spectrogram(c4_data, sf=sf, hypno=None,
                            fmin=0.5, fmax=30, cmap='Spectral_r')
fig.set_size_inches(14, 6)
fig.tight_layout()
plt.show()

In [None]:
# Parse the manual sleep scoring of SN001 into integer stage codes
# (0=Wake, 1=N1, 2=N2, 3=N3, 4=REM), one entry per recognised annotation row.
file_path = "data/recordings/SN001_sleepscoring.txt"

# Ordered (substrings, code) rules: the first rule with a substring present in
# the annotation text decides the code, so the order below matters.
_STAGE_RULES = (
    (('W',), 0),                        # Wake
    (('N1', 'S1'), 1),                  # N1
    (('N2', 'S2'), 2),                  # N2
    (('N3', 'S3', 'N4', 'S4'), 3),      # N3
    (('R',), 4),                        # REM
)

with open(file_path, 'r') as f:
    scoring_rows = f.readlines()[1:]  # the first line is skipped as a header

sleep_stages = []
for row in scoring_rows:
    fields = row.strip().split(',')
    # Only rows whose fifth comma-separated field mentions 'Sleep stage' are scored.
    if len(fields) < 5 or 'Sleep stage' not in fields[4]:
        continue
    annotation = fields[4].strip()
    code = next(
        (value for tokens, value in _STAGE_RULES
         if any(token in annotation for token in tokens)),
        None,
    )
    # NOTE(review): rows with an unrecognised stage are dropped, which shortens
    # the hypnogram by one entry each time — confirm this never happens mid-recording.
    if code is not None:
        sleep_stages.append(code)

sleep_df = pd.DataFrame({'sleep_stage': sleep_stages})

In [None]:
# Automatic staging of the C4-M1 channel with YASA, converted to integer codes.
sls = yasa.SleepStaging(raw, eeg_name="EEG C4-M1")
hypno_pred = yasa.hypno_str_to_int(sls.predict())

# The hypnogram is sampled at 1/30 Hz (one value per 30 s); stretch it to the
# sampling rate of `raw` so it can be drawn above the spectrogram.
hypno_up = yasa.hypno_upsample_to_data(hypno=hypno_pred, sf_hypno=1/30, data=raw)
yasa.plot_spectrogram(c4_data, sf=sf, hypno=hypno_up)
plt.show()

In [None]:
# Same spectrogram as for the automatic staging, but annotated with the manual
# scoring (one value per 30 s, upsampled to the sampling rate of `raw`).
manual_stages = sleep_df["sleep_stage"]
hypno_up = yasa.hypno_upsample_to_data(hypno=manual_stages, sf_hypno=1/30, data=raw)
yasa.plot_spectrogram(c4_data, sf=sf, hypno=hypno_up)
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, cohen_kappa_score
import yasa

# Compare the automatic staging (hypno_pred) with the manual scoring (sleep_df).
# Both hypnograms are trimmed to their common number of epochs instead of a
# hard-coded length, so the cell works for recordings of any duration and
# gives the same result when re-run.
n_epochs = min(len(sleep_df), len(hypno_pred))
hypno_true = sleep_df["sleep_stage"].to_numpy()[:n_epochs]
hypno_pred = np.asarray(hypno_pred)[:n_epochs]

# Calculate overall accuracy
accuracy = accuracy_score(hypno_true, hypno_pred)
print(f"Overall accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")

# Calculate Cohen's Kappa (accounts for chance agreement)
kappa = cohen_kappa_score(hypno_true, hypno_pred)
print(f"Cohen's Kappa: {kappa:.4f}")

# Stage codes present in either hypnogram, in ascending order. They are passed
# explicitly to scikit-learn so matrix rows/columns and tick labels always agree.
unique_stages = np.unique(np.concatenate([hypno_true, hypno_pred]))
stage_labels = ['Wake', 'N1', 'N2', 'N3', 'REM']
# The lower bound matters: a negative code would otherwise index from the end
# of stage_labels and be shown as a real stage.
labels = [stage_labels[int(stage)] if 0 <= int(stage) < len(stage_labels) else f'Unknown-{int(stage)}'
          for stage in unique_stages]

# Create a confusion matrix
cm = confusion_matrix(hypno_true, hypno_pred, labels=unique_stages)

# Plot confusion matrix
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=labels, yticklabels=labels, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
fig.tight_layout()
plt.show()

# Calculate per-stage metrics
print("\nClassification Report:")
print(classification_report(hypno_true, hypno_pred, labels=unique_stages,
                            target_names=labels, zero_division=0))

# Per-stage accuracy: among the epochs manually scored as a stage, the fraction
# that was also predicted as that stage (i.e. the per-stage recall).
stage_accuracy = {}
for stage, label in zip(unique_stages, labels):
    mask = (hypno_true == stage)
    if mask.any():  # Stages that only occur in the prediction have no true epochs
        stage_accuracy[label] = np.mean(hypno_pred[mask] == stage)

# Plot per-stage accuracy
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=list(stage_accuracy.keys()), y=list(stage_accuracy.values()), ax=ax)
ax.axhline(y=accuracy, color='r', linestyle='--', label=f'Overall Accuracy: {accuracy:.4f}')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Sleep Stage')
ax.set_title('Per-Stage Accuracy')
ax.set_ylim(0, 1)
ax.tick_params(axis='x', rotation=45)
ax.legend()
fig.tight_layout()
plt.show()

# Plot both hypnograms for visual comparison
fig, (ax_true, ax_pred) = plt.subplots(2, 1, figsize=(15, 8))

# Plot ground truth hypnogram
yasa.plot_hypnogram(hypno_true, ax=ax_true)
ax_true.set_title('Ground Truth Hypnogram')

# Plot predicted hypnogram
yasa.plot_hypnogram(hypno_pred, ax=ax_pred)
ax_pred.set_title('Predicted Hypnogram')

fig.tight_layout()
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, cohen_kappa_score
import yasa
import mne
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')  # Suppress warnings for cleaner output

# Integer stage codes used for every subject: 0=Wake, 1=N1, 2=N2, 3=N3, 4=REM.
ALL_STAGES = [0, 1, 2, 3, 4]
stage_labels = ['Wake', 'N1', 'N2', 'N3', 'REM']

# Ordered (substrings, code) rules for the manual scoring text: the first rule
# with a substring present in the annotation decides the code.
_SCORING_RULES = (
    (('W',), 0),                        # Wake
    (('N1', 'S1'), 1),                  # N1
    (('N2', 'S2'), 2),                  # N2
    (('N3', 'S3', 'N4', 'S4'), 3),      # N3
    (('R',), 4),                        # REM
)


def _parse_sleep_scoring(scoring_path):
    """Read a manual sleep-scoring file and return its stages as integer codes.

    The first line is skipped as a header. A row is used only if it has at
    least five comma-separated fields and the fifth one contains
    'Sleep stage'; its text is then mapped with ``_SCORING_RULES``.

    Parameters
    ----------
    scoring_path : str or path-like
        Path of the scoring text file.

    Returns
    -------
    list of int
        One code per recognised row, in file order.

    Raises
    ------
    FileNotFoundError
        If ``scoring_path`` does not exist.
    """
    with open(scoring_path, 'r') as f:
        rows = f.readlines()[1:]  # Skip header

    stages = []
    for row in rows:
        parts = row.strip().split(',')
        if len(parts) < 5 or 'Sleep stage' not in parts[4]:
            continue
        stage_text = parts[4].strip()
        # NOTE(review): rows with an unrecognised stage are dropped, which
        # shortens the hypnogram by one entry each time — confirm this never
        # happens mid-recording, since the comparison below is positional.
        for tokens, code in _SCORING_RULES:
            if any(token in stage_text for token in tokens):
                stages.append(code)
                break
    return stages


# Define the range of subjects to process
subject_ids = [f"SN{i:03d}" for i in range(1, 155)]  # SN001 to SN154

# Initialize lists to store results
all_accuracies = []
all_kappas = []
all_hypno_true = []
all_hypno_pred = []
all_cms = []
processed_subjects = []

# Data path
data_path = Path("./data/recordings/")

# Process each subject
for subject_id in subject_ids:
    try:
        print(f"Processing {subject_id}...")

        # Find EDF file for this subject
        edf_files = list(data_path.glob(f"**/{subject_id}.edf"))

        if not edf_files:
            print(f"No EDF file found for {subject_id}, skipping...")
            continue

        # Load EEG data
        raw = mne.io.read_raw_edf(edf_files[0], preload=True)

        # Check if required EEG channel exists
        if 'EEG C4-M1' not in raw.ch_names:
            print(f"Required channel 'EEG C4-M1' not found for {subject_id}, skipping...")
            continue

        # Load manual sleep scoring. Only the file read is guarded, so a
        # FileNotFoundError raised later (e.g. during staging) is reported by
        # the outer handler with its real message instead of being mislabelled.
        file_path = f"data/recordings/{subject_id}_sleepscoring.txt"
        try:
            sleep_stages = _parse_sleep_scoring(file_path)
        except FileNotFoundError:
            print(f"Sleep scoring file not found for {subject_id}, skipping...")
            continue

        if not sleep_stages:
            print(f"No sleep stages found for {subject_id}, skipping...")
            continue

        sleep_df = pd.DataFrame({'sleep_stage': sleep_stages})

        # Run YASA sleep staging
        sls = yasa.SleepStaging(raw, eeg_name="EEG C4-M1")
        hypno_pred = sls.predict()
        hypno_pred = yasa.hypno_str_to_int(hypno_pred)

        # Match lengths of true and predicted hypnograms
        min_length = min(len(sleep_df), len(hypno_pred))
        hypno_true = sleep_df["sleep_stage"].iloc[:min_length].values
        hypno_pred = hypno_pred[:min_length]

        # Calculate metrics. The fixed label list gives every subject a 5x5
        # matrix, so the entries of all_cms can be compared or summed.
        accuracy = accuracy_score(hypno_true, hypno_pred)
        kappa = cohen_kappa_score(hypno_true, hypno_pred)
        cm = confusion_matrix(hypno_true, hypno_pred, labels=ALL_STAGES)

        # Store results
        all_accuracies.append(accuracy)
        all_kappas.append(kappa)
        all_hypno_true.extend(hypno_true)
        all_hypno_pred.extend(hypno_pred)
        all_cms.append(cm)
        processed_subjects.append(subject_id)

        print(f"  Accuracy: {accuracy:.4f}, Kappa: {kappa:.4f}")

    except Exception as e:
        # Best-effort batch run: report the failure and move on to the next subject.
        print(f"Error processing {subject_id}: {str(e)}")
        continue

# Calculate aggregated metrics
if processed_subjects:
    print("\n" + "="*50)
    print(f"Successfully processed {len(processed_subjects)} subjects")
    print("="*50)

    # Pooled epochs of all subjects as arrays (built once, reused below)
    pooled_true = np.asarray(all_hypno_true)
    pooled_pred = np.asarray(all_hypno_pred)

    # Overall accuracy and kappa (computed on the pooled epochs)
    overall_accuracy = accuracy_score(pooled_true, pooled_pred)
    overall_kappa = cohen_kappa_score(pooled_true, pooled_pred)

    print(f"\nAggregated Results:")
    print(f"Overall accuracy: {overall_accuracy:.4f} ({overall_accuracy*100:.2f}%)")
    print(f"Overall Cohen's Kappa: {overall_kappa:.4f}")

    # Mean accuracy and kappa across subjects
    mean_accuracy = np.mean(all_accuracies)
    mean_kappa = np.mean(all_kappas)

    print(f"\nMean accuracy across subjects: {mean_accuracy:.4f} ({mean_accuracy*100:.2f}%)")
    print(f"Mean Cohen's Kappa across subjects: {mean_kappa:.4f}")

    # Get unique sleep stages for labeling. The lower bound in the lookup keeps
    # a negative code from indexing stage_labels from the end.
    unique_stages = np.unique(np.concatenate([pooled_true, pooled_pred]))
    labels = [stage_labels[int(stage)] if 0 <= int(stage) < len(stage_labels) else f'Unknown-{int(stage)}'
              for stage in unique_stages]

    # Calculate overall confusion matrix
    overall_cm = confusion_matrix(pooled_true, pooled_pred, labels=unique_stages)

    # Plot overall confusion matrix
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(overall_cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=labels, yticklabels=labels, ax=ax)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    ax.set_title('Overall Confusion Matrix (All Subjects)')
    fig.tight_layout()
    fig.savefig('overall_confusion_matrix.png')
    plt.show()

    # Calculate per-stage metrics
    print("\nOverall Classification Report:")
    print(classification_report(pooled_true, pooled_pred, labels=unique_stages,
                                target_names=labels, zero_division=0))

    # Per-stage accuracy: among the epochs manually scored as a stage, the
    # fraction that was also predicted as that stage (i.e. the per-stage recall).
    stage_accuracy = {}
    for stage, label in zip(unique_stages, labels):
        mask = (pooled_true == stage)
        if mask.any():  # Stages that only occur in the prediction have no true epochs
            stage_accuracy[label] = np.mean(pooled_pred[mask] == stage)

    # Plot per-stage accuracy
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=list(stage_accuracy.keys()), y=list(stage_accuracy.values()), ax=ax)
    ax.axhline(y=overall_accuracy, color='r', linestyle='--', label=f'Overall Accuracy: {overall_accuracy:.4f}')
    ax.set_ylabel('Accuracy')
    ax.set_xlabel('Sleep Stage')
    ax.set_title('Per-Stage Accuracy (All Subjects)')
    ax.set_ylim(0, 1)
    ax.tick_params(axis='x', rotation=45)
    ax.legend()
    fig.tight_layout()
    fig.savefig('per_stage_accuracy.png')
    plt.show()

    # Distribution of accuracies across subjects
    fig, (ax_acc, ax_kappa) = plt.subplots(1, 2, figsize=(12, 6))

    sns.histplot(all_accuracies, kde=True, ax=ax_acc)
    ax_acc.axvline(x=mean_accuracy, color='r', linestyle='--', label=f'Mean: {mean_accuracy:.4f}')
    ax_acc.set_title('Distribution of Accuracies Across Subjects')
    ax_acc.set_xlabel('Accuracy')
    ax_acc.set_ylabel('Count')
    ax_acc.legend()

    sns.histplot(all_kappas, kde=True, ax=ax_kappa)
    ax_kappa.axvline(x=mean_kappa, color='r', linestyle='--', label=f'Mean: {mean_kappa:.4f}')
    ax_kappa.set_title('Distribution of Kappa Values Across Subjects')
    ax_kappa.set_xlabel("Cohen's Kappa")
    ax_kappa.set_ylabel('Count')
    ax_kappa.legend()

    fig.tight_layout()
    fig.savefig('accuracy_distribution.png')
    plt.show()

    # Save results to CSV
    results_df = pd.DataFrame({
        'Subject': processed_subjects,
        'Accuracy': all_accuracies,
        'Kappa': all_kappas
    })
    results_df.to_csv('sleep_staging_results.csv', index=False)
    print("\nResults saved to 'sleep_staging_results.csv'")

else:
    print("No subjects were successfully processed.")