In [2]:
import mne
import os
import urllib.request
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mne.time_frequency import psd_array_welch

In [None]:
base_url = "https://physionet.org/files/sleep-edfx/1.0.0/sleep-cassette/"
subject_ids = ['SC4001E0', 'SC4002E0', 'SC4031E0', 'SC4012E0',]  # Add more if needed

# user_ids = [f"U_{i+1}" for i in range(len(subject_ids))]
# subject_to_user = dict(zip(subject_ids, user_ids))

eeg_to_social_map = {
    'SC4001E0': 'U_1',
    'SC4002E0': 'U_2',
    'SC4031E0': 'U_3',
    'SC4012E0': 'U_4'
}

all_eeg_features=[]


for subject_id in subject_ids:
    print(f"\n🔄 Checking files for subject: {subject_id}")

    psg_file = f"{subject_id}-PSG.edf"
    hypnogram_file = f"{subject_id[:-1]}C-Hypnogram.edf"  # Fix for hypnogram filename

    try:
        if not os.path.exists(psg_file):
            print(f"⬇️  Downloading {psg_file}...")
            urllib.request.urlretrieve(base_url + psg_file, psg_file)
        else:
            print(f"✅ {psg_file} already exists.")

        if not os.path.exists(hypnogram_file):
            print(f"⬇️  Downloading {hypnogram_file}...")
            urllib.request.urlretrieve(base_url + hypnogram_file, hypnogram_file)
        else:
            print(f"✅ {hypnogram_file} already exists.")

    except Exception as e:
        print(f"❌ Error downloading {subject_id}: {e}")


🔄 Checking files for subject: SC4001E0
✅ SC4001E0-PSG.edf already exists.
✅ SC4001EC-Hypnogram.edf already exists.

🔄 Checking files for subject: SC4002E0
✅ SC4002E0-PSG.edf already exists.
✅ SC4002EC-Hypnogram.edf already exists.

🔄 Checking files for subject: SC4031E0
✅ SC4031E0-PSG.edf already exists.
✅ SC4031EC-Hypnogram.edf already exists.

🔄 Checking files for subject: SC4012E0
✅ SC4012E0-PSG.edf already exists.
✅ SC4012EC-Hypnogram.edf already exists.


In [28]:
current_subject = 'SC4012E0'  # Change this as needed

psg_file = f"{current_subject}-PSG.edf"
hypnogram_file = f"{current_subject[:-1]}C-Hypnogram.edf"

psg = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
annotations = mne.read_annotations(hypnogram_file)
psg.set_annotations(annotations)

eeg = psg.copy().pick_types(eeg=True)
eeg.filter(0.3, 35., fir_design='firwin')


  psg = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
  psg = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)
  psg = mne.io.read_raw_edf(psg_file, preload=True, verbose=False)


NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


  psg.set_annotations(annotations)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.3 - 35 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.30
- Lower transition bandwidth: 0.30 Hz (-6 dB cutoff frequency: 0.15 Hz)
- Upper passband edge: 35.00 Hz
- Upper transition bandwidth: 8.75 Hz (-6 dB cutoff frequency: 39.38 Hz)
- Filter length: 1101 samples (11.010 s)



Unnamed: 0,General,General.1
,Filename(s),SC4012E0-PSG.edf
,MNE object type,RawEDF
,Measurement date,1989-03-30 at 17:00:00 UTC
,Participant,X
,Experimenter,Unknown
,Acquisition,Acquisition
,Duration,23:45:00 (HH:MM:SS)
,Sampling frequency,100.00 Hz
,Time points,8550000
,Channels,Channels


In [29]:
events, _ = mne.events_from_annotations(eeg)

event_id = {
    'Sleep stage W': 1,
    'Sleep stage 1': 2,
    'Sleep stage 2': 3,
    'Sleep stage 3': 4,
    'Sleep stage 4': 4,
    'Sleep stage R': 5
}

epochs = mne.Epochs(
    eeg,
    events=events,
    event_id=event_id,
    tmin=0,
    tmax=30,
    baseline=None,
    preload=True,
    verbose=False
)


Used Annotations descriptions: [np.str_('Sleep stage 1'), np.str_('Sleep stage 2'), np.str_('Sleep stage 3'), np.str_('Sleep stage 4'), np.str_('Sleep stage ?'), np.str_('Sleep stage R'), np.str_('Sleep stage W')]


In [30]:
data_uv = epochs.get_data() * 1e6  # Convert to microvolts

psd_band, freqs = psd_array_welch(
    data_uv,
    sfreq=epochs.info['sfreq'],
    fmin=0.3,
    fmax=35,
    n_fft=2048
)

freq_bands = {
    'Delta': (0.3, 4),
    'Theta': (4, 8),
    'Alpha': (8, 13),
    'Beta': (13, 30)
}

band_powers = {}
for band, (fmin, fmax) in freq_bands.items():
    mask = (freqs >= fmin) & (freqs <= fmax)
    band_psd = psd_band[:, :, mask]
    band_avg_power = band_psd.mean(axis=(0, 1))
    band_powers[band] = band_avg_power


Effective window size : 20.480 (s)


In [31]:
# Normalize lengths to avoid shape mismatches

min_len = min(len(p) for p in band_powers.values())
for band in band_powers:
    band_powers[band] = band_powers[band][:min_len]

df = pd.DataFrame(band_powers)
df['User_ID'] = eeg_to_social_map[current_subject]
# Append to all features
all_eeg_features.append(df)

df.head()  # Preview

print(all_eeg_features)


[           Delta         Theta         Alpha          Beta User_ID
0   6.225289e+15  4.518128e+10  1.136808e+10  4.433430e+09     U_1
1   1.918084e+15  4.409157e+10  1.123319e+10  4.402236e+09     U_1
2   2.661220e+15  4.304117e+10  1.110073e+10  4.371390e+09     U_1
3   3.273159e+15  4.202819e+10  1.097066e+10  4.340888e+09     U_1
4   4.294976e+14  4.105088e+10  1.084292e+10  4.310724e+09     U_1
..           ...           ...           ...           ...     ...
70  5.129385e+10  1.320127e+10  5.698994e+09  2.876911e+09     U_1
71  4.997500e+10  1.303177e+10  5.652618e+09  2.861443e+09     U_1
72  4.870681e+10  1.286558e+10  5.606832e+09  2.846113e+09     U_1
73  4.748671e+10  1.270262e+10  5.561627e+09  2.830919e+09     U_1
74  4.631229e+10  1.254279e+10  5.516993e+09  2.815859e+09     U_1

[75 rows x 5 columns],            Delta         Theta         Alpha          Beta User_ID
0   6.724013e+16  3.218590e+11  8.097490e+10  3.157867e+10     U_2
1   1.790626e+16  3.140952e+11  8.001

In [32]:

# Concatenate all EEG DataFrames into one
combined_eeg_df = pd.concat(all_eeg_features, ignore_index=True)

# Save to CSV
combined_eeg_df.to_csv("eeg_features_combined.csv", index=False)

print("✅ EEG features saved to 'eeg_features_combined.csv'")


✅ EEG features saved to 'eeg_features_combined.csv'
