In [1]:
import pandas as pd
import numpy as np
import glob

## Baseline

In [36]:
# Collect the baseline recording names: one entry per .mat file found one
# directory level below root_dir.
root_dir = '/mlodata1/lugeon/dream_data/chuv/baseline'

# glob returns paths in arbitrary, filesystem-dependent order; sorting makes the
# list (and any file written from it) come out in the same order on every run.
mat_paths = sorted(glob.glob(f'{root_dir}/*/*.mat'))

# Keep the last path component up to its first '.', i.e. the name without extension.
file_names = [path.split('/')[-1].split('.')[0] for path in mat_paths]
n_trials = len(file_names)

print(f'Number of trials: {n_trials}')

Number of trials: 161


In [37]:
# One row per collected file name; every row carries the constant label -1.
df_files = pd.DataFrame({'file_name': file_names}).assign(label=-1)

In [38]:
# Preview the first five rows of the file table.
df_files.head(n=5)

Unnamed: 0,file_name,label
0,NREM_c5,-1
1,NREM_c6,-1
2,REM_c1,-1
3,REM_c2,-1
4,REM_c3,-1


In [39]:
# Write df_files as space-separated text, one row per line, without the column
# names and without the row index. header/index are passed as the documented
# boolean False instead of None, which only worked because None is falsy.
# float_format prints any float value without decimals.
df_files.to_csv(
    '/mlodata1/lugeon/dream_data/chuv/metadata/baseline_labels_map.txt',
    sep=' ',
    header=False,
    index=False,
    float_format='%.0f',
)

## Healthy

In [23]:
# Collect the recordings whose file name starts with 'H': one entry per matching
# .mat file found one directory level below root_dir.
root_dir = '/mlodata1/lugeon/dream_data/chuv/raw'

# glob returns paths in arbitrary, filesystem-dependent order; sorting keeps the
# row order of everything derived from this list (including seeded sampling)
# identical from run to run.
mat_paths = sorted(glob.glob(f'{root_dir}/*/H*.mat'))

# Keep only the last path component (file name including its extension).
file_names = [path.split('/')[-1] for path in mat_paths]
n_trials = len(file_names)

print(f'Number of H trials: {n_trials}')

Number of H trials: 525


In [25]:
# One row per collected recording, with the file name broken into its fields.
df_files = pd.DataFrame({'file_name': file_names})

# Experiment name = file name up to its first '.'.
df_files['exp_name'] = df_files['file_name'].apply(lambda f: f.split('.')[0])

# Split the experiment name on '_' (e.g. 'H009_E1_NREM_S03' -> 4 parts).
df_files['split'] = df_files['exp_name'].apply(lambda name: name.split('_'))

# Sanity check on the number of parts per name. Only the last expression of a
# cell is displayed, so the counts are printed explicitly, and before the
# positional indexing below so they are visible even if that indexing fails.
print(df_files['split'].apply(len).value_counts())

# Parts 0..3 become the patient, experiment, sleep_stage and trial columns.
for position, column in enumerate(('patient', 'experiment', 'sleep_stage', 'trial')):
    df_files[column] = df_files['split'].apply(lambda parts, i=position: parts[i])

df_files.head()

Unnamed: 0,file_name,exp_name,split,patient,experiment,sleep_stage,trial
0,H009_E1_NREM_S03.mat,H009_E1_NREM_S03,"[H009, E1, NREM, S03]",H009,E1,NREM,S03
1,H009_E1_NREM_S05.mat,H009_E1_NREM_S05,"[H009, E1, NREM, S05]",H009,E1,NREM,S05
2,H009_E1_NREM_S06.mat,H009_E1_NREM_S06,"[H009, E1, NREM, S06]",H009,E1,NREM,S06
3,H009_E1_NREM_S07.mat,H009_E1_NREM_S07,"[H009, E1, NREM, S07]",H009,E1,NREM,S07
4,H009_E1_NREM_S09.mat,H009_E1_NREM_S09,"[H009, E1, NREM, S09]",H009,E1,NREM,S09


In [26]:

# Load the label spreadsheet (Dream_labels.xlsx) and preview its first rows.
labels_path = '/mlodata1/yuecetue/eeg-dreams/YuceturkThesis2020/Data/Dataset_info/Dream_labels.xlsx'
df_labels = pd.read_excel(labels_path)
df_labels.head()

Unnamed: 0,Subject_id,Quest_number,Stage,CE
0,H009_E1,1.0,3.0,1.0
1,H009_E1,2.0,3.0,1.0
2,H009_E1,3.0,3.0,1.0
3,H009_E1,4.0,4.0,2.0
4,H009_E1,5.0,2.0,1.0


In [27]:
# Build the two columns that df_files shares with df_labels.
# Quest_number: the trial tag without its first character, as an int ('S03' -> 3).
df_files['Quest_number'] = df_files['trial'].apply(lambda t: int(t[1:]))
# Subject_id: '<patient>_<experiment>' (e.g. 'H009_E1'), built column-wise
# rather than with a row-by-row apply.
df_files['Subject_id'] = df_files['patient'] + '_' + df_files['experiment']

# Join on explicitly named keys instead of relying on whichever column names the
# two frames happen to have in common. The outer join keeps recordings without a
# label row and label rows without a recording.
df_merged = df_files.merge(
    df_labels,
    how='outer',
    on=['Quest_number', 'Subject_id'],
)
df_merged.head()

Unnamed: 0,file_name,exp_name,split,patient,experiment,sleep_stage,trial,Quest_number,Subject_id,Stage,CE
0,H009_E1_NREM_S03.mat,H009_E1_NREM_S03,"[H009, E1, NREM, S03]",H009,E1,NREM,S03,3.0,H009_E1,3.0,1.0
1,H009_E1_NREM_S05.mat,H009_E1_NREM_S05,"[H009, E1, NREM, S05]",H009,E1,NREM,S05,5.0,H009_E1,2.0,1.0
2,H009_E1_NREM_S06.mat,H009_E1_NREM_S06,"[H009, E1, NREM, S06]",H009,E1,NREM,S06,6.0,H009_E1,2.0,1.0
3,H009_E1_NREM_S07.mat,H009_E1_NREM_S07,"[H009, E1, NREM, S07]",H009,E1,NREM,S07,7.0,H009_E1,2.0,1.0
4,H009_E1_NREM_S09.mat,H009_E1_NREM_S09,"[H009, E1, NREM, S09]",H009,E1,NREM,S09,9.0,H009_E1,2.0,2.0


In [28]:
# Keep only the rows that have both a label (CE) and a recording (file_name).
df_valid = df_merged.dropna(subset=['CE', 'file_name'])

# (rows, columns) of the trials with both data + label
df_valid.shape

(523, 11)

In [40]:
# Write "<exp_name> <CE>" lines, space-separated, without column names and
# without the row index. header/index are passed as the documented boolean
# False instead of None, which only worked because None is falsy.
# float_format='%.0f' prints float values of CE without decimals.
df_valid[['exp_name', 'CE']].to_csv(
    '/mlodata1/lugeon/dream_data/chuv/metadata/healthy_labels_map.txt',
    sep=' ',
    header=False,
    index=False,
    float_format='%.0f',
)

In [41]:
# Write a small subset of the "<exp_name> <CE>" map: n_samples rows drawn with a
# fixed random_state so the same rows are picked for the same input frame.
n_samples = 10

df_small = df_valid[['exp_name', 'CE']].sample(n_samples, random_state=42)

# Same text format as the full map: space-separated, no column names, no row
# index (documented boolean False rather than None), floats without decimals.
df_small.to_csv(
    '/mlodata1/lugeon/dream_data/chuv/metadata/healthy_labels_map_small.txt',
    sep=' ',
    header=False,
    index=False,
    float_format='%.0f',
)