# Process MASS data structure

In [44]:
import os
import pandas as pd
import pyedflib
from plotly import express as px
from matplotlib import pyplot as plt

In [2]:
# Locations of the MASS dataset on disk.
# Raw strings keep the Windows backslashes literal: in a normal string,
# sequences such as "\S", "\M", "\e" or "\s" are invalid escapes (they only
# work by accident and trigger a warning on recent Python versions), while
# "\a" would silently turn into a BEL character.
edfpath=r"D:\SpinCo\MASS\edfs"
annopath=r"D:\SpinCo\MASS\annotations"
signalspath=r"D:\SpinCo\MASS\signals"

## Annotations

In [33]:
# Build an index (one row per file) of the annotation files stored in `annopath`.
# A file name is parsed as "<a>-<b>-<subjectId> <type>_<expert>.edf":
#   * the text between the first and second "." must be "edf",
#   * the part after the space is either "Base" (ignored) or "<type>_<expert>",
#   * the third "-"-separated field of the part before the space is the subject id.
# Directory entries that do not follow this layout are skipped instead of
# raising IndexError; malformed ".edf" names are reported so they are not lost
# silently.
allfiles=os.listdir(annopath)
subjectIds=list()
extensions=list()
types=list()
edfFiles=list()
experts=list()
skippedFiles=list()
for file in allfiles:
    aux1=file.split(".")
    # Entries without any "." (e.g. sub-folders) have no extension to check.
    if len(aux1)<2 or aux1[1]!="edf":
        continue
    aux2=aux1[0].split(" ")
    if len(aux2)<2:
        skippedFiles.append(file)
        continue
    # "Base" files are deliberately left out of the index.
    if aux2[1]=="Base":
        continue
    aux3=aux2[0].split("-")
    aux4=aux2[1].split("_")
    if len(aux3)<3 or len(aux4)<2:
        skippedFiles.append(file)
        continue

    subjectIds.append(aux3[2])
    extensions.append(aux1[1])
    types.append(aux4[0])
    experts.append(aux4[1])
    edfFiles.append(file)

if skippedFiles:
    print("Skipped edf files with an unexpected name:", skippedFiles)

annoData=pd.DataFrame({
    "subjectId":subjectIds,
    "file":edfFiles,
    "extension":extensions,
    "type":types,
    "expert":experts
})

In [34]:
# Flatten the annotations of every indexed file into one table:
# one output row per annotation entry found in the file header.
types, subjectIds, starts, durations, experts, comments = [], [], [], [], [], []

for _, annoFile in annoData.iterrows():
    fullPath = annopath + "/" + annoFile["file"]
    signals, signal_headers, header = pyedflib.highlevel.read_edf(fullPath)
    events = list(header['annotations'])

    # File-level fields are repeated once for each annotation of the file.
    types += [annoFile["type"]] * len(events)
    subjectIds += [annoFile["subjectId"]] * len(events)
    experts += [annoFile["expert"]] * len(events)

    # Each annotation entry is read positionally: start, duration, comment.
    starts += [event[0] for event in events]
    durations += [event[1] for event in events]
    comments += [event[2] for event in events]

columnNames = ["type", "subjectId", "startTime", "duration", "expert", "comment"]
columnValues = [types, subjectIds, starts, durations, experts, comments]
annotations = pd.DataFrame(dict(zip(columnNames, columnValues)))

In [35]:
# Display the flattened annotation table (one row per annotation entry).
annotations

Unnamed: 0,type,subjectId,startTime,duration,expert,comment
0,KComplexes,0001,830.838372,0.699174,E1,"<Event channel=""EEG C3-LER"" groupName=""kcomple..."
1,KComplexes,0001,841.223012,0.492156,E1,"<Event channel=""EEG C3-LER"" groupName=""kcomple..."
2,KComplexes,0001,970.838374,0.578088,E1,"<Event channel=""EEG C3-LER"" groupName=""kcomple..."
3,KComplexes,0001,1050.014503,0.695268,E1,"<Event channel=""EEG C3-LER"" groupName=""kcomple..."
4,KComplexes,0001,1077.473271,0.648396,E1,"<Event channel=""EEG C3-LER"" groupName=""kcomple..."
...,...,...,...,...,...,...
45035,Spindles,0019,25434.045319,0.800730,E2,"<Event channel=""EEG C3-LER"" groupName=""spindle..."
45036,Spindles,0019,25438.822357,0.472626,E2,"<Event channel=""EEG C3-LER"" groupName=""spindle..."
45037,Spindles,0019,25444.489963,1.035090,E2,"<Event channel=""EEG C3-LER"" groupName=""spindle..."
45038,Spindles,0019,25457.542096,0.890568,E2,"<Event channel=""EEG C3-LER"" groupName=""spindle..."


In [36]:
# Count the annotations for every (type, expert, subject) combination.
groupKeys=["type","expert","subjectId"]
aux=annotations[groupKeys].groupby(groupKeys,as_index=False).size()

# Grouped bars per subject, coloured by expert, one facet row per annotation type.
px.bar(
    aux,
    x="subjectId",
    y="size",
    color="expert",
    barmode="group",
    facet_row="type",
    title="MASS annotations overview",
)

In [37]:
# Keep only the rows whose type is "Spindles", renumber them from 0 and
# relabel their type as "spindle".
isSpindle=annotations["type"]=="Spindles"
spinData=annotations.loc[isSpindle].reset_index(drop=True).assign(type="spindle")

In [38]:
# Save the spindle annotations as a CSV file inside the annotations folder.
csvPath=annopath+"\\annotations.csv"
spinData.to_csv(csvPath)

## Signals

************************************* YOU ARE HERE **************************************

In [39]:
# Build an index (one row per file) of the EDF recordings stored in `edfpath`.
# A file name is parsed as "<a>-<b>-<subjectId> <type>.edf"
# (e.g. "01-02-0001 PSG.edf"):
#   * the text between the first and second "." must be "edf",
#   * the part after the space is the recording type,
#   * the third "-"-separated field of the part before the space is the subject id.
# Directory entries that do not follow this layout are skipped instead of
# raising IndexError; malformed ".edf" names are reported so they are not lost
# silently.
allfiles=os.listdir(edfpath)
subjectIds=list()
extensions=list()
types=list()
edfFiles=list()
skippedFiles=list()
for file in allfiles:
    aux1=file.split(".")
    # Entries without any "." (e.g. sub-folders) have no extension to check.
    if len(aux1)<2 or aux1[1]!="edf":
        continue
    aux2=aux1[0].split(" ")
    aux3=aux2[0].split("-")
    if len(aux2)<2 or len(aux3)<3:
        skippedFiles.append(file)
        continue
    subjectIds.append(aux3[2])
    extensions.append(aux1[1])
    types.append(aux2[1])
    edfFiles.append(file)

if skippedFiles:
    print("Skipped edf files with an unexpected name:", skippedFiles)

edfsData=pd.DataFrame({
    "subjectId":subjectIds,
    "file":edfFiles,
    "extension":extensions,
    "type":types
})

In [46]:
# For every PSG recording, print its index row and the header of each signal
# whose label contains "C3".
psgData=edfsData[edfsData["type"]=="PSG"].reset_index(drop=True)
for ind, row in psgData.iterrows():
    print("**********************************************")
    print(ind)
    print(row)
    fullPath=os.path.join(edfpath,row["file"])
    # `signals` and `signal_headers` of the last file stay in the namespace;
    # the following cell iterates over them.
    signals, signal_headers, header = pyedflib.highlevel.read_edf(fullPath)
    # A dedicated loop name keeps the file-level `header` returned by
    # read_edf from being overwritten by a per-signal header.
    for signal_header in signal_headers:
        if "C3" in signal_header['label']:
            print(signal_header)
    print("**********************************************")


**********************************************
0
subjectId                  0001
file         01-02-0001 PSG.edf
extension                   edf
type                        PSG
Name: 0, dtype: object
{'label': 'EEG C3-CLE', 'dimension': 'uV', 'sample_rate': 512.0, 'sample_frequency': 512.0, 'physical_max': 397.0, 'physical_min': -397.0, 'digital_max': 32767, 'digital_min': -32768, 'prefilter': '', 'transducer': ''}
**********************************************
**********************************************
1
subjectId                  0002
file         01-02-0002 PSG.edf
extension                   edf
type                        PSG
Name: 1, dtype: object
{'label': 'EEG C3-CLE', 'dimension': 'uV', 'sample_rate': 512.0, 'sample_frequency': 512.0, 'physical_max': 393.0, 'physical_min': -393.0, 'digital_max': 32767, 'digital_min': -32768, 'prefilter': '', 'transducer': ''}
**********************************************
**********************************************
2
subjectId          

In [47]:
# List the label of every signal in the most recently loaded recording.
for _, channelHeader in zip(signals, signal_headers):
    channelLabel = channelHeader['label']
    print(channelLabel)


EEG Fp1-CLE
EEG Fp2-CLE
EEG F3-CLE
EEG F4-CLE
EEG F7-CLE
EEG F8-CLE
EEG C3-CLE
EEG C4-CLE
EEG P3-CLE
EEG P4-CLE
EEG O1-CLE
EEG O2-CLE
EEG T3-CLE
EEG T4-CLE
EEG T5-CLE
EEG T6-CLE
EEG Fpz-CLE
EEG Cz-CLE
EEG Pz-CLE
EOG Upper Vertic
EOG Lower Vertic
EOG Left Horiz
EOG Right Horiz
EMG Chin
ECG ECGI
Resp Nasal
EEG A2-CLE
