### Objective: combine the flow, apnea and stage data by considering the start time and duration of each apnea and stage event

### Preparing data from 'Apnea' xml file

In [165]:
#https://docs.python.org/2/library/xml.etree.elementtree.html#xml.etree.ElementTree.ElementTree
from xml.etree.ElementTree import ElementTree
import pandas as pd
import numpy as np
#load the apnea annotation XML into `doc`; parse() is the last expression of the cell,
#so its return value (the root element) is what the notebook echoes below
doc=ElementTree()
doc.parse('data/apnea/mesa-sleep-0001-profusion.xml')

<Element 'CMPStudyConfig' at 0x000001E8C6E4F368>

In [166]:
#start from an empty frame whose columns are the four fields read from each scored event
apnea_columns=['Duration','Input','Name','Start']
apnea_data=pd.DataFrame(columns=apnea_columns)

In [167]:
#collect every scored event whose Name is "Hypopnea" into apnea_data.
#Rows are gathered in a plain list and the frame is built once at the end:
#inserting rows one at a time with .loc re-allocates the frame on every insert.
#getattr(..., 'text', None) yields None when an event lacks one of the child
#elements, so such an event no longer aborts the loop with AttributeError.
apnea_fields=['Duration','Input','Name','Start']
hypopnea_rows=[]
for event in doc.find('ScoredEvents'):
    record={tag:getattr(event.find(tag),'text',None) for tag in apnea_fields}
    if record['Name']=="Hypopnea":
        hypopnea_rows.append(record)
#columns= fixes the column order and still yields the four columns when no event matched
apnea_data=pd.DataFrame(hypopnea_rows,columns=apnea_fields)

In [168]:
#printing a few lines from the top of the dataframe
apnea_data.head()

Unnamed: 0,Duration,Input,Name,Start
0,12.3,Flow,Hypopnea,6114.7
1,11.1,Flow,Hypopnea,6188.9
2,12.0,Flow,Hypopnea,5489.7
3,10.7,Flow,Hypopnea,5552.1
4,11.2,Flow,Hypopnea,5573.4


In [169]:
#the XML values arrive as text; cast both numeric columns in one call
apnea_data=apnea_data.astype({'Duration':'float','Start':'float'})

In [170]:
#derive the end of each event: Stop = Start + Duration
apnea_data=apnea_data.assign(Stop=lambda events: events['Start']+events['Duration'])

In [171]:
apnea_data.head()

Unnamed: 0,Duration,Input,Name,Start,Stop
0,12.3,Flow,Hypopnea,6114.7,6127.0
1,11.1,Flow,Hypopnea,6188.9,6200.0
2,12.0,Flow,Hypopnea,5489.7,5501.7
3,10.7,Flow,Hypopnea,5552.1,5562.8
4,11.2,Flow,Hypopnea,5573.4,5584.6


### Preparing data from 'flow'

In [3]:
import pyedflib
import numpy as np
#open the EDF recording; `f` is reused by the following cells to list and read its signals
#NOTE(review): no cell shown here closes this reader - confirm whether it should be released once the Flow signal has been read
f=pyedflib.EdfReader('data/flow/mesa-sleep-0001.edf')

In [37]:
#how many signals (channels) the EDF file holds
n=f.signals_in_file
print('number of signals in file',n)

number of signals in file 27


In [38]:
#channel names in file order; the position of a name is the index passed to readSignal()
signal_labels = f.getSignalLabels()
#printing signal labels
print(signal_labels)

['EKG', 'EOG-L', 'EOG-R', 'EMG', 'EEG1', 'EEG2', 'EEG3', 'Pres', 'Flow', 'Snore', 'Thor', 'Abdo', 'Leg', 'Therm', 'Pos', 'EKG_Off', 'EOG-L_Off', 'EOG-R_Off', 'EMG_Off', 'EEG1_Off', 'EEG2_Off', 'EEG3_Off', 'Pleth', 'OxStatus', 'SpO2', 'HR', 'DHR']


In [229]:
#read the 'Flow' channel, looking its index up by label instead of hard-coding 8.
#For this recording the label list puts 'Flow' at index 8, so the samples read are the same;
#a file with a different channel order now raises ValueError instead of silently reading another signal.
l=f.readSignal(f.getSignalLabels().index('Flow'))

In [230]:
import numpy as np
#map each sample's timestamp to its flow value rounded to 5 decimals;
#consecutive samples are 1/32 s apart, so sample k sits at k*0.03125 seconds
d={k*0.03125:np.round(sample,5) for k,sample in enumerate(l)}
epoch=len(l)*0.03125   #same final value the running counter reached after the loop

In [231]:
#build a one-column frame from the dictionary: the keys (timestamps) become the index
#and the values (flow samples) land in a column labelled 0
edf_data=pd.DataFrame.from_dict(d,orient='index')

In [232]:
#move the timestamp index into an ordinary column (named 'index' until the rename below)
edf_data=edf_data.reset_index()

In [233]:
#give the two columns meaningful names
edf_data=edf_data.rename(columns={'index':'Time',0:'Flow'})

In [234]:
#order the samples by time. `ascending` has to be a real boolean: the string 'True'
#is rejected with a ValueError by pandas versions that validate the argument.
#Reassigning instead of inplace=True leaves edf_data bound to the sorted frame just the same.
edf_data=edf_data.sort_values(by='Time',ascending=True)

In [235]:
#renumber the rows 0..N-1 after sorting, discarding the previous index
edf_data=edf_data.reset_index(drop=True)

In [236]:
edf_data.head()

Unnamed: 0,Time,Flow
0,0.0,-0.06986
1,0.03125,-0.08927
2,0.0625,-0.09018
3,0.09375,-0.09165
4,0.125,-0.09384


In [237]:
edf_data.tail()

Unnamed: 0,Time,Flow
1382363,43198.84375,-0.00339
1382364,43198.875,0.00046
1382365,43198.90625,0.00375
1382366,43198.9375,0.00156
1382367,43198.96875,0.00027


### Preparing data from 'Stage'

In [238]:
#rebind `doc` to the sleep-stage annotation XML (it held the apnea XML until now);
#parse() is the last expression of the cell, so the root element is echoed below
doc=ElementTree()
doc.parse('data/stage/mesa-sleep-0001-nsrr.xml')

<Element 'PSGAnnotation' at 0x000001E88900A278>

In [239]:
#start from an empty frame whose columns are the four fields kept for each stage event
stage_columns=['Duration','Stages','Start','Type']
stage_data=pd.DataFrame(columns=stage_columns)

In [240]:
#collect every scored event whose EventType is "Stages|Stages" into stage_data.
#Rows are gathered in a plain list and the frame is built once at the end:
#inserting rows one at a time with .loc re-allocates the frame on every insert.
#getattr(..., 'text', None) yields None when an event lacks one of the child
#elements, so such an event no longer aborts the loop with AttributeError.
stage_fields={'Duration':'Duration','Stages':'EventConcept','Start':'Start','Type':'EventType'}   #column -> XML tag
stage_rows=[]
for event in doc.find('ScoredEvents'):
    record={column:getattr(event.find(tag),'text',None) for column,tag in stage_fields.items()}
    if record['Type']=="Stages|Stages":
        stage_rows.append(record)
#columns= fixes the column order and still yields the four columns when no event matched
stage_data=pd.DataFrame(stage_rows,columns=['Duration','Stages','Start','Type'])

In [241]:
stage_data.head()

Unnamed: 0,Duration,Stages,Start,Type
0,5190.0,Wake|0,0.0,Stages|Stages
1,30.0,Stage 1 sleep|1,5190.0,Stages|Stages
2,90.0,Wake|0,5220.0,Stages|Stages
3,60.0,Stage 1 sleep|1,5310.0,Stages|Stages
4,90.0,Stage 2 sleep|2,5370.0,Stages|Stages


In [242]:
#the XML values arrive as text; cast both numeric columns in one call
stage_data=stage_data.astype({'Duration':'float','Start':'float'})

In [243]:
#derive the end of each stage interval: Stop = Start + Duration
stage_data=stage_data.assign(Stop=lambda stages: stages['Start']+stages['Duration'])

In [244]:
stage_data.head()

Unnamed: 0,Duration,Stages,Start,Type,Stop
0,5190.0,Wake|0,0.0,Stages|Stages,5190.0
1,30.0,Stage 1 sleep|1,5190.0,Stages|Stages,5220.0
2,90.0,Wake|0,5220.0,Stages|Stages,5310.0
3,60.0,Stage 1 sleep|1,5310.0,Stages|Stages,5370.0
4,90.0,Stage 2 sleep|2,5370.0,Stages|Stages,5460.0


### working with flow,apnea

In [245]:
#add both label columns with the placeholder value '-1'; the cells below overwrite them
edf_data=edf_data.assign(Apnea=-1,Stage=-1)

In [246]:
def apnea(row,starts=None,stops=None):
    """Return 1 if row['Time'] lies inside any [start, stop] interval, else 0.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row) providing a 'Time' entry.
    starts, stops : optional parallel sequences of interval bounds.
        When omitted, the notebook-level variables `a` and `b` are used, which
        keeps `edf_data.apply(apnea, axis=1)` working as before. Passing them
        explicitly avoids depending on whatever `a`/`b` currently hold (a later
        cell rebinds them to the stage intervals).

    Both interval ends are inclusive.
    """
    if starts is None:
        starts=a
    if stops is None:
        stops=b
    x=row['Time']
    for lo,hi in zip(starts,stops):
        if lo<=x<=hi:
            return 1   #inside this event: no need to look further
    return 0

In [247]:
#mark 1 in column 'Apnea' for every sample whose Time lies inside a scored event
#(Start <= Time <= Stop, both ends inclusive) and 0 for every other sample
a=apnea_data['Start'].values   #start times
b=apnea_data['Stop'].values    #stop times
#compare the whole Time column against one event at a time; this gives the same
#0/1 labels as calling a Python function once per sample, without the per-row overhead
sample_times=edf_data['Time'].values
in_event=np.zeros(len(sample_times),dtype=bool)
for event_start,event_stop in zip(a,b):
    in_event|=(sample_times>=event_start)&(sample_times<=event_stop)
edf_data['Apnea']=in_event.astype('int64')

In [248]:
edf_data.head()

Unnamed: 0,Time,Flow,Apnea,Stage
0,0.0,-0.06986,0,-1
1,0.03125,-0.08927,0,-1
2,0.0625,-0.09018,0,-1
3,0.09375,-0.09165,0,-1
4,0.125,-0.09384,0,-1


In [249]:
#see few values which have Apnea=1
edf_data[edf_data.Apnea==1].head()

Unnamed: 0,Time,Flow,Apnea,Stage
175671,5489.71875,0.08853,1,-1
175672,5489.75,0.08835,1,-1
175673,5489.78125,0.08945,1,-1
175674,5489.8125,0.09018,1,-1
175675,5489.84375,0.08798,1,-1


### working with stage using flow,apnea

In [262]:
def stage(row,starts=None,stops=None,labels=None):
    """Return the sleep-stage code for a sample flagged as apnea.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row) providing 'Apnea' and 'Time'.
    starts, stops, labels : optional parallel sequences describing the stage
        intervals; each label has the form '<text>|<code>'. When omitted, the
        notebook-level variables `a`, `b` and `Stages` are used, which keeps
        `edf_data.apply(stage, axis=1)` working as before.

    Returns
    -------
    - the code after '|' as a *string* (e.g. '2') when row['Apnea'] == 1 and
      Time falls in an interval (both ends inclusive; the first matching
      interval wins, so a sample exactly on a boundary gets the earlier stage);
    - the integer 0 when row['Apnea'] == 1 but no interval matches, or when
      row['Apnea'] == 0;
    - None for any other 'Apnea' value.
    The mixed str/int result is kept on purpose: later cells filter with
    comparisons such as Stage=='1'.
    """
    if starts is None:
        starts=a
    if stops is None:
        stops=b
    if labels is None:
        labels=Stages
    if row['Apnea']==1:
        x=row['Time']
        for lo,hi,label in zip(starts,stops,labels):
            if lo<=x<=hi:
                return str(label).split('|')[1]
        return 0
    if row['Apnea']==0:
        return 0
    return None   #same result the original fall-through produced, now explicit

In [263]:
stage_data.Stages.unique()

array(['Wake|0', 'Stage 1 sleep|1', 'Stage 2 sleep|2', 'REM sleep|5',
       'Stage 3 sleep|3'], dtype=object)

In [264]:
#stage start times, stop times and labels, in that order; stage() reads these three names,
#and rebinding a/b here replaces the apnea intervals they held before
a,b,Stages=(stage_data[column].values for column in ('Start','Stop','Stages'))
edf_data['Stage']=edf_data.apply(stage,axis=1)

In [270]:
edf_data[edf_data.Stage=='1'].head()

Unnamed: 0,Time,Flow,Apnea,Stage
175671,5489.71875,0.08853,1,1
175672,5489.75,0.08835,1,1
175673,5489.78125,0.08945,1,1
175674,5489.8125,0.09018,1,1
175675,5489.84375,0.08798,1,1


In [272]:
edf_data[edf_data.Stage=='1'].shape[0]

10154

#### number of records with 'Apnea==1' and 'stage==1'  is 10154

In [267]:
edf_data[edf_data.Stage=='2'].head()

Unnamed: 0,Time,Flow,Apnea,Stage
175681,5490.03125,0.08359,1,2
175682,5490.0625,0.08139,1,2
175683,5490.09375,0.07645,1,2
175684,5490.125,0.06729,1,2
175685,5490.15625,0.05942,1,2


In [273]:
edf_data[edf_data.Stage=='2'].shape[0]

37238

#### number of records with 'Apnea==1' and 'stage==2'  is 37238

In [275]:
edf_data[edf_data.Stage=='3'].head()

Unnamed: 0,Time,Flow,Apnea,Stage
593281,18540.03125,-0.0303,1,3
593282,18540.0625,-0.03067,1,3
593283,18540.09375,-0.03122,1,3
593284,18540.125,-0.03159,1,3
593285,18540.15625,-0.0325,1,3


In [276]:
edf_data[edf_data.Stage=='3'].shape[0]

767

#### number of records with 'Apnea==1' and 'stage==3'  is 767

In [277]:
edf_data[edf_data.Stage=='5'].head()

Unnamed: 0,Time,Flow,Apnea,Stage
799946,24998.3125,0.00705,1,5
799947,24998.34375,0.01401,1,5
799948,24998.375,0.02756,1,5
799949,24998.40625,0.04385,1,5
799950,24998.4375,0.05686,1,5


In [278]:
edf_data[edf_data.Stage=='5'].shape[0]

7586

#### number of records with 'Apnea==1' and 'stage==5'  is 7586

## Result

### 'edf_data' data frame contains merged data from flow,apnea and stage

In [279]:
edf_data

Unnamed: 0,Time,Flow,Apnea,Stage
0,0.00000,-0.06986,0,0
1,0.03125,-0.08927,0,0
2,0.06250,-0.09018,0,0
3,0.09375,-0.09165,0,0
4,0.12500,-0.09384,0,0
5,0.15625,-0.09677,0,0
6,0.18750,-0.09824,0,0
7,0.21875,-0.09769,0,0
8,0.25000,-0.09586,0,0
9,0.28125,-0.08981,0,0
