# Check annotation consistency in MASS


In [1]:
#external libraries
import os
import dotenv
import pandas as pd
import numpy as np
from scipy import signal as sg
import pickle as pkl
from plotly import express as px
from plotly import graph_objects as go
from matplotlib import pyplot as plt
import matplotlib.colors as clt

#project library
from testing import *

#environment variables
#read the variable definitions stored in the 'lab.env' file
dotenv.load_dotenv('lab.env')

#project variables
#root data folder taken from the DATAPATH environment variable
#(raises KeyError when DATAPATH is not defined)
datapath=os.environ['DATAPATH']

In [2]:
#define MASS and features paths
#paths are built with os.path.join: a plain literal such as "\MASS" holds the
#invalid escape sequence "\M" and hard-codes the Windows separator
masspath=os.path.join(datapath,"MASS")
featurespath=os.path.join(masspath,"features")
#window durations (units are not stated here; presumably seconds - TODO confirm)
windowDurations=[0.5,1,1.5,2]

## Load data

In [3]:
#load data THIS NEEDS REFINEMENT AND CONVERGENCE TO USE WITH MULTIPLE DATABASES
def loadMASSSpindles(path):
    """Load the MASS signals, spindle annotations and signals metadata.

    Reads ``signals/signalsMetadata.csv`` and ``annotations/annotations.csv``
    located under *path* and unpickles, from the ``signals`` folder, the file
    named in each metadata row.

    Args:
        path: root folder of the MASS database.

    Returns:
        A tuple ``(signals, annotations, signalsMetadata)`` where

        * ``signals`` is a dict mapping each zero-padded subjectId to the
          object unpickled from that subject's file,
        * ``annotations`` is the annotations table with zero-padded
          ``subjectId`` / ``labelerId`` and the added columns ``samplerate``,
          ``stopTime``, ``startInd`` and ``stopInd``,
        * ``signalsMetadata`` is the metadata table with zero-padded
          ``subjectId``.

    Raises:
        OSError: if a CSV or signal file cannot be opened.
    """
    signalsFolder=os.path.join(path,'signals')

    #signalsMetadata
    signalsMetadata=pd.read_csv(os.path.join(signalsFolder,'signalsMetadata.csv'))
    #column-wise zero padding: does not depend on the dtypes of the other
    #columns and also works when the table has no rows
    signalsMetadata['subjectId']=signalsMetadata['subjectId'].astype(str).str.zfill(4)

    #load signals from pickle
    #NOTE(review): unpickling can run arbitrary code; only use trusted files
    signals={}
    for subjectId, fileName in zip(signalsMetadata['subjectId'],signalsMetadata['file']):
        #the with block closes the file even when unpickling fails
        with open(os.path.join(signalsFolder,fileName),'rb') as cFile:
            signals[subjectId]=pkl.load(cFile)

    #spindle annotations
    annotations=pd.read_csv(os.path.join(path,'annotations','annotations.csv'))
    annotations['subjectId']=annotations['subjectId'].astype(str).str.zfill(4)
    annotations['labelerId']=annotations['labelerId'].astype(str).str.zfill(4)

    #add stop and index columns
    #the samplerate of each subject is needed to convert times to indexes
    annotations=annotations.merge(
        signalsMetadata[['subjectId','samplerate']],how='left',on='subjectId')
    annotations['stopTime']=annotations['startTime']+annotations['duration']
    annotations['startInd']=annotations.apply(
        lambda row: seconds2index(row.startTime,row.samplerate) , axis=1)
    annotations['stopInd']=annotations.apply(
        lambda row: seconds2index(row.stopTime,row.samplerate) , axis=1)

    return signals, annotations, signalsMetadata

In [4]:
#load the signals, the spindle annotations and the signals metadata from the MASS folder
signals, annotations, signalsMetadata = loadMASSSpindles(masspath)

In [5]:
#preview the first 5 rows of the signals metadata
signalsMetadata.head(5)

Unnamed: 0,subjectId,file,channel,duration,samplerate
0,1,MASS_0001.pkl,C3-CLE,28956.0,256
1,2,MASS_0002.pkl,C3-CLE,35016.0,256
2,3,MASS_0003.pkl,C3-CLE,36760.0,256
3,4,MASS_0004.pkl,C3-CLE,28004.0,256
4,5,MASS_0005.pkl,C3-CLE,31244.0,256


In [6]:
#drop the reference to the loaded signals; the checks in the following cells
#only use annotations and signalsMetadata
del signals

In [7]:
#attach the signalsMetadata duration to every annotation; the duration column
#coming from annotations is renamed to 'durationa' by the suffixes
aux=pd.merge(
    annotations,
    signalsMetadata[["subjectId","duration"]],
    on="subjectId",
    suffixes=("a",""),
)


In [8]:
#per subject: largest annotation stopTime and largest duration
aux=aux.groupby('subjectId')[['stopTime','duration']].max()

aux

Unnamed: 0_level_0,stopTime,duration
subjectId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,28746.766835,28956.0
2,30054.419802,35016.0
3,28737.068461,36760.0
4,25933.537,28004.0
5,31082.312429,31244.0
6,27816.998876,28990.0
7,27773.277887,28302.0
8,25836.522934,26846.0
9,27665.581066,29834.0
10,24250.160714,25930.0


In [9]:
#flag the subjects whose duration is strictly greater than their last
#annotation stopTime (True means the annotations fit inside the signal)
#the comparison is done column-wise instead of row by row with a lambda
aux['check']=aux['duration']>aux['stopTime']

aux

Unnamed: 0_level_0,stopTime,duration,check
subjectId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,28746.766835,28956.0,True
2,30054.419802,35016.0,True
3,28737.068461,36760.0,True
4,25933.537,28004.0,True
5,31082.312429,31244.0,True
6,27816.998876,28990.0,True
7,27773.277887,28302.0,True
8,25836.522934,26846.0,True
9,27665.581066,29834.0,True
10,24250.160714,25930.0,True
