# MASS graphical information


In [18]:
#external libraries
import os
import dotenv
import pandas as pd
import numpy as np
from scipy import signal as sg
import pickle as pkl
from plotly import express as px
from plotly import graph_objects as go
from matplotlib import pyplot as plt
import matplotlib.colors as clt

#project library
from spinco import *

#environment variables
#read the variables declared in lab.env (presumably into os.environ, so that
#DATAPATH below can be resolved - TODO confirm against python-dotenv behaviour)
dotenv.load_dotenv('lab.env')

#project variables
#root folder that contains the databases (the MASS folder is built from it);
#os.environ[...] raises KeyError when DATAPATH is not defined
datapath=os.environ['DATAPATH']

In [19]:
#define features path
#os.path.join replaces the hand-written "\" separators: "\M" is an invalid
#escape sequence inside a normal string literal, and the hardcoded backslashes
#only produced a usable path on Windows
masspath=os.path.join(datapath,"MASS")
featurespath=os.path.join(masspath,"features")
#window durations, presumably in seconds - TODO confirm where they are consumed
windowDurations=[0.5,1,1.5,2]

## Load data

In [20]:
#load data THIS NEEDS REFINEMENT AND CONVERGENCE TO USE WITH MULTIPLE DATABASES
def loadMASSSpindles(path):
    """Load the MASS signals, spindle annotations and signals metadata.

    Files read below *path*:
        signals/signalsMetadata.csv   one row per subject (needs at least the
                                      subjectId, file and samplerate columns)
        signals/<file>                one pickled signal per metadata row
        annotations/annotations.csv   one row per labeled spindle (needs at
                                      least subjectId, labelerId, startTime
                                      and duration)

    Parameters
    ----------
    path : str
        Root folder of the MASS database.

    Returns
    -------
    signals : dict
        Unpickled content of each signal file, keyed by zero-padded subjectId.
    annotations : pandas.DataFrame
        The annotations table with zero-padded subjectId/labelerId plus the
        added columns samplerate, stopTime, startInd and stopInd.
    signalsMetadata : pandas.DataFrame
        The metadata table with zero-padded subjectId.
    """
    signalsFolder=os.path.join(path,'signals')

    #signalsMetadata
    signalsMetadata=pd.read_csv(os.path.join(signalsFolder,'signalsMetadata.csv'))
    #pad the ids column-wise: a row-wise apply may upcast an all-numeric row to
    #float and turn the id 1 into "01.0" instead of "0001"
    signalsMetadata['subjectId']=signalsMetadata['subjectId'].astype(str).str.zfill(4)

    #load signals from pickle
    #NOTE: pickle can execute arbitrary code while loading, only use trusted files
    signals={}
    for subjectId, fileName in zip(signalsMetadata['subjectId'],signalsMetadata['file']):
        #the context manager closes the file even when unpickling fails
        with open(os.path.join(signalsFolder,fileName),'rb') as cFile:
            signals[subjectId]=pkl.load(cFile)

    #spindle annotations
    annotations=pd.read_csv(os.path.join(path,'annotations','annotations.csv'))
    for idColumn in ('subjectId','labelerId'):
        annotations[idColumn]=annotations[idColumn].astype(str).str.zfill(4)

    #add stop and index columns
    #the samplerate of each subject is needed to convert seconds to sample indexes
    annotations=annotations.merge(signalsMetadata[['subjectId','samplerate']],how='left',on='subjectId')
    annotations['stopTime']=annotations['startTime']+annotations['duration']
    #seconds2index comes from the project library and is kept as a per-row call
    annotations['startInd']=annotations.apply(
        lambda row: seconds2index(row.startTime,row.samplerate) , axis=1)
    annotations['stopInd']=annotations.apply(
        lambda row: seconds2index(row.stopTime,row.samplerate) , axis=1)

    return signals, annotations, signalsMetadata

In [21]:
#load the signals (dict keyed by subjectId), the spindle annotations and the signals metadata of MASS
signals, annotations, signalsMetadata = loadMASSSpindles(masspath)

In [22]:
#preview the first 5 rows of the signals metadata
signalsMetadata.head(5)

Unnamed: 0,subjectId,file,channel,duration,samplerate
0,1,MASS_0001.pkl,C3-CLE,28956.0,256
1,2,MASS_0002.pkl,C3-CLE,35016.0,256
2,3,MASS_0003.pkl,C3-CLE,36760.0,256
3,4,MASS_0004.pkl,C3-CLE,28004.0,256
4,5,MASS_0005.pkl,C3-CLE,31244.0,256


## Resample to 25 Hz - ONLY FOR GRAPHICAL USE
It is important to do it this way: the original data should never be modified.

In [23]:
#target sample rate in Hz of the resampled signals (graphical use only)
samplerate=25

In [24]:
#resampling ratio for sg.resample_poly, derived from the metadata instead of
#being hardcoded: resulting rate = original rate * myUp / myDown
#(both rates must be integers for the gcd reduction)
originalSamplerates=np.unique(signalsMetadata.samplerate)
if len(originalSamplerates)!=1:
    #a single up/down pair is only valid when every signal shares one sample rate
    raise ValueError("signals have different sample rates: "+str(originalSamplerates))
originalSamplerate=int(originalSamplerates[0])
#reduce the ratio so the smallest equivalent factors are used
commonDivisor=int(np.gcd(samplerate,originalSamplerate))
myUp=samplerate//commonDivisor
myDown=originalSamplerate//commonDivisor
print(originalSamplerate*myUp/myDown)   #resulting sample rate, must match samplerate

25.0


In [25]:
#display the metadata table before resampling
signalsMetadata

Unnamed: 0,subjectId,file,channel,duration,samplerate
0,1,MASS_0001.pkl,C3-CLE,28956.0,256
1,2,MASS_0002.pkl,C3-CLE,35016.0,256
2,3,MASS_0003.pkl,C3-CLE,36760.0,256
3,4,MASS_0004.pkl,C3-CLE,28004.0,256
4,5,MASS_0005.pkl,C3-CLE,31244.0,256
5,6,MASS_0006.pkl,C3-CLE,28990.0,256
6,7,MASS_0007.pkl,C3-CLE,28302.0,256
7,8,MASS_0008.pkl,C3-CLE,26846.0,256
8,9,MASS_0009.pkl,C3-CLE,29834.0,256
9,10,MASS_0010.pkl,C3-CLE,25930.0,256


In [26]:
#1. resample: every entry of the signals dict is replaced by its resampled version
for subjectId in signalsMetadata.subjectId:
    signals[subjectId]=sg.resample_poly(signals[subjectId],up=myUp,down=myDown)

#2. update metadata: store the new rate and recompute each duration from the
#   resampled length (it should be the exact same duration)
signalsMetadata["samplerate"]=samplerate
signalsMetadata["duration"]=[
    len(signals[subjectId])/samplerate
    for subjectId in signalsMetadata.subjectId]

#3. update annotations: the times in seconds are unchanged, only the sample
#   indexes depend on the rate and have to be recomputed
annotations['samplerate']=samplerate
for timeColumn, indexColumn in (('startTime','startInd'),('stopTime','stopInd')):
    annotations[indexColumn]=annotations.apply(
        lambda row, timeColumn=timeColumn: seconds2index(row[timeColumn],row.samplerate),
        axis=1)


In [27]:
#display the metadata table after resampling (the samplerate column holds the new rate)
signalsMetadata

Unnamed: 0,subjectId,file,channel,duration,samplerate
0,1,MASS_0001.pkl,C3-CLE,28956.0,25
1,2,MASS_0002.pkl,C3-CLE,35016.0,25
2,3,MASS_0003.pkl,C3-CLE,36760.0,25
3,4,MASS_0004.pkl,C3-CLE,28004.0,25
4,5,MASS_0005.pkl,C3-CLE,31244.0,25
5,6,MASS_0006.pkl,C3-CLE,28990.0,25
6,7,MASS_0007.pkl,C3-CLE,28302.0,25
7,8,MASS_0008.pkl,C3-CLE,26846.0,25
8,9,MASS_0009.pkl,C3-CLE,29834.0,25
9,10,MASS_0010.pkl,C3-CLE,25930.0,25


### graphical representation of the annotations by subject

In [28]:
#create a color scale for the labelers: one hex color per labelerId
labelers=np.unique(annotations.labelerId)
labelersCount=len(labelers)
#evenly spaced positions in [0,1] along the colormap, one per labeler.
#np.linspace(0,1,1) already returns [0.], so a single labeler needs no special
#case; the former scalar position made the colormap return one RGBA tuple,
#which broke the per-labeler indexing of rgbaValues
aux=np.linspace(0,1,labelersCount)
rgbaValues=plt.get_cmap('viridis')(aux)
labelerColors={
    labeler: clt.rgb2hex(rgba)
    for labeler, rgba in zip(labelers,rgbaValues)}

In [30]:
#output folder of the interactive figures (created when missing)
folder=os.path.join(masspath,"graphical")
os.makedirs(folder,exist_ok=True)
annotations_bySubject=annotations.groupby('subjectId')
#inter-labeler IoU per subject, NaN when the subject does not have exactly 2 labelers
IoU={}

for subjectId, thisAnnotations in annotations_bySubject:
    signal=signals[subjectId]
    timepoints=np.arange(len(signal))/samplerate
    #last valid sample, used to keep annotation limits inside the time axis
    lastInd=len(signal)-1
    fig=go.Figure()
    fig.add_scatter(x=timepoints,y=signal)
    #one binary vector per labeler: 1 on the samples marked as spindle
    labeledVect={}
    for labelerId, dataframe in thisAnnotations.groupby('labelerId'):
        labeledVect[labelerId]=np.zeros_like(signal)
        for startInd, stopInd, duration in zip(dataframe.startInd,dataframe.stopInd,dataframe.duration):
            #shaded rectangle over the spindle, colored by labeler and tagged with its duration;
            #indexes are clamped so an annotation ending on the last sample cannot raise IndexError
            fig.add_vrect(
                x0=timepoints[min(startInd,lastInd)],
                x1=timepoints[min(stopInd,lastInd)],
                fillcolor=labelerColors[labelerId],
                opacity=0.25,
                annotation_text=str(np.round(duration,2))+"s",
                annotation_position="top left")
            labeledVect[labelerId][startInd:stopInd]=1
    #compute metrics: the IoU is only computed between exactly two labelers
    if len(labeledVect)==2:
        firstLabeler, secondLabeler=labeledVect
        #binaryIoU comes from the project library (presumably the intersection
        #over union of the two binary vectors - TODO confirm)
        IoU[subjectId]=binaryIoU(labeledVect[firstLabeler],labeledVect[secondLabeler])
        fig.update_layout(title_text='MASS subject '+subjectId+' spindles, inter-labeler IoU: '+str(round(IoU[subjectId],2)), title_x=0.5)
    else:
        #np.nan instead of np.NaN: the capitalized alias was removed in NumPy 2.0
        IoU[subjectId]=np.nan
        fig.update_layout(title_text='subject '+subjectId+' spindles', title_x=0.5)
    fig.write_html(os.path.join(folder,subjectId+"_labelersInteractive.html"))
