In [1]:
#external libraries
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.colors as clt
import plotly
import plotly.subplots as sb
import plotly.express as px
import plotly.graph_objects as go
import dotenv
import pandas as pd
import scipy.fft as fft
import scipy.signal as sg
import scipy.io as sio
import pickle as pkl
import xgboost as xgb
import time

#project library
from spinco import *

#environment variables
dotenv.load_dotenv('lab.env')

#project variables
datapath=os.environ['DATAPATH']
cognipath=datapath+"\\COGNITION"
dreamspath=datapath+"\\DREAMS"
masspath=datapath+"\\MASS"


## load mass

In [2]:
samplerate=200  #Should rethink this

In [3]:
annotations, signalsMetadata = loadMASSSpindles(masspath,forceSamplerate=samplerate)

In [4]:
annotations.head()

Unnamed: 0,type,expert,subjectId,labelerId,startTime,duration,samplerate,stopTime,startInd,stopInd
0,kcomplex,E1,1,1,830.596676,0.699174,200,831.29585,166119,166259
1,kcomplex,E1,1,1,840.981316,0.492156,200,841.473472,168196,168295
2,kcomplex,E1,1,1,970.596678,0.578088,200,971.174766,194119,194235
3,kcomplex,E1,1,1,1049.772807,0.695268,200,1050.468075,209955,210094
4,kcomplex,E1,1,1,1077.231575,0.648396,200,1077.879971,215446,215576


In [5]:
signalsMetadata.head()

Unnamed: 0,subjectId,file,channel,duration,samplerate,isOriginalSamplerate,database
0,1,MASS_0001.pkl,C3-CLE,28956.0,200,False,MASS
1,2,MASS_0002.pkl,C3-CLE,35016.0,200,False,MASS
2,3,MASS_0003.pkl,C3-CLE,36760.0,200,False,MASS
3,4,MASS_0004.pkl,C3-CLE,28004.0,200,False,MASS
4,5,MASS_0005.pkl,C3-CLE,31244.0,200,False,MASS


## Load experiment results

In [6]:
[results_CV,featureSelection]=loadPickle("Experiment002_CV_1669883512.373571.pkl")

In [7]:
results_CV

Unnamed: 0,train,val,test,model,treeLimit,valF1
0,"[0002, 0003, 0004, 0005, 0007, 0008, 0009, 001...","[0006, 0014]",[0001],"XGBClassifier(base_score=0.5, booster='gbtree'...",50,0.623415
1,"[0001, 0003, 0004, 0005, 0006, 0007, 0008, 000...","[0012, 0010]",[0002],"XGBClassifier(base_score=0.5, booster='gbtree'...",54,0.560644
2,"[0002, 0004, 0005, 0006, 0007, 0008, 0009, 001...","[0001, 0012]",[0003],"XGBClassifier(base_score=0.5, booster='gbtree'...",61,0.373103
3,"[0001, 0003, 0005, 0006, 0008, 0009, 0010, 001...","[0002, 0007]",[0004],"XGBClassifier(base_score=0.5, booster='gbtree'...",61,0.619176
4,"[0001, 0002, 0003, 0004, 0006, 0007, 0008, 000...","[0011, 0016]",[0005],"XGBClassifier(base_score=0.5, booster='gbtree'...",55,0.641606
5,"[0001, 0002, 0003, 0004, 0005, 0007, 0008, 000...","[0011, 0016]",[0006],"XGBClassifier(base_score=0.5, booster='gbtree'...",54,0.642723
6,"[0001, 0002, 0003, 0004, 0005, 0006, 0008, 000...","[0010, 0018]",[0007],"XGBClassifier(base_score=0.5, booster='gbtree'...",46,0.592171
7,"[0001, 0002, 0003, 0004, 0005, 0007, 0009, 001...","[0015, 0006]",[0008],"XGBClassifier(base_score=0.5, booster='gbtree'...",80,0.43213
8,"[0001, 0002, 0003, 0004, 0006, 0008, 0010, 001...","[0005, 0007]",[0009],"XGBClassifier(base_score=0.5, booster='gbtree'...",46,0.594039
9,"[0001, 0002, 0003, 0004, 0005, 0006, 0007, 000...","[0011, 0009]",[0010],"XGBClassifier(base_score=0.5, booster='gbtree'...",55,0.564562


In [8]:
featureSelection

Unnamed: 0,characteristic,bandName,window
21,hjortActivity,sigma,0.5
66,petrosian,broadband,0.5
22,hjortActivity,theta,0.5
79,relativePower,beta1,0.5
29,hjortComplexity,sigma,0.5


## Annotation criterium

In [9]:
usedAnnotations=annotations[(annotations['type']=='spindle')&(annotations['labelerId']=='0001')]

## Grid search of hyperparameters

In [10]:
from itertools import product

allClose=[0.1,0.25,0.4,0.55]
allDuration=[0.1,0.3,0.4,0.5]
allConfidence=[0.0,0.25,0.5,0.75]

In [11]:
closes=list()
durations=list()
confidences=list()
for hyper in product(allClose, allDuration,allConfidence):
    closes.append(hyper[0])
    durations.append(hyper[1])
    confidences.append(hyper[2])

hyperparams=pd.DataFrame({
    "close":closes,
    "duration":durations,
    "confidence":confidences
})

hyperparams


Unnamed: 0,close,duration,confidence
0,0.10,0.1,0.00
1,0.10,0.1,0.25
2,0.10,0.1,0.50
3,0.10,0.1,0.75
4,0.10,0.3,0.00
...,...,...,...
59,0.55,0.4,0.75
60,0.55,0.5,0.00
61,0.55,0.5,0.25
62,0.55,0.5,0.50


## Evaluate each row for each hyperparameter selection

In [12]:
#Set by_event IoU threshold
masterIoU=0.3

#Be conservative, keep the minimum tree limit
masterTreeLimit=np.min(results_CV.treeLimit)
masterTreeLimit


44

In [15]:
modelMetrics=pd.DataFrame()

for ind_hyper, hyper in hyperparams.iterrows():
    for ind_CV, row in results_CV.iterrows():
        #load testing
        testFeatures=loadFeatureMatrix(row.test,featureSelection,signalsMetadata,samplerate,datapath)
        testLabels=loadLabelsVector(row.test,usedAnnotations,signalsMetadata,samplerate)
        #predict labels
        predictedLabels=row.model.predict(testFeatures,ntree_limit=masterTreeLimit)
        #process labels
        processedLabels=labelingProcess(predictedLabels,hyper.close,hyper.duration,samplerate)
        #get confidence score and filter
        predictedAnnotations=labelVectorToAnnotations(predictedLabels,samplerate)
        processedAnnotations=labelVectorToAnnotations(processedLabels,samplerate)
        processedAnnotations['confidence']=processedAnnotations.apply(
            lambda row: getConfidence(row,predictedAnnotations)
            ,axis=1)
        finalAnnotations=processedAnnotations[processedAnnotations.confidence>hyper.confidence].copy().reset_index(drop=True)
        #by event calculations
        gtAnnotations=labelVectorToAnnotations(testLabels,samplerate)   #<- or just filter the annotations
        tp,fp,fn = byEventEvaluation(gtAnnotations,finalAnnotations,thres_IoU=masterIoU)
        #consistency check
        check=tp*2+fp+fn==len(gtAnnotations)+len(finalAnnotations)
        #calculate metrics
        finalLabels=excerptAnnotationsToLabels(finalAnnotations,len(testLabels))
        vector_F1_raw=F1(testLabels,predictedLabels)
        vector_F1=F1(testLabels,finalLabels)
        event_F1=2*tp/(2*tp+fp+fn)
        event_Precision=tp/(tp+fp)
        event_Recall=tp/(tp+fn)
        event_TS=tp/(tp+fn+fp)
        
        #append to the dataframe
        newRow=pd.Series({
            "ind_hyper":ind_hyper,
            "ind_model":ind_CV,
            "thres_IoU":masterIoU,
            "treeLimit":masterTreeLimit,
            "check":check,
            "final_count":len(gtAnnotations),
            "gt_count":len(finalAnnotations),
            "tp":tp,
            "fp":fp,
            "fn":fn,
            "vector_F1_raw":vector_F1_raw,
            "vector_F1":vector_F1,
            "event_F1":event_F1,
            "event_Precision":event_Precision,
            "event_Recall":event_Recall,
            "event_TS":event_TS})
        modelMetrics=pd.concat([modelMetrics,newRow.to_frame().T],ignore_index=True)
        
        if not check:
            print("check failed on hyper "+str(ind_hyper)+" and CV "+str(ind_CV))
            break




KeyboardInterrupt: 

In [None]:
modelMetrics.to_csv("modelMetrics_"+"Experiment002_CV_1669883512.373571.csv")

Unnamed: 0,ind_hyper,ind_model,thres_IoU,treeLimit,check,final_count,gt_count,tp,fp,fn,vector_F1_raw,vector_F1,event_F1,event_Precision,event_Recall,event_TS
0,0,4,0.3,44,True,341,387,233,154,108,0.554651,0.58576,0.64011,0.602067,0.683284,0.470707
1,0,5,0.3,44,True,150,185,97,88,53,0.494842,0.540688,0.579104,0.524324,0.646667,0.407563
2,0,6,0.3,44,False,912,1836,776,1061,136,0.595285,0.598635,0.564569,0.422428,0.850877,0.39331


In [14]:
modelMetrics