# Testing for E1 - using all spindles, training with all data
fistly define the UUID of the experiment to use

In [45]:
experimentId="56c86206-b216-4e8a-b34a-03d33136ac83"

-------------------------------------------------------

In [46]:
#external libraries
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.colors as clt
import plotly
import plotly.subplots as sb
import plotly.express as px
import plotly.graph_objects as go
import dotenv
import pandas as pd
import scipy.fft as fft
import scipy.signal as sg
import scipy.io as sio
import pickle as pkl
import xgboost as xgb
import time
import sklearn.metrics as skm

#project library
from spinco import *

#environment variables
dotenv.load_dotenv('lab.env')

#project variables
datapath=os.environ['DATAPATH']
cognipath=datapath+"\\COGNITION"
dreamspath=datapath+"\\DREAMS"
masspath=datapath+"\\MASS"


## load mass

In [47]:
samplerate=200  #Should rethink this

In [48]:
annotations, signalsMetadata = loadMASSSpindles(masspath,forceSamplerate=samplerate)

In [49]:
#consider to include this in a function
minDuration=0.3
maxDuration=5
annotations=annotations[annotations.type=='spindle']
annotations=annotations[annotations.duration>minDuration]
annotations=annotations[annotations.duration<maxDuration]
annotations=annotations.reset_index(drop=True)

In [50]:
#check with EDA results (not needed)
print(1-len(annotations)/33458)

0.005648873214178951


In [51]:
annotations.head()

Unnamed: 0,type,expert,subjectId,labelerId,startTime,duration,samplerate,stopTime,startInd,stopInd
0,spindle,E1,1,1,888.327805,0.640579,200,888.968384,177666,177794
1,spindle,E1,1,1,905.758061,0.578094,200,906.336155,181152,181267
2,spindle,E1,1,1,917.731574,0.847603,200,918.579177,183546,183716
3,spindle,E1,1,1,922.078189,0.878845,200,922.957034,184416,184591
4,spindle,E1,1,1,939.055445,0.757767,200,939.813212,187811,187963


In [52]:
signalsMetadata.head()

Unnamed: 0,subjectId,file,channel,duration,samplerate,isOriginalSamplerate,database
0,1,MASS_0001.pkl,C3-CLE,28956.0,200,False,MASS
1,2,MASS_0002.pkl,C3-CLE,35016.0,200,False,MASS
2,3,MASS_0003.pkl,C3-CLE,36760.0,200,False,MASS
3,4,MASS_0004.pkl,C3-CLE,28004.0,200,False,MASS
4,5,MASS_0005.pkl,C3-CLE,31244.0,200,False,MASS


## Load experiment results

In [53]:
experimentModels, featureSelection = loadExperiment(experimentId,datapath)

In [54]:
experimentModels

Unnamed: 0,criteriumId,criteriumName,labelerIdList,train,val,test,modelId,spindleTimeRate
0,1,E1,[0001],"[0003, 0005, 0007, 0009, 0010, 0011, 0012, 001...","[0017, 0002, 0006]",1,bdbdfc7b-0c97-484c-b4e9-70e698d9b353,0.018639
1,1,E1,[0001],"[0001, 0003, 0005, 0007, 0009, 0010, 0011, 001...","[0006, 0013, 0019]",2,bae09da0-598a-46ed-b46a-553a9d1de380,0.020055
2,1,E1,[0001],"[0002, 0005, 0007, 0009, 0010, 0011, 0012, 001...","[0013, 0006, 0001]",3,2c77d9d9-8a36-4e76-ac92-65f9318bd100,0.021075
3,1,E1,[0001],"[0001, 0002, 0003, 0006, 0007, 0009, 0010, 001...","[0018, 0014, 0012]",5,dc49a81b-fa55-4b8e-989b-e40bd13a4302,0.018331
4,1,E1,[0001],"[0003, 0007, 0009, 0010, 0011, 0012, 0013, 001...","[0005, 0001, 0002]",6,3d3c90d6-8d87-4dd0-85ec-9eedee1565cb,0.019222
5,1,E1,[0001],"[0001, 0002, 0003, 0010, 0011, 0012, 0013, 001...","[0005, 0006, 0009]",7,6622a394-926c-45ad-a0f5-eb1def948317,0.019625
6,1,E1,[0001],"[0001, 0003, 0005, 0006, 0007, 0010, 0011, 001...","[0002, 0017, 0013]",9,91153884-8b2d-49a1-86cd-386a1add8f8e,0.017824
7,1,E1,[0001],"[0002, 0003, 0005, 0006, 0007, 0009, 0012, 001...","[0017, 0001, 0011]",10,672f9a5b-bab6-4a47-8889-b20041f97fe8,0.017542
8,1,E1,[0001],"[0001, 0003, 0005, 0006, 0009, 0010, 0012, 001...","[0002, 0018, 0007]",11,98885354-4ef8-4d7f-b64d-8fb9a1edc578,0.015442
9,1,E1,[0001],"[0002, 0003, 0005, 0006, 0009, 0011, 0013, 001...","[0001, 0007, 0010]",12,39389298-e7f5-4de1-9dc7-bcf9a95245bd,0.01646


In [55]:
#we show the difference in class inbalance for the annotation criteria considered
experimentModels[['criteriumName','spindleTimeRate']].groupby('criteriumName').describe()

Unnamed: 0_level_0,spindleTimeRate,spindleTimeRate,spindleTimeRate,spindleTimeRate,spindleTimeRate,spindleTimeRate,spindleTimeRate,spindleTimeRate
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
criteriumName,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
E1,15.0,0.018463,0.001834,0.015442,0.017419,0.018331,0.019424,0.022439
E2,15.0,0.05832,0.00452,0.052347,0.054965,0.056746,0.061623,0.067005
union,15.0,0.059557,0.004579,0.053738,0.055972,0.058222,0.062711,0.068526


In [56]:
featureSelection

Unnamed: 0,window,characteristic,bandName,Times_selected
0,2.0,sigmaIndex,broadband,1000.0
1,2.0,hjortActivity,sigma,1000.0
2,1.5,sigmaIndex,broadband,1000.0
3,1.0,hjortActivity,sigma,1000.0
4,1.0,sigmaIndex,broadband,1000.0
5,2.0,hjortActivity,beta1,1000.0
6,2.0,hjortActivity,beta2,1000.0
7,1.5,hjortActivity,sigma,999.0
8,2.0,hjortMobility,beta1,994.0
9,2.0,hjortActivity,delta2,986.0


## Hyperparameter definition
this should come from a previous evaluation notebook

In [57]:
auxThres=pd.DataFrame({
    'hyperThres':[0.1, 0.2,0.3, 0.4, 0.5]
})
auxDepth=pd.DataFrame({
    'hyperDepth':[10,20,30,40, 50, 60]
})
auxMinDuration=pd.DataFrame({
    'hyperMinDuration':[0.3]
})
auxMaxDuration=pd.DataFrame({
    'hyperMaxDuration':[5]
})
auxClose=pd.DataFrame({
    'hyperClose':[0.1]
})


hyperParams=pd.merge(auxThres,auxDepth,how='cross')
hyperParams=pd.merge(hyperParams,auxMinDuration,how='cross')
hyperParams=pd.merge(hyperParams,auxMaxDuration,how='cross')
hyperParams=pd.merge(hyperParams,auxClose,how='cross')

hyperParams

Unnamed: 0,hyperThres,hyperDepth,hyperMinDuration,hyperMaxDuration,hyperClose
0,0.1,10,0.3,5,0.1
1,0.1,20,0.3,5,0.1
2,0.1,30,0.3,5,0.1
3,0.1,40,0.3,5,0.1
4,0.1,50,0.3,5,0.1
5,0.1,60,0.3,5,0.1
6,0.2,10,0.3,5,0.1
7,0.2,20,0.3,5,0.1
8,0.2,30,0.3,5,0.1
9,0.2,40,0.3,5,0.1


## Testing with E1 criterium
we test the optimal points for the prediction threshold and number of boost iterations in the different validation groups

In [58]:
experimentModels=experimentModels[experimentModels.criteriumName=='E1'].reset_index(drop=True)
experimentModels

Unnamed: 0,criteriumId,criteriumName,labelerIdList,train,val,test,modelId,spindleTimeRate
0,1,E1,[0001],"[0003, 0005, 0007, 0009, 0010, 0011, 0012, 001...","[0017, 0002, 0006]",1,bdbdfc7b-0c97-484c-b4e9-70e698d9b353,0.018639
1,1,E1,[0001],"[0001, 0003, 0005, 0007, 0009, 0010, 0011, 001...","[0006, 0013, 0019]",2,bae09da0-598a-46ed-b46a-553a9d1de380,0.020055
2,1,E1,[0001],"[0002, 0005, 0007, 0009, 0010, 0011, 0012, 001...","[0013, 0006, 0001]",3,2c77d9d9-8a36-4e76-ac92-65f9318bd100,0.021075
3,1,E1,[0001],"[0001, 0002, 0003, 0006, 0007, 0009, 0010, 001...","[0018, 0014, 0012]",5,dc49a81b-fa55-4b8e-989b-e40bd13a4302,0.018331
4,1,E1,[0001],"[0003, 0007, 0009, 0010, 0011, 0012, 0013, 001...","[0005, 0001, 0002]",6,3d3c90d6-8d87-4dd0-85ec-9eedee1565cb,0.019222
5,1,E1,[0001],"[0001, 0002, 0003, 0010, 0011, 0012, 0013, 001...","[0005, 0006, 0009]",7,6622a394-926c-45ad-a0f5-eb1def948317,0.019625
6,1,E1,[0001],"[0001, 0003, 0005, 0006, 0007, 0010, 0011, 001...","[0002, 0017, 0013]",9,91153884-8b2d-49a1-86cd-386a1add8f8e,0.017824
7,1,E1,[0001],"[0002, 0003, 0005, 0006, 0007, 0009, 0012, 001...","[0017, 0001, 0011]",10,672f9a5b-bab6-4a47-8889-b20041f97fe8,0.017542
8,1,E1,[0001],"[0001, 0003, 0005, 0006, 0009, 0010, 0012, 001...","[0002, 0018, 0007]",11,98885354-4ef8-4d7f-b64d-8fb9a1edc578,0.015442
9,1,E1,[0001],"[0002, 0003, 0005, 0006, 0009, 0011, 0013, 001...","[0001, 0007, 0010]",12,39389298-e7f5-4de1-9dc7-bcf9a95245bd,0.01646


In [59]:
experimentModels_N2 = experimentModels.copy()
experimentModels_N2.head()

Unnamed: 0,criteriumId,criteriumName,labelerIdList,train,val,test,modelId,spindleTimeRate
0,1,E1,[0001],"[0003, 0005, 0007, 0009, 0010, 0011, 0012, 001...","[0017, 0002, 0006]",1,bdbdfc7b-0c97-484c-b4e9-70e698d9b353,0.018639
1,1,E1,[0001],"[0001, 0003, 0005, 0007, 0009, 0010, 0011, 001...","[0006, 0013, 0019]",2,bae09da0-598a-46ed-b46a-553a9d1de380,0.020055
2,1,E1,[0001],"[0002, 0005, 0007, 0009, 0010, 0011, 0012, 001...","[0013, 0006, 0001]",3,2c77d9d9-8a36-4e76-ac92-65f9318bd100,0.021075
3,1,E1,[0001],"[0001, 0002, 0003, 0006, 0007, 0009, 0010, 001...","[0018, 0014, 0012]",5,dc49a81b-fa55-4b8e-989b-e40bd13a4302,0.018331
4,1,E1,[0001],"[0003, 0007, 0009, 0010, 0011, 0012, 0013, 001...","[0005, 0001, 0002]",6,3d3c90d6-8d87-4dd0-85ec-9eedee1565cb,0.019222


In [60]:
annotationsN2=pd.read_csv(datapath+'/MASS/annotations/spindlesFilteredN2.csv')
annotationsN2['samplerate']=samplerate
annotationsN2['subjectId']=annotationsN2.apply(
    lambda row: str(row.subjectId).zfill(4),axis=1)
annotationsN2['labelerId']=annotationsN2.apply(
    lambda row: str(row.labelerId).zfill(4),axis=1)
annotationsN2['stopTime']=annotationsN2.apply(
    lambda row: row.startTime+row.duration , axis=1)
annotationsN2['startInd']=annotationsN2.apply(
    lambda row: seconds2index(row.startTime,row.samplerate) , axis=1)
annotationsN2['stopInd']=annotationsN2.apply(
    lambda row: seconds2index(row.stopTime,row.samplerate) , axis=1)

In [61]:
## load hypnograms
stagesAnnotations=pd.read_csv(datapath+"\MASS\stages\stages.csv")
stagesAnnotations['samplerate']=samplerate
stagesAnnotations['subjectId']=stagesAnnotations.apply(
    lambda row: str(row.subjectId).zfill(4),axis=1)
stagesAnnotations['stopTime']=stagesAnnotations.apply(
    lambda row: row.startTime+row.duration , axis=1)
stagesAnnotations['startInd']=stagesAnnotations.apply(
    lambda row: seconds2index(row.startTime,row.samplerate) , axis=1)
stagesAnnotations['stopInd']=stagesAnnotations.apply(
    lambda row: seconds2index(row.stopTime,row.samplerate) , axis=1)
stagesAnnotations.head(5)

hypnograms={}
for ind, row in signalsMetadata.iterrows():
    subjectId=row.subjectId
    thisStages=stagesAnnotations[stagesAnnotations.subjectId==subjectId]
    excerptDimension=int(row.duration*row.samplerate)
    thisHypnogram=np.ones((excerptDimension,))*np.nan
    for ind_stg, row_stg in thisStages.iterrows():
        thisHypnogram[row_stg.startInd:row_stg.stopInd]=row_stg.value
    hypnograms[subjectId]=thisHypnogram  

In [62]:
rawF1s = []
rawPrecisions = []
rawRecalls = []

rawThres = []
rawDepth = []

f1s = []
precisions = []
recalls = []

eventF1s = []
eventPrecisions = []
eventRecalls = []

checks = []

rawF1s_N2 = []
rawPrecisions_N2 = []
rawRecalls_N2 = []

f1s_N2 = []
precisions_N2 = []
recalls_N2 = []

eventF1s_N2 = []
eventPrecisions_N2 = []
eventRecalls_N2 = []

checks_N2 = []

thisExperimentModels = experimentModels.copy()
for ind, row in thisExperimentModels.iterrows():
    #load model
    print(ind)
    model = loadBooster(row.modelId, experimentId, datapath)
    #initialise lists
    rawF1xs=[]

    

    #iterate validation subjects
    for valSubjectId in row.val:
        print(valSubjectId)
        #Define annotations criterium
        usedAnnotations=annotations[annotations.labelerId.isin(row.labelerIdList)].reset_index(drop=True)
        #Load features and labels
        valFeatures=loadFeatureMatrix([valSubjectId],featureSelection,signalsMetadata,samplerate,datapath)
        valLabels=loadLabelsVector([valSubjectId],usedAnnotations,signalsMetadata,samplerate)
        #Predict
        valDMatrix=xgb.DMatrix(data=valFeatures)
        probabilities=[]
        for i_depth, j_depth in auxDepth.iterrows():
            pred=model.predict(valDMatrix,iteration_range=(0,int(j_depth['hyperDepth'])))
            probabilities.append(pred)
        probabilities=np.array(probabilities)
        f1xs=[]

        
        for ind_hyper,row_hyper in hyperParams.iterrows():
            thisExperimentModels=experimentModels.copy()
            hyperThres=row_hyper.hyperThres
            hyperDepth=int(row_hyper.hyperDepth)
            hyperMinDuration=row_hyper.hyperMinDuration
            hyperMaxDuration=row_hyper.hyperMaxDuration
            hyperClose=row_hyper.hyperClose
            probs=probabilities[auxDepth['hyperDepth']==hyperDepth][0]
            raw=probs>=hyperThres
            #Processed labels
            processed=labelingProcess(raw,hyperClose,hyperMinDuration,samplerate)
            gtAnnotations=labelVectorToAnnotations(valLabels,samplerate)
            detections=labelVectorToAnnotations(processed,samplerate)
            detections=detections[detections.duration<hyperMaxDuration]
            #Metrics
            f,r,p=annotationPairToMetrics(gtAnnotations,detections, thresIoU=0.2)
        
            #Metric appends
            f1xs.append(f)

        rawF1xs.append(np.array(f1xs))

    #statistics of the metrics over the subjects of the validation set
    meanF1s=np.mean(np.array(rawF1xs), axis=0)
    
    optimalInd=hyperParams[meanF1s==np.max(meanF1s)].iloc[0]
    hyperParams_opt=optimalInd
    print(hyperParams_opt)
    hyperThres=hyperParams_opt['hyperThres']
    hyperDepth=int(hyperParams_opt['hyperDepth'])
    hyperMinDuration=hyperParams_opt['hyperMinDuration']
    hyperMaxDuration=hyperParams_opt['hyperMaxDuration']
    hyperClose=hyperParams_opt['hyperClose']
    
    ##
    rawThres.append(hyperThres)
    rawDepth.append(hyperDepth)


    testSubjectId=row.test
    #Define annotations criterium
    usedAnnotations=annotations[annotations.labelerId.isin(row.labelerIdList)].reset_index(drop=True)
    #Load features and labels
    testFeatures=loadFeatureMatrix([testSubjectId],featureSelection,signalsMetadata,samplerate,datapath)
    testLabels=loadLabelsVector([testSubjectId],usedAnnotations,signalsMetadata,samplerate)

    #Predict
    testDMatrix=xgb.DMatrix(data=testFeatures)
    probabilities=model.predict(testDMatrix,iteration_range=(0,hyperDepth))
    rawLabels=probabilities>=hyperThres
    #Raw Metrics
    rawTp=np.sum(rawLabels*testLabels)
    rawFp=np.sum(rawLabels*(1-testLabels))
    rawTn=np.sum((1-rawLabels)*(1-testLabels))
    rawFn=np.sum((1-rawLabels)*testLabels)
    #Raw appends
    rawF1s.append(2*rawTp/(2*rawTp+rawFp+rawFn))
    rawPrecisions.append(rawTp/(rawTp+rawFp) )
    rawRecalls.append(rawTp/(rawTp+rawFn))
    #Process
    processedLabels=labelingProcess(rawLabels,hyperClose,hyperMinDuration,samplerate)
    #Processed metrics
    tp=np.sum(processedLabels*testLabels)
    fp=np.sum(processedLabels*(1-testLabels))
    tn=np.sum((1-processedLabels)*(1-testLabels))
    fn=np.sum((1-processedLabels)*testLabels)
    #Processed appends
    f1s.append(2*tp/(2*tp+fp+fn))
    precisions.append(tp/(tp+fp))
    recalls.append(tp/(tp+fn))

    #By-event metrics
    processedAnnotations=labelVectorToAnnotations(processedLabels,samplerate)
    processedAnnotations=processedAnnotations[processedAnnotations.duration<maxDuration]
    gtAnnotations=labelVectorToAnnotations(testLabels,samplerate)   #<- or just filter the annotations
    f,r,p=annotationPairToMetrics(gtAnnotations,processedAnnotations, thresIoU=0.2)
    print('F1-score')
    print(f)
    #calculate metrics
    eventF1s.append(f)
    eventPrecisions.append(p)
    eventRecalls.append(r)
    
    #N2
    testHypnogram=hypnograms[testSubjectId]
    testN2Mask=testHypnogram==2
    #Define annotations criterium
    usedAnnotations=annotationsN2[annotationsN2.labelerId.isin(row.labelerIdList)].reset_index(drop=True)
    #Load labels
    testLabels=loadLabelsVector([testSubjectId],usedAnnotations,signalsMetadata,samplerate)

    #Apply mask to predictions---------------------------------->
    rawLabels=rawLabels*testN2Mask

    
    #Raw Metrics
    rawTp = np.sum(rawLabels * testLabels)
    rawFp = np.sum(rawLabels * (1 - testLabels))
    rawTn = np.sum((1 - rawLabels) * (1 - testLabels))
    rawFn = np.sum((1 - rawLabels) * testLabels)
    #Raw appends
    rawF1s_N2.append(2 * rawTp / (2 * rawTp + rawFp + rawFn))
    rawPrecisions_N2.append(rawTp / (rawTp + rawFp))
    rawRecalls_N2.append(rawTp / (rawTp + rawFn))
    #Process
    processedLabels=labelingProcess(rawLabels,hyperClose,hyperMinDuration,samplerate)
    #Processed metrics
    tp = np.sum(processedLabels * testLabels)
    fp = np.sum(processedLabels * (1 - testLabels))
    tn = np.sum((1 - processedLabels) * (1 - testLabels))
    fn = np.sum((1 - processedLabels) * testLabels)
    #Processed appends
    f1s_N2.append(2 * tp / (2 * tp + fp + fn))
    precisions_N2.append(tp / (tp + fp))
    recalls_N2.append(tp / (tp + fn))

    #By-event metrics
    processedAnnotations = labelVectorToAnnotations(processedLabels, samplerate)
    gtAnnotations = labelVectorToAnnotations(testLabels, samplerate)  #<- or just filter the annotations
    f, r, p = annotationPairToMetrics(gtAnnotations, processedAnnotations, thresIoU=0.2)
    print('N2 F1-score')
    print(f)
    #calculate metrics
    eventF1s_N2.append(f)
    eventPrecisions_N2.append(p)
    eventRecalls_N2.append(r)

#include metrics in the dataframe
experimentModels['rawF1'] = rawF1s
experimentModels['rawPrecision'] = rawPrecisions
experimentModels['rawRecall'] = rawRecalls

experimentModels['f1'] = f1s
experimentModels['precision'] = precisions
experimentModels['recall'] = recalls

experimentModels['eventF1'] = eventF1s
experimentModels['eventPrecision'] = eventPrecisions
experimentModels['eventRecall'] = eventRecalls

experimentModels['Binarization_Threshold'] = rawF1s
experimentModels['Boosting_Iterations'] = rawPrecisions
#include metrics in the N2 dataframe
experimentModels_N2['rawF1'] = rawF1s_N2
experimentModels_N2['rawPrecision'] = rawPrecisions_N2
experimentModels_N2['rawRecall'] = rawRecalls_N2

experimentModels_N2['f1'] = f1s_N2
experimentModels_N2['precision'] = precisions_N2
experimentModels_N2['recall'] = recalls_N2

experimentModels_N2['eventF1'] = eventF1s_N2
experimentModels_N2['eventPrecision'] = eventPrecisions_N2
experimentModels_N2['eventRecall'] = eventRecalls_N2

experimentModels_N2['Binarization_Threshold'] = rawF1s
experimentModels_N2['Boosting_Iterations'] = rawPrecisions


0
0017
0002
0006
hyperThres           0.2
hyperDepth          30.0
hyperMinDuration     0.3
hyperMaxDuration     5.0
hyperClose           0.1
Name: 8, dtype: float64
F1-score
0.8120805369127517
N2 F1-score
0.8423669816011934
1
0006
0013
0019
hyperThres           0.5
hyperDepth          50.0
hyperMinDuration     0.3
hyperMaxDuration     5.0
hyperClose           0.1
Name: 28, dtype: float64
F1-score
0.791588785046729
N2 F1-score
0.7936201063315611
2
0013
0006
0001
hyperThres           0.3
hyperDepth          40.0
hyperMinDuration     0.3
hyperMaxDuration     5.0
hyperClose           0.1
Name: 15, dtype: float64
F1-score
0.5577342047930284
N2 F1-score
0.6095238095238096
3
0018
0014
0012
hyperThres           0.4
hyperDepth          40.0
hyperMinDuration     0.3
hyperMaxDuration     5.0
hyperClose           0.1
Name: 21, dtype: float64
F1-score
0.7255172413793104
N2 F1-score
0.7418899858956276
4
0005
0001
0002
hyperThres           0.3
hyperDepth          20.0
hyperMinDuration     0.3
hyperM

In [63]:
dumpPickle('experimentModels_ValidationperModel_Testing_E1_IoU-0.2_temp.pkl',experimentModels)

In [64]:
experimentModels.columns

Index(['criteriumId', 'criteriumName', 'labelerIdList', 'train', 'val', 'test',
       'modelId', 'spindleTimeRate', 'rawF1', 'rawPrecision', 'rawRecall',
       'f1', 'precision', 'recall', 'eventF1', 'eventPrecision', 'eventRecall',
       'Binarization_Threshold', 'Boosting_Iterations'],
      dtype='object')

In [65]:
fig=px.scatter(experimentModels,x='rawF1',y='f1',color='test',hover_name='modelId', marginal_y="histogram")
fig.add_trace(
    go.Scatter(x=experimentModels['rawF1'], y=experimentModels['rawF1'], name="identity", mode='lines',fill="toself")
)
fig.show()

In [66]:
fig=px.scatter(experimentModels,x='rawF1',y='eventF1',color='test',hover_name='modelId', marginal_y="histogram")
fig.add_trace(
    go.Scatter(x=experimentModels['rawF1'], y=experimentModels['rawF1'], name="identity", mode='lines',fill="toself")
)
fig.show()

In [67]:
fig=px.scatter(experimentModels,x='eventF1',y='eventPrecision',color='test',hover_name='modelId', marginal_y="histogram")
fig.add_trace(
    go.Scatter(x=experimentModels['eventF1'], y=experimentModels['eventF1'], name="identity", mode='lines',fill="toself")
)
fig.show()

In [68]:
fig=px.scatter(experimentModels,x='eventF1',y='eventRecall',color='test',hover_name='modelId', marginal_y="histogram")
fig.add_trace(
    go.Scatter(x=experimentModels['eventF1'], y=experimentModels['eventF1'], name="identity", mode='lines',fill="toself")
)
fig.show()

In [69]:
experimentModels[['test','eventF1','eventPrecision','eventRecall']].groupby('test').describe(percentiles=[0.5])

Unnamed: 0_level_0,eventF1,eventF1,eventF1,eventF1,eventF1,eventF1,eventPrecision,eventPrecision,eventPrecision,eventPrecision,eventPrecision,eventPrecision,eventRecall,eventRecall,eventRecall,eventRecall,eventRecall,eventRecall
Unnamed: 0_level_1,count,mean,std,min,50%,max,count,mean,std,min,50%,max,count,mean,std,min,50%,max
test,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
1,1.0,0.812081,,0.812081,0.812081,0.812081,1.0,0.81286,,0.81286,0.81286,0.81286,1.0,0.811303,,0.811303,0.811303,0.811303
2,1.0,0.791589,,0.791589,0.791589,0.791589,1.0,0.849549,,0.849549,0.849549,0.849549,1.0,0.741032,,0.741032,0.741032,0.741032
3,1.0,0.557734,,0.557734,0.557734,0.557734,1.0,0.405063,,0.405063,0.405063,0.405063,1.0,0.895105,,0.895105,0.895105,0.895105
5,1.0,0.725517,,0.725517,0.725517,0.725517,1.0,0.684896,,0.684896,0.684896,0.684896,1.0,0.771261,,0.771261,0.771261,0.771261
6,1.0,0.64,,0.64,0.64,0.64,1.0,0.8,,0.8,0.8,0.8,1.0,0.533333,,0.533333,0.533333,0.533333
7,1.0,0.685318,,0.685318,0.685318,0.685318,1.0,0.580182,,0.580182,0.580182,0.580182,1.0,0.837719,,0.837719,0.837719,0.837719
9,1.0,0.830581,,0.830581,0.830581,0.830581,1.0,0.828258,,0.828258,0.828258,0.828258,1.0,0.832924,,0.832924,0.832924,0.832924
10,1.0,0.761473,,0.761473,0.761473,0.761473,1.0,0.660797,,0.660797,0.660797,0.660797,1.0,0.898113,,0.898113,0.898113,0.898113
11,1.0,0.709618,,0.709618,0.709618,0.709618,1.0,0.566929,,0.566929,0.566929,0.566929,1.0,0.948845,,0.948845,0.948845,0.948845
12,1.0,0.72955,,0.72955,0.72955,0.72955,1.0,0.763889,,0.763889,0.763889,0.763889,1.0,0.698166,,0.698166,0.698166,0.698166


In [70]:
experimentModels[['test','eventF1','eventPrecision','eventRecall']].groupby('test',as_index=False).mean()

Unnamed: 0,test,eventF1,eventPrecision,eventRecall
0,1,0.812081,0.81286,0.811303
1,2,0.791589,0.849549,0.741032
2,3,0.557734,0.405063,0.895105
3,5,0.725517,0.684896,0.771261
4,6,0.64,0.8,0.533333
5,7,0.685318,0.580182,0.837719
6,9,0.830581,0.828258,0.832924
7,10,0.761473,0.660797,0.898113
8,11,0.709618,0.566929,0.948845
9,12,0.72955,0.763889,0.698166


In [71]:
experimentModels[['test','eventF1','eventPrecision','eventRecall']].groupby('test',as_index=False).mean().mean()

test              6.668000e+54
eventF1           7.218079e-01
eventPrecision    6.856261e-01
eventRecall       8.100066e-01
dtype: float64

In [72]:
experimentModels[['test','eventF1','eventPrecision','eventRecall']].groupby('test',as_index=False).mean().std()





eventF1           0.069873
eventPrecision    0.146947
eventRecall       0.122712
dtype: float64

In [73]:
auxPrecision=pd.DataFrame({
    'metric':'event precision',
    'value':experimentModels.eventPrecision,
    'event F1':experimentModels.eventF1
})

auxRecall=pd.DataFrame({
    'metric':'event recall',
    'value':experimentModels.eventRecall,
    'event F1':experimentModels.eventF1
})
visualTradeoff=pd.concat((auxPrecision,auxRecall))

In [74]:
fig=px.scatter(visualTradeoff,x='event F1',y='value',color='metric', marginal_y="histogram")
fig.add_trace(
    go.Scatter(x=experimentModels['eventF1'], y=experimentModels['eventF1'], name="identity", mode='lines',fill="toself")
)
fig.show()

## Restrict to only N2 stage

In [75]:
dumpPickle('experimentModelsTest_ValidationperModel_Testing_N2_E1_IoU-0.2_temp.pkl',experimentModels_N2)

In [76]:
fig=px.scatter(experimentModels_N2,x='rawF1',y='f1',color='test',hover_name='modelId', marginal_y="histogram")
fig.add_trace(
    go.Scatter(x=experimentModels_N2['rawF1'], y=experimentModels_N2['rawF1'], name="identity", mode='lines',fill="toself")
)
fig.show()

In [77]:
fig=px.scatter(experimentModels_N2,x='rawF1',y='eventF1',color='test',hover_name='modelId', marginal_y="histogram")
fig.add_trace(
    go.Scatter(x=experimentModels_N2['rawF1'], y=experimentModels_N2['rawF1'], name="identity", mode='lines',fill="toself")
)
fig.show()

In [78]:
fig=px.scatter(experimentModels_N2,x='eventF1',y='eventPrecision',color='test',hover_name='modelId', marginal_y="histogram")
fig.add_trace(
    go.Scatter(x=experimentModels_N2['eventF1'], y=experimentModels_N2['eventF1'], name="identity", mode='lines',fill="toself")
)
fig.show()

In [79]:
fig=px.scatter(experimentModels_N2,x='eventF1',y='eventRecall',color='test',hover_name='modelId', marginal_y="histogram")
fig.add_trace(
    go.Scatter(x=experimentModels_N2['eventF1'], y=experimentModels_N2['eventF1'], name="identity", mode='lines',fill="toself")
)
fig.show()

In [80]:
experimentModels_N2[['test','eventF1','eventPrecision','eventRecall']].groupby('test').describe(percentiles=[0.5])

Unnamed: 0_level_0,eventF1,eventF1,eventF1,eventF1,eventF1,eventF1,eventPrecision,eventPrecision,eventPrecision,eventPrecision,eventPrecision,eventPrecision,eventRecall,eventRecall,eventRecall,eventRecall,eventRecall,eventRecall
Unnamed: 0_level_1,count,mean,std,min,50%,max,count,mean,std,min,50%,max,count,mean,std,min,50%,max
test,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
1,1.0,0.842367,,0.842367,0.842367,0.842367,1.0,0.875905,,0.875905,0.875905,0.875905,1.0,0.811303,,0.811303,0.811303,0.811303
2,1.0,0.79362,,0.79362,0.79362,0.79362,1.0,0.856994,,0.856994,0.856994,0.856994,1.0,0.738974,,0.738974,0.738974,0.738974
3,1.0,0.609524,,0.609524,0.609524,0.609524,1.0,0.462094,,0.462094,0.462094,0.462094,1.0,0.895105,,0.895105,0.895105,0.895105
5,1.0,0.74189,,0.74189,0.74189,0.74189,1.0,0.714674,,0.714674,0.714674,0.714674,1.0,0.771261,,0.771261,0.771261,0.771261
6,1.0,0.655738,,0.655738,0.655738,0.655738,1.0,0.851064,,0.851064,0.851064,0.851064,1.0,0.533333,,0.533333,0.533333,0.533333
7,1.0,0.768574,,0.768574,0.768574,0.768574,1.0,0.710185,,0.710185,0.710185,0.710185,1.0,0.837719,,0.837719,0.837719,0.837719
9,1.0,0.843478,,0.843478,0.843478,0.843478,1.0,0.854271,,0.854271,0.854271,0.854271,1.0,0.832924,,0.832924,0.832924,0.832924
10,1.0,0.839412,,0.839412,0.839412,0.839412,1.0,0.787845,,0.787845,0.787845,0.787845,1.0,0.898113,,0.898113,0.898113,0.898113
11,1.0,0.72365,,0.72365,0.72365,0.72365,1.0,0.585239,,0.585239,0.585239,0.585239,1.0,0.947811,,0.947811,0.947811,0.947811
12,1.0,0.777341,,0.777341,0.777341,0.777341,1.0,0.875887,,0.875887,0.875887,0.875887,1.0,0.698727,,0.698727,0.698727,0.698727


In [81]:
experimentModels_N2[['test','eventF1','eventPrecision','eventRecall']].groupby('test',as_index=False).mean()

Unnamed: 0,test,eventF1,eventPrecision,eventRecall
0,1,0.842367,0.875905,0.811303
1,2,0.79362,0.856994,0.738974
2,3,0.609524,0.462094,0.895105
3,5,0.74189,0.714674,0.771261
4,6,0.655738,0.851064,0.533333
5,7,0.768574,0.710185,0.837719
6,9,0.843478,0.854271,0.832924
7,10,0.839412,0.787845,0.898113
8,11,0.72365,0.585239,0.947811
9,12,0.777341,0.875887,0.698727


In [82]:
experimentModels_N2[['test','eventF1','eventPrecision','eventRecall']].groupby('test',as_index=False).mean().mean()

test              6.668000e+54
eventF1           7.497775e-01
eventPrecision    7.354698e-01
eventRecall       8.097369e-01
dtype: float64

In [83]:
experimentModels_N2[['test','eventF1','eventPrecision','eventRecall']].groupby('test',as_index=False).mean().std()





eventF1           0.070005
eventPrecision    0.151961
eventRecall       0.122568
dtype: float64

In [84]:
auxPrecision=pd.DataFrame({
    'metric':'event precision',
    'value':experimentModels_N2.eventPrecision,
    'event F1':experimentModels_N2.eventF1
})

auxRecall=pd.DataFrame({
    'metric':'event recall',
    'value':experimentModels_N2.eventRecall,
    'event F1':experimentModels_N2.eventF1
})
visualTradeoff=pd.concat((auxPrecision,auxRecall))

In [85]:
fig=px.scatter(visualTradeoff,x='event F1',y='value',color='metric', marginal_y="histogram")
fig.add_trace(
    go.Scatter(x=experimentModels_N2['eventF1'], y=experimentModels_N2['eventF1'], name="identity", mode='lines',fill="toself")
)
fig.show()

### remove outliers

In [86]:
np.setdiff1d(experimentModels_N2.test,['0006'])

array(['0001', '0002', '0003', '0005', '0007', '0009', '0010', '0011',
       '0012', '0013', '0014', '0017', '0018', '0019'], dtype=object)

In [87]:
experimentModels_N2[experimentModels_N2.test.isin(np.setdiff1d(experimentModels_N2.test,['0001','0014','0019']))][['test','eventF1','eventPrecision','eventRecall']].groupby('test',as_index=False).mean().mean()

test              1.666917e+43
eventF1           7.534302e-01
eventPrecision    7.569994e-01
eventRecall       7.886105e-01
dtype: float64

In [88]:
annotationPairToGraph(gtAnnotations,processedAnnotations)