In [240]:
#external libraries
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.colors as clt
import plotly
import plotly.subplots as sb
import plotly.express as px
import plotly.graph_objects as go
import dotenv
import pandas as pd
import scipy.fft as fft
import scipy.signal as sg
import scipy.io as sio
import pickle as pkl
import xgboost as xgb
import time

#project library
from spinco import *

#environment variables
# NOTE(review): presumably lab.env defines DATAPATH; os.environ[...] below raises KeyError if it is unset.
dotenv.load_dotenv('lab.env')

#project variables
# Dataset folders are built with os.path.join instead of a hard-coded "\\" separator,
# so the notebook resolves the same sub-folders on Windows, Linux and macOS.
datapath=os.environ['DATAPATH']
cognipath=os.path.join(datapath,"COGNITION")
dreamspath=os.path.join(datapath,"DREAMS")
masspath=os.path.join(datapath,"MASS")


## load MASS

In [241]:
# Load the event annotations and the per-signal metadata from the folder at masspath.
# NOTE(review): loadMASSSpindles presumably comes from the star-import of spinco; despite its
# name, the returned annotations also carry 'kcomplex' rows (see the head() output below).
annotations, signalsMetadata = loadMASSSpindles(masspath)

In [242]:
# Preview the first five annotation rows.
annotations.head(n=5)

Unnamed: 0,type,expert,subjectId,labelerId,startTime,duration,samplerate,stopTime,startInd,stopInd
0,kcomplex,E1,1,1,830.596676,0.699174,256,831.29585,212633,212812
1,kcomplex,E1,1,1,840.981316,0.492156,256,841.473472,215291,215417
2,kcomplex,E1,1,1,970.596678,0.578088,256,971.174766,248473,248621
3,kcomplex,E1,1,1,1049.772807,0.695268,256,1050.468075,268742,268920
4,kcomplex,E1,1,1,1077.231575,0.648396,256,1077.879971,275771,275937


In [243]:
# Preview the first five signal-metadata rows.
signalsMetadata.head(n=5)

Unnamed: 0,subjectId,file,channel,duration,samplerate,isOriginalSamplerate,database
0,1,MASS_0001.pkl,C3-CLE,28956.0,256,True,MASS
1,2,MASS_0002.pkl,C3-CLE,35016.0,256,True,MASS
2,3,MASS_0003.pkl,C3-CLE,36760.0,256,True,MASS
3,4,MASS_0004.pkl,C3-CLE,28004.0,256,True,MASS
4,5,MASS_0005.pkl,C3-CLE,31244.0,256,True,MASS


## spindle duration histograms

In [244]:
# Independent copy of the rows whose type is 'spindle', so later column
# additions do not touch the annotations frame.
spindles = annotations.loc[annotations['type'] == 'spindle'].copy()

In [245]:
#spindles longer than 4 seconds
# The selection is computed once and reused for both the ratio and the table.
longSpindles = spindles[spindles.duration > 4]
# Share of all spindle annotations; the recorded run gives ~0.0006,
# i.e. roughly 600 parts per million (about 0.06 %).
print(len(longSpindles) / len(spindles))
longSpindles

0.00059776436128878


Unnamed: 0,type,expert,subjectId,labelerId,startTime,duration,samplerate,stopTime,startInd,stopInd
13306,spindle,E2,6,2,18638.329295,20.764296,256,18659.093591,4771412,4776728
19469,spindle,E2,9,2,2202.741728,5.757444,256,2208.499172,563902,565376
19511,spindle,E2,9,2,2502.929749,4.601268,256,2507.531017,640750,641928
20068,spindle,E2,9,2,10321.519079,10.413396,256,10331.932475,2642309,2644975
21090,spindle,E2,9,2,27080.863731,14.018634,256,27094.882365,6932701,6936290
29628,spindle,E2,12,2,16846.926775,4.80438,256,16851.731155,4312813,4314043
29902,spindle,E2,12,2,24721.796353,4.359096,256,24726.155449,6328780,6329896
31303,spindle,E2,13,2,7311.729964,4.363002,256,7316.092966,1871803,1872920
31360,spindle,E2,13,2,7845.733229,7.210476,256,7852.943705,2008508,2010354
31711,spindle,E2,13,2,13991.667756,8.811936,256,14000.479692,3581867,3584123


In [246]:
# Number of annotations per (type, expert, subject), drawn as grouped bars
# with one facet row per annotation type.
aux = (
    annotations[["type", "expert", "subjectId"]]
    .groupby(["type", "expert", "subjectId"], as_index=False)
    .size()
)
px.bar(
    aux,
    x="subjectId",
    y="size",
    color="expert",
    barmode="group",
    facet_row="type",
    title="MASS SS2 annotations overview",
)

In [247]:
# Distribution of spindle durations over all experts and subjects.
px.histogram(
    spindles,
    x='duration',
    title="MASS SS2 spindle duration histogram",
)

In [248]:
# Same distribution, one facet row per expert.
px.histogram(
    spindles,
    x='duration',
    facet_row="expert",
    title="MASS SS2 spindle duration histogram by expert",
)

In [249]:
# Same distribution, faceted by expert and coloured by subject.
px.histogram(
    spindles,
    x='duration',
    color="subjectId",
    facet_row="expert",
    title="MASS SS2 spindle duration histogram by expert and signal",
)

In [250]:
# Spindle annotations per subject (all experts pooled); rebinds aux.
aux = (
    spindles[['subjectId', 'startTime']]
    .groupby('subjectId', as_index=False)
    .size()
)
aux

Unnamed: 0,subjectId,size
0,1,3496
1,2,3355
2,3,745
3,4,253
4,5,1542
5,6,990
6,7,2519
7,8,385
8,9,2484
9,10,2734


In [251]:
# Histogram of per-subject counts, restricted to subjects with more than 500 spindles.
px.histogram(aux.loc[aux['size'] > 500, 'size'], nbins=6)

In [252]:
# Mean and population standard deviation of the per-subject spindle counts.
# ddof=0 is spelled out because that is what np.std uses by default.
m = aux['size'].mean()
s = aux['size'].std(ddof=0)

In [253]:
# Is 1542 (the count shown for subject 5 in the table above) strictly within one s of the mean?
(m - s) < 1542 < (m + s)

True

In [254]:
# Lower edge of the m +/- 1.25*s window.
m - 1.25 * s

481.9926866951573

In [255]:
# Flag, for several window widths, the subjects whose spindle count lies strictly
# inside (m - k*s, m + k*s). Column names refer to the total window width
# (e.g. 'is2sigma' is m +/- 1*s); the mapping below stores the half-width k.
# One vectorized comparison per window replaces four copy-pasted row-wise apply() calls
# and yields the same boolean columns.
sigmaWindows = {
    'is1.5sigma': 0.75,
    'is2sigma': 1.0,
    'is5/2sigma': 1.25,
    'is3sigma': 1.5,
}
for flagName, halfWidth in sigmaWindows.items():
    aux[flagName] = (aux['size'] > (m - halfWidth*s)) & (aux['size'] < (m + halfWidth*s))
aux

Unnamed: 0,subjectId,size,is1.5sigma,is2sigma,is5/2sigma,is3sigma
0,1,3496,False,False,False,False
1,2,3355,False,False,False,False
2,3,745,False,True,True,True
3,4,253,False,False,False,True
4,5,1542,True,True,True,True
5,6,990,False,True,True,True
6,7,2519,True,True,True,True
7,8,385,False,False,False,True
8,9,2484,True,True,True,True
9,10,2734,False,True,True,True


In [287]:
# Summary statistics of the selected columns, grouped by labeler.
(
    spindles[['labelerId', 'subjectId', 'duration']]
    .groupby(['labelerId'])
    .describe()
)

Unnamed: 0_level_0,duration,duration,duration,duration,duration,duration,duration,duration
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
labelerId,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,11204.0,0.840652,0.206918,0.335915,0.699173,0.80854,0.941353,2.218605
2,22254.0,1.203908,0.589257,0.11718,0.773388,1.11321,1.515528,20.764296


## criterion to remove outliers

In [257]:
# keep around 99% of annotations: this is how many rows make up the remaining 1%
spindles.shape[0] * 0.01

334.58

In [258]:
# Half of the 1% budget, i.e. the number of rows available per tail.
spindles.shape[0] * 0.01 / 2

167.29

In [295]:
# Duration bounds (seconds) used by the following cells to flag outlier spindles.
minDuration, maxDuration = 0.3, 2.9

In [296]:
# Number of spindles strictly longer than maxDuration.
int((spindles['duration'] > maxDuration).sum())

196

In [297]:
# Number of spindles strictly shorter than minDuration.
int((spindles['duration'] < minDuration).sum())

182

In [286]:
# Fraction of spindles that fall outside the duration bounds.
(
    int((spindles['duration'] > maxDuration).sum())
    + int((spindles['duration'] < minDuration).sum())
) / len(spindles)

0.011297746428357942

In [263]:
# Complement of the discarded fraction: the share of spindles that is retained.
1 - (
    int((spindles['duration'] > maxDuration).sum())
    + int((spindles['duration'] < minDuration).sum())
) / len(spindles)

0.9887022535716421

In [277]:
# Flag duration outliers on the spindles frame (adds four boolean columns in place).
spindles['overMax'] = spindles['duration'] > maxDuration    # strictly longer than the upper bound
spindles['underMin'] = spindles['duration'] < minDuration   # strictly shorter than the lower bound
spindles['discard'] = spindles['overMax'] | spindles['underMin']
# 'keep' is the exact complement of 'discard'. Recomputing it with strict inequalities
# left a duration equal to one of the bounds in neither group.
spindles['keep'] = ~spindles['discard']
spindles

Unnamed: 0,type,expert,subjectId,labelerId,startTime,duration,samplerate,stopTime,startInd,stopInd,overMax,underMin,discard,keep
409,spindle,E1,0001,0001,888.327805,0.640579,256,888.968384,227412,227576,False,False,False,True
410,spindle,E1,0001,0001,905.758061,0.578094,256,906.336155,231874,232022,False,False,False,True
411,spindle,E1,0001,0001,917.731574,0.847603,256,918.579177,234939,235156,False,False,False,True
412,spindle,E1,0001,0001,922.078189,0.878845,256,922.957034,236052,236277,False,False,False,True
413,spindle,E1,0001,0001,939.055445,0.757767,256,939.813212,240398,240592,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45035,spindle,E2,0019,0002,25433.134130,0.800730,256,25433.934860,6510882,6511087,False,False,False,True
45036,spindle,E2,0019,0002,25437.911168,0.472626,256,25438.383794,6512105,6512226,False,False,False,True
45037,spindle,E2,0019,0002,25443.578774,1.035090,256,25444.613864,6513556,6513821,False,False,False,True
45038,spindle,E2,0019,0002,25456.630907,0.890568,256,25457.521475,6516898,6517125,False,False,False,True


In [273]:
# Summary statistics of the discarded spindles per subject and expert.
(
    spindles.loc[spindles['discard']]
    .groupby(['subjectId', 'expert'])
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,startTime,startTime,startTime,startTime,startTime,startTime,startTime,startTime,duration,duration,...,startInd,startInd,stopInd,stopInd,stopInd,stopInd,stopInd,stopInd,stopInd,stopInd
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
subjectId,expert,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
1,E2,22.0,16098.685094,8641.962551,836.894437,12219.323572,17581.626417,23798.774264,27807.316802,22.0,1.585658,...,6092486.25,7118673.0,22.0,4121669.0,2212384.0,214313.0,3129047.0,4500967.5,6093086.5,7118742.0
2,E2,15.0,13418.490055,9730.926965,527.886691,7227.143271,10157.76875,20597.998917,29838.457689,15.0,1.405639,...,5273088.0,7638645.0,15.0,3435493.0,2491241.0,135183.0,1850221.5,2601179.0,5273885.5,7638713.0
3,E2,7.0,13665.721928,5038.506856,7714.587258,10887.470419,13314.736234,14669.943101,23515.902968,7.0,0.26505,...,3755505.5,6020071.0,7.0,3498493.0,1289855.0,1974997.0,2787265.5,3408629.0,3755581.5,6020128.0
5,E2,15.0,14396.503424,9176.033375,3097.904434,3719.73115,18896.141936,21414.818606,28042.408189,15.0,0.257796,...,5482193.5,7178856.0,15.0,3685571.0,2349064.0,793109.0,952323.5,4837475.0,5482254.5,7178919.0
6,E2,17.0,16517.982246,7071.423389,1068.967911,15600.709387,17121.269002,18638.329295,25653.426512,17.0,1.626045,...,4771412.0,6567277.0,17.0,4229020.0,1810435.0,273732.0,3993858.0,4383098.0,4776728.0,6567323.0
7,E2,36.0,15922.295152,8099.30726,2284.380543,9700.359493,16286.150234,23896.415785,27743.859838,36.0,2.122368,...,6117482.25,7102428.0,36.0,4076651.0,2073619.0,584877.0,2483869.75,4170029.5,6118341.0,7103297.0
9,E2,30.0,15708.320216,8687.632236,1910.776835,9535.117248,16962.108849,22921.117813,27080.863731,30.0,2.53877,...,5867806.5,6932701.0,30.0,4021980.0,2224028.0,489191.0,2441713.75,4343093.5,5868714.0,6936290.0
10,E2,19.0,11549.626527,7873.071793,433.902685,4914.808302,11806.25445,17513.989285,24140.044888,19.0,1.30851,...,4483581.5,6179851.0,19.0,2957039.0,2015427.0,111117.0,1258981.5,3023221.0,4483991.5,6179927.0
11,E2,31.0,13019.331841,7197.234543,963.714674,5938.992845,13462.296058,17679.263148,23896.176746,31.0,1.504944,...,4525891.5,6117421.0,31.0,3333334.0,1842460.0,246774.0,1520800.0,3446424.0,4526412.5,6117497.0
12,E2,40.0,16400.301974,8567.719585,1153.092932,9324.93989,16228.342163,23140.212796,29382.844908,40.0,2.170564,...,5923894.75,7522008.0,40.0,4199033.0,2193477.0,295936.0,2388016.0,4155394.0,5924715.0,7522874.0


In [280]:
# Duration statistics of the retained spindles per subject and expert.
(
    spindles.loc[spindles['keep'], ['subjectId', 'expert', 'duration']]
    .groupby(['subjectId', 'expert'])
    .describe()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,duration,duration,duration,duration,duration,duration,duration,duration
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
subjectId,expert,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
1,E1,1044.0,0.822037,0.202792,0.390602,0.675735,0.796822,0.945251,1.839722
1,E2,2430.0,1.182772,0.482949,0.316386,0.828072,1.128834,1.48428,2.878722
2,E1,1143.0,0.834582,0.195713,0.41404,0.699173,0.816353,0.937439,1.820198
2,E2,2197.0,1.218885,0.46427,0.320292,0.85932,1.187424,1.54287,2.847474
3,E1,143.0,0.690269,0.149652,0.460907,0.5859,0.648399,0.775341,1.16008
3,E2,595.0,0.968905,0.389112,0.320292,0.691362,0.894474,1.183518,2.671704
4,E1,253.0,0.829708,0.224842,0.492157,0.667923,0.785103,0.945251,1.675674
5,E1,341.0,0.706104,0.131805,0.425751,0.613243,0.699173,0.781197,1.195236
5,E2,1186.0,0.924721,0.364111,0.320292,0.652302,0.867132,1.128834,2.51937
6,E1,150.0,0.761306,0.143614,0.492157,0.679642,0.749954,0.847603,1.480377
