In [1]:
#external libraries
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.colors as clt
import plotly
import plotly.subplots as sb
import plotly.express as px
import plotly.graph_objects as go
import dotenv
import pandas as pd
import scipy.fft as fft
import scipy.signal as sg
import scipy.io as sio
import pickle as pkl
import xgboost as xgb
import time

#project library
from spinco import *

#environment variables
# Read settings from the local env file; presumably this is where DATAPATH is
# defined -- confirm against spinco.env.
dotenv.load_dotenv('spinco.env')

#project variables
# Root folder of the datasets; raises KeyError when DATAPATH is not set.
datapath=os.environ['DATAPATH']
# os.path.join inserts the separator of the running platform, so the notebook
# no longer depends on Windows-style backslashes.
cognipath=os.path.join(datapath,"COGNITION")
dreamspath=os.path.join(datapath,"DREAMS")
masspath=os.path.join(datapath,"MASS")


ModuleNotFoundError: No module named 'plotly'

## load DREAMS

In [2]:
# Load the DREAMS data from dreamspath. The helper is presumably provided by
# the spinco wildcard import; its three return values are unpacked as the
# signals, the annotations table and the per-signal metadata table.
signals, annotations, signalsMetadata = loadDREAMSSpindles(dreamspath)

SubjectId: 0001--------------
resampling from 100 to 200
SubjectId: 0003--------------
resampling from 50 to 200
SubjectId: 0006--------------
duration discrepancy, removing last 0.7 seconds


In [3]:
# Preview the first rows of the annotations table.
annotations.head()

Unnamed: 0,index,startTime,duration,channel,subjectId,labelerId,type,samplerate,stopTime,startInd,stopInd
0,0,282.24,0.72,C3-A1,1,1,spindle,200,282.96,56448,56592
1,1,311.72,1.54,C3-A1,1,1,spindle,200,313.26,62344,62652
2,2,340.28,0.72,C3-A1,1,1,spindle,200,341.0,68056,68200
3,3,366.83,0.65,C3-A1,1,1,spindle,200,367.48,73366,73496
4,4,373.74,0.5,C3-A1,1,1,spindle,200,374.24,74748,74848


In [4]:
# Preview the first rows of the per-signal metadata table.
signalsMetadata.head()

Unnamed: 0,filename,channel,subjectId,duration,samplerate,database
0,excerpt1.txt,C3-A1,1,1800,200,DREAMS
1,excerpt2.txt,CZ-A1,2,1800,200,DREAMS
2,excerpt3.txt,C3-A1,3,1800,200,DREAMS
3,excerpt4.txt,CZ-A1,4,1800,200,DREAMS
4,excerpt5.txt,CZ-A1,5,1800,200,DREAMS


## spindle duration histograms

In [5]:
# Keep only the rows annotated as spindles; copy so that columns added later
# do not write back into the annotations table.
spindles=annotations.loc[annotations['type']=='spindle'].copy()

In [6]:
# Total number of spindle annotations.
spindles.shape[0]

764

In [7]:
# Spindles lasting more than 4 seconds: print their share of all spindle
# annotations, then display the matching rows.
print(len(spindles.loc[spindles['duration']>4])/len(spindles))
spindles.loc[spindles['duration']>4]

0.0


Unnamed: 0,index,startTime,duration,channel,subjectId,labelerId,type,samplerate,stopTime,startInd,stopInd


In [8]:
# Count the annotations for every (type, labeler, subject) combination and
# show them as grouped bars, with one facet row per annotation type.
aux=annotations.groupby(["type","labelerId","subjectId"],as_index=False).size()
px.bar(
    aux,
    x="subjectId",
    y="size",
    color="labelerId",
    barmode="group",
    facet_row="type",
    title="DREAMS annotations overview",
)

In [9]:
# Distribution of spindle durations over all annotations.
px.histogram(
    spindles,
    x='duration',
    title="DREAMS spindle duration histogram",
)

In [10]:
# Same duration distribution, with one facet row per labeler.
px.histogram(
    spindles,
    x='duration',
    facet_row="labelerId",
    title="DREAMS spindle duration histogram by expert",
)

In [11]:
# Duration distribution per labeler (facet rows), coloured by subject.
px.histogram(
    spindles,
    x='duration',
    color="subjectId",
    facet_row="labelerId",
    title="DREAMS spindle duration histogram by expert and signal",
)

In [12]:
# Number of spindle annotations per subject.
# NOTE(review): rows from every labeler are pooled here, so a spindle marked
# by two experts is counted twice -- confirm this is intended.
aux=spindles.groupby('subjectId',as_index=False).size()
aux

Unnamed: 0,subjectId,size
0,1,167
1,2,112
2,3,49
3,4,69
4,5,142
5,6,159
6,7,18
7,8,48


In [13]:
# Attach the per-subject counts to the signal metadata, then express them as
# annotations per hour (duration is presumably in seconds -- confirm).
# NOTE(review): aux is rebuilt from itself, so running this cell twice merges
# the counts a second time.
aux=signalsMetadata.merge(aux,on="subjectId")
aux["density"]=aux["size"].div(aux["duration"].div(3600))

In [14]:
# Show the per-subject density computed in the previous cell.
aux["density"]

0    334.0
1    224.0
2     98.0
3    138.0
4    284.0
5    318.0
6     36.0
7     96.0
Name: density, dtype: float64

In [15]:
# Mean density across subjects.
np.mean(aux["density"])

191.0

In [16]:
# Spread of the density across subjects. np.std is called with its default
# ddof, i.e. the population standard deviation, not the sample one.
np.std(aux["density"])

106.55045753069294

In [17]:
# Histogram of the number of spindle annotations per subject.
# The former `size > 500` filter removed every DREAMS subject (the largest
# count shown above is 167) and left the figure empty, so all subjects are
# plotted instead.
px.histogram(aux,x='size',nbins=6,title="DREAMS spindle annotations per subject")

In [18]:
# Centre and spread of the per-subject spindle counts; the spread uses
# ddof=0 (population standard deviation), as np.std does by default.
m=aux['size'].mean()
s=aux['size'].std(ddof=0)

In [19]:
# Is the value 1542 strictly inside the band m-s .. m+s?
# NOTE(review): 1542 is not a count that appears in this dataset; it looks
# like a leftover from another analysis -- confirm or remove.
(m-s)<1542<(m+s)

False

In [20]:
# Lower edge of the band that extends 1.25*s on each side of m.
m-1.25*s

28.905964043316914

In [21]:
# Flag subjects whose spindle count lies strictly inside a band centred on m.
# Each column name gives the total band width in units of s, so the value
# stored here is half of that width, applied on either side of m.
bandHalfWidths={'is1.5sigma':0.75,'is2sigma':1.0,'is5/2sigma':1.25,'is3sigma':1.5}
# Vectorised column comparisons replace the four row-wise apply(axis=1) calls
# and produce the same boolean columns in the same order.
for flagName,halfWidth in bandHalfWidths.items():
    aux[flagName]=(aux['size']>(m-halfWidth*s))&(aux['size']<(m+halfWidth*s))
aux

Unnamed: 0,filename,channel,subjectId,duration,samplerate,database,size,density,is1.5sigma,is2sigma,is5/2sigma,is3sigma
0,excerpt1.txt,C3-A1,1,1800,200,DREAMS,167,334.0,False,False,False,True
1,excerpt2.txt,CZ-A1,2,1800,200,DREAMS,112,224.0,True,True,True,True
2,excerpt3.txt,C3-A1,3,1800,200,DREAMS,49,98.0,False,True,True,True
3,excerpt4.txt,CZ-A1,4,1800,200,DREAMS,69,138.0,True,True,True,True
4,excerpt5.txt,CZ-A1,5,1800,200,DREAMS,142,284.0,False,True,True,True
5,excerpt6.txt,CZ-A1,6,1800,200,DREAMS,159,318.0,False,False,True,True
6,excerpt7.txt,CZ-A1,7,1800,200,DREAMS,18,36.0,False,False,False,True
7,excerpt8.txt,CZ-A1,8,1800,200,DREAMS,48,96.0,False,True,True,True


In [22]:
# Summary statistics of the selected columns, grouped by labeler.
spindles[['labelerId','subjectId','duration']].groupby(['labelerId']).describe()

Unnamed: 0_level_0,duration,duration,duration,duration,duration,duration,duration,duration
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
labelerId,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,355.0,0.819859,0.268467,0.39,0.61,0.76,0.96,1.89
2,409.0,0.997531,0.035272,0.49,1.0,1.0,1.0,1.0


## Criterion for removing outliers

In [23]:
# Goal: keep around 99% of the annotations. This is the number of spindles
# that corresponds to the remaining 1%.
len(spindles)*0.01

7.640000000000001

In [24]:
# Half of the 1% budget, i.e. the share available to each tail when it is
# split evenly between too-short and too-long spindles.
len(spindles)*0.01/2

3.8200000000000003

In [25]:
# Duration limits used below to flag outlier spindles (same unit as the
# duration column, presumably seconds -- confirm).
# NOTE(review): no spindle in this dataset falls outside these limits (see the
# counts in the following cells); confirm they were chosen for DREAMS.
minDuration=0.3
maxDuration=2.9

In [26]:
# Number of spindles longer than the upper limit.
int((spindles['duration']>maxDuration).sum())

0

In [27]:
# Number of spindles shorter than the lower limit.
int((spindles['duration']<minDuration).sum())

0

In [28]:
# Fraction of spindles outside the duration limits (too long plus too short).
(int((spindles['duration']>maxDuration).sum())+int((spindles['duration']<minDuration).sum()))/len(spindles)

0.0

In [29]:
# Complement of the discarded fraction: the share of spindles that is kept.
1-(int((spindles['duration']>maxDuration).sum())+int((spindles['duration']<minDuration).sum()))/len(spindles)

1.0

In [30]:
# Flag every spindle against the duration limits.
spindles['overMax']=spindles.duration>maxDuration
spindles['underMin']=spindles.duration<minDuration
spindles['discard']=spindles.overMax|spindles.underMin
# Inclusive bounds: a duration exactly equal to a limit is not discarded, so
# it must be kept. The former strict comparison left such rows in neither
# group. Missing durations still compare False and stay unflagged.
spindles['keep']=spindles.duration.between(minDuration,maxDuration)
spindles

Unnamed: 0,index,startTime,duration,channel,subjectId,labelerId,type,samplerate,stopTime,startInd,stopInd,overMax,underMin,discard,keep
0,0,282.240,0.72,C3-A1,0001,0001,spindle,200,282.960,56448,56592,False,False,False,True
1,1,311.720,1.54,C3-A1,0001,0001,spindle,200,313.260,62344,62652,False,False,False,True
2,2,340.280,0.72,C3-A1,0001,0001,spindle,200,341.000,68056,68200,False,False,False,True
3,3,366.830,0.65,C3-A1,0001,0001,spindle,200,367.480,73366,73496,False,False,False,True
4,4,373.740,0.50,C3-A1,0001,0001,spindle,200,374.240,74748,74848,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
759,759,1731.695,1.00,CZ-A1,0006,0002,spindle,200,1732.695,346339,346539,False,False,False,True
760,760,1769.555,1.00,CZ-A1,0006,0002,spindle,200,1770.555,353911,354111,False,False,False,True
761,761,1780.175,1.00,CZ-A1,0006,0002,spindle,200,1781.175,356035,356235,False,False,False,True
762,762,1793.640,1.00,CZ-A1,0006,0002,spindle,200,1794.640,358728,358928,False,False,False,True


In [31]:
# Summary statistics of the discarded spindles per subject and labeler.
spindles.loc[spindles['discard']].groupby(['subjectId','labelerId']).describe()

Unnamed: 0_level_0,index,index,index,index,index,index,index,index,startTime,startTime,...,startInd,startInd,stopInd,stopInd,stopInd,stopInd,stopInd,stopInd,stopInd,stopInd
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max


In [32]:
# Duration statistics of the kept spindles per subject and labeler.
spindles.loc[spindles['keep'],['subjectId','labelerId','duration']].groupby(['subjectId','labelerId']).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,duration,duration,duration,duration,duration,duration,duration,duration
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
subjectId,labelerId,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
1,1,52.0,0.815385,0.271101,0.5,0.645,0.76,0.855,1.67
1,2,115.0,0.991217,0.066308,0.49,1.0,1.0,1.0,1.0
2,1,60.0,0.7645,0.211199,0.49,0.605,0.73,0.85,1.46
2,2,52.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
3,1,5.0,0.92,0.347922,0.61,0.7,0.76,1.07,1.46
3,2,44.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
4,1,44.0,1.078636,0.336746,0.39,0.85,1.01,1.3125,1.8
4,2,25.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
5,1,56.0,0.720179,0.181824,0.5,0.5625,0.72,0.835,1.28
5,2,86.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
