In [152]:
#external libraries
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.colors as clt
import plotly
import plotly.subplots as sb
import plotly.express as px
import plotly.graph_objects as go
import dotenv
import pandas as pd
import scipy.fft as fft
import scipy.signal as sg
import scipy.io as sio
import pickle as pkl
import xgboost as xgb
import time
import uuid

#project library
from spinco import *

#environment variables
dotenv.load_dotenv('lab.env')

#project variables
datapath=os.environ['DATAPATH']
cognipath=datapath+"/COGNITION"
dreamspath=datapath+"/DREAMS"
masspath=datapath+"/MASS"


## define a fixed samplerate

In [153]:
samplerate=200

## load mass

In [154]:
#annotations, signalsMetadata = loadMASSSpindles(masspath,forceSamplerate=samplerate,onlySpindlesFilteredN2=True)
annotations, signalsMetadata = loadMASSSpindles(masspath,forceSamplerate=samplerate)

In [155]:
annotations=annotations[annotations.type=='spindle'].reset_index(drop=True)
len(annotations)

33458

In [156]:
minDuration=0.3
maxDuration=5
annotations=annotations[annotations.duration>minDuration].reset_index(drop=True)
annotations=annotations[annotations.duration<maxDuration].reset_index(drop=True)
len(annotations)

33269

In [157]:
annotations.head()

Unnamed: 0,type,expert,subjectId,labelerId,startTime,duration,samplerate,stopTime,startInd,stopInd
0,spindle,E1,1,1,888.327805,0.640579,200,888.968384,177666,177794
1,spindle,E1,1,1,905.758061,0.578094,200,906.336155,181152,181267
2,spindle,E1,1,1,917.731574,0.847603,200,918.579177,183546,183716
3,spindle,E1,1,1,922.078189,0.878845,200,922.957034,184416,184591
4,spindle,E1,1,1,939.055445,0.757767,200,939.813212,187811,187963


In [158]:
signalsMetadata

Unnamed: 0,subjectId,file,channel,duration,samplerate,isOriginalSamplerate,database
0,1,MASS_0001.pkl,C3-CLE,28956.0,200,False,MASS
1,2,MASS_0002.pkl,C3-CLE,35016.0,200,False,MASS
2,3,MASS_0003.pkl,C3-CLE,36760.0,200,False,MASS
3,4,MASS_0004.pkl,C3-CLE,28004.0,200,False,MASS
4,5,MASS_0005.pkl,C3-CLE,31244.0,200,False,MASS
5,6,MASS_0006.pkl,C3-CLE,28990.0,200,False,MASS
6,7,MASS_0007.pkl,C3-CLE,28302.0,200,False,MASS
7,8,MASS_0008.pkl,C3-CLE,26846.0,200,False,MASS
8,9,MASS_0009.pkl,C3-CLE,29834.0,200,False,MASS
9,10,MASS_0010.pkl,C3-CLE,25930.0,200,False,MASS


In [159]:
signals,_,_= loadMASSSpindles(masspath,returnSignals=True)

In [160]:
print(256*25/32)   #<- TBD: make this automatic
# WARNING: parameters hardcoded ----------------------->
myUp=25
myDown=32
#<------------------------------------------------------

200.0


In [161]:
#1. resample
for ind, row in signalsMetadata.iterrows():
    signals[row.subjectId]=sg.resample_poly(signals[row.subjectId],up=myUp,down=myDown)

#2. update metadata
signalsMetadata["samplerate"]=samplerate
signalsMetadata["duration"]=signalsMetadata.apply(
    lambda row: len(signals[row.subjectId])/row.samplerate,
    axis=1) #it should be the exact same duration

#3. update annotations
annotations['samplerate']=samplerate
annotations['startInd']=annotations.apply(
    lambda row: seconds2index(row.startTime,row.samplerate),
    axis=1)
annotations['stopInd']=annotations.apply(
    lambda row: seconds2index(row.stopTime,row.samplerate),
    axis=1)

## Preprocess

broadband in 0-40Hz, previous experiments (up tp 50Hz, are now in folders as .../features_old/...)

In [162]:
def preprocessVector(vector,samplerate):
    #1. Lowpass 40Hz
    vector=filterBand(vector,[0,40],samplerate,filterOrder=4)
    #2. Z-score
    m=np.mean(vector)
    s=np.std(vector)
    vector=(vector-m)/s
    return vector

In [163]:
for subject,signal in signals.items():
    signals[subject]=preprocessVector(signal,samplerate)

prepare extended dataframe of all possible training examples

In [164]:
signalsMetadata['excerpDimension']=signalsMetadata.apply(lambda row: int(row.duration*row.samplerate),axis=1)

In [165]:
allExtended=pd.DataFrame()

In [166]:
for ind,row in signalsMetadata.iterrows():
    thisExtended=pd.DataFrame({
        'trainIndex':np.arange(row.excerpDimension),
        'subjectId':row.subjectId
    })
    allExtended=pd.concat([allExtended,thisExtended])

In [167]:
allExtended

Unnamed: 0,trainIndex,subjectId
0,0,0001
1,1,0001
2,2,0001
3,3,0001
4,4,0001
...,...,...
5278395,5278395,0019
5278396,5278396,0019
5278397,5278397,0019
5278398,5278398,0019


prepare annotations to use

In [168]:
usedAnnotations=annotations[annotations.labelerId=='0001'].reset_index(drop=True)

prepare all possible labels for training

In [169]:
labels={}
for index, row in signalsMetadata.iterrows():
    labels[row.subjectId]=excerptAnnotationsToLabels(usedAnnotations[usedAnnotations.subjectId==row.subjectId],excerptDimension=row.excerpDimension)

In [170]:
#check
for index, row in signalsMetadata.iterrows():
    print(row.excerpDimension==len(labels[row.subjectId]))

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


prepare all possible slides for training

In [171]:
slides={}
for index, row in signalsMetadata.iterrows():
    paddedSignal=padVectorBothSides(signals[row.subjectId],window2half(601),method='closest')
    slides[row.subjectId]=np_tricks.sliding_window_view(paddedSignal,(601,))

In [172]:
#check
for index, row in signalsMetadata.iterrows():
    print(row.excerpDimension==len(slides[row.subjectId]))

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


prepare the NN

In [173]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers


In [174]:
X_input = layers.Input((601,1))
X = layers.Dense(200,activation='relu')(X_input)
X = layers.Dense(100,activation='relu')(X)
X = layers.Dense(50,activation='relu')(X)
X = layers.Dense(25,activation='relu')(X)
X = layers.Dense(5,activation='relu')(X)
X = layers.Dense(1,activation='sigmoid')(X)
model = models.Model(inputs=X_input,outputs=X)

In [175]:
print(model.summary())

Model: "model_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_11 (InputLayer)       [(None, 601, 1)]          0         
                                                                 
 dense_55 (Dense)            (None, 601, 200)          400       
                                                                 
 dense_56 (Dense)            (None, 601, 100)          20100     
                                                                 
 dense_57 (Dense)            (None, 601, 50)           5050      
                                                                 
 dense_58 (Dense)            (None, 601, 25)           1275      
                                                                 
 dense_59 (Dense)            (None, 601, 5)            130       
                                                                 
 dense_60 (Dense)            (None, 601, 1)            6   

create a generator

In [176]:
allExtended.columns

Index(['trainIndex', 'subjectId'], dtype='object')

In [177]:
#   spin Generator       ------------------------------------------
class spinGen(tf.keras.utils.Sequence):
    def __init__(self, slides, labels, allExtended,
                batch_size=1024,
                shuffle=True):
        
        #copy variables
        self.slides = slides.copy()
        self.labels = labels.copy()
        self.allExtended = allExtended.copy()
        self.batch_size = batch_size
        self.shuffle = shuffle
        #compute df and n
        if shuffle:
            self.df=allExtended.sample(frac=1)
        else:
            self.df=allExtended
        self.n = len(self.df)

    def __getitem__(self, index):
        batch = self.df[index * self.batch_size:(index + 1) * self.batch_size]
        return self.__get_data(batch)

    def __get_data(self,batch):
        #inputs
        index_batch = batch['trainIndex']
        subject_batch = batch['subjectId']
        inputs = np.asarray([self.__get_input(index,subject) for index,subject in zip(index_batch,subject_batch)])
        #outputs
        outputs= np.asarray([self.__get_label(index,subject) for index,subject in zip(index_batch,subject_batch)])
        return inputs, outputs

    def __get_input(self, index,subject):
        slide=self.slides[subject][index]
        slide=slide[...,np.newaxis]
        return slide
    
    def __get_label(self, index,subject):
        label=self.labels[subject][index]
        label=label[...,np.newaxis]
        return label

    def __len__(self):
        return self.n // self.batch_size

    def on_epoch_end(self):
        if self.shuffle:
            self.df=self.df.sample(frac=1)

In [178]:
test=spinGen(slides,labels,allExtended)

In [179]:
a,b=next(iter(test))

In [180]:
a.shape

(1024, 601, 1)

In [181]:
b.shape

(1024, 1)

In [182]:
optimizer = optimizers.Adam()
loss_fn = tf.keras.losses.binary_crossentropy
model.compile(optimizer, loss_fn)

In [183]:
history = model.fit(spinGen(slides,labels,allExtended),epochs=1)

  6276/109987 [>.............................] - ETA: 18:20:59 - loss: 0.0883