# MODEL TRAINING - testing the full approach - COGNITION
Trained models are saved in a separate experiment folder using pickle.
**DO NOT RUN AGAIN**

In [1]:
#external libraries
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.colors as clt
import plotly
import plotly.subplots as sb
import plotly.express as px
import plotly.graph_objects as go
import dotenv
import pandas as pd
import scipy.fft as fft
import scipy.signal as sg
import scipy.io as sio
import pickle as pkl
import xgboost as xgb
import time
import uuid
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers

#project library
from spinco import *

#environment variables: load the definitions of the local 'lab.env' file before os.environ is read below
dotenv.load_dotenv('lab.env')

#project variables
#DATAPATH has to be defined at this point, otherwise the lookup raises KeyError
datapath=os.environ['DATAPATH']
#database folders located under the data root (only cognipath is used by the loading cell of this notebook)
cognipath=datapath+"/COGNITION"
dreamspath=datapath+"/DREAMS"
masspath=datapath+"/MASS"


## experiment id
We'll use it to name the folder created at the end of the notebook.

In [2]:
#random (version 4) UUID identifying this run, kept as a string because it is later concatenated into the output path
experimentId=str(uuid.uuid4())

## define a fixed samplerate and input dimension for NN

In [3]:
#sampling rate handed to the preprocessing filter; presumably in Hz - TODO confirm
samplerate=200
#number of samples in every sliding window and in the kernel of the first convolution
inputDimension=601

## load cognition

In [4]:
#load the COGNITION data through the spinco helper: signals, their annotations and one metadata row per recording
#NOTE(review): signals is later iterated with .items() and indexed by subjectId, so it is presumably a dict keyed by subject - confirm
signals,annotations, signalsMetadata = loadCOGNITIONSpindles(cognipath, returnSignals=True)

In [5]:
#keep only the rows whose type is 'spindle' and renumber them from zero
isSpindle=annotations.type=='spindle'
annotations=annotations.loc[isSpindle].reset_index(drop=True)
len(annotations)

2785

In [6]:
#preview the first rows of the annotation table
annotations.head()

Unnamed: 0,subjectId,channel,startTime,stopTime,duration,phase,labelerId,type,samplerate,startInd,stopInd
0,1,C3,1938.132297,1939.202326,1.070029,2,1,spindle,200,387626,387840
1,1,C3,2047.217898,2048.112834,0.894936,2,1,spindle,200,409444,409623
2,1,C3,2170.856032,2171.634237,0.778205,3,1,spindle,200,434171,434327
3,1,C3,2219.766538,2220.894943,1.128405,3,1,spindle,200,443953,444179
4,1,C3,2263.482492,2264.357974,0.875482,3,1,spindle,200,452696,452872


In [7]:
#preview the first rows of the per-recording metadata
signalsMetadata.head()

Unnamed: 0,filename,channel,subjectId,duration,samplerate,isOriginalSamplerate,database
0,COG001_canal_C3.txt,C3-M2,1,39600,200,False,COGNITION
1,COG002_canal_C3.txt,C3-M2,2,39600,200,False,COGNITION
2,COG003_canal_C3.txt,C3-M2,3,39600,200,False,COGNITION
3,COG004_canal_C3.txt,C3-M2,4,39600,200,False,COGNITION
4,COG005_canal_C3.txt,C3-M2,5,39600,200,False,COGNITION


## Preprocess

Broadband in 0-40 Hz. Previous experiments (up to 50 Hz) are now in folders named .../features_old/...

In [8]:
def preprocessVector(vector,samplerate):
    """Band-limit a signal and standardise it to zero mean and unit variance.

    Parameters
    ----------
    vector : array-like
        Signal to preprocess. NOTE(review): assumed to be a 1-D signal - confirm
        against spinco's filterBand.
    samplerate : number
        Sampling rate, forwarded unchanged to filterBand.

    Returns
    -------
    The filtered signal minus its mean, divided by its standard deviation.
    When the filtered signal has zero standard deviation (flat signal) it is
    only centred, i.e. all zeros are returned, instead of dividing by zero.
    """
    #1. Lowpass 40Hz (order-4 filtering of the [0,40] band, done by spinco's filterBand)
    filtered=filterBand(vector,[0,40],samplerate,filterOrder=4)
    #2. Z-score
    centred=filtered-np.mean(filtered)
    spread=np.std(filtered)
    if spread==0:
        #a flat signal would otherwise turn into NaN/inf and silently poison training
        return centred
    return centred/spread

In [9]:
#overwrite every stored signal with its preprocessed version (same keys, same container)
for subject in list(signals.keys()):
    signals[subject]=preprocessVector(signals[subject],samplerate)

## prepare slides and labels for training

In [10]:
#number of samples of each recording: duration times samplerate, truncated to an integer
signalsMetadata['excerptDimension']=(signalsMetadata.duration*signalsMetadata.samplerate).astype('int64')
#one row per sample of every recording: its position inside the recording (trainIndex) and its subject
#the per-recording frames are collected first and concatenated once: calling pd.concat inside
#the loop copies all the rows accumulated so far on every iteration
extendedParts=[
    pd.DataFrame({
        'trainIndex':np.arange(row.excerptDimension),
        'subjectId':row.subjectId
    })
    for ind,row in signalsMetadata.iterrows()
]
#pd.concat rejects an empty list, so fall back to the empty frame the loop version started from
allExtended=pd.concat(extendedParts) if extendedParts else pd.DataFrame()

In [11]:
#display the per-sample index table (the notebook shows a truncated view)
allExtended

Unnamed: 0,trainIndex,subjectId
0,0,0001
1,1,0001
2,2,0001
3,3,0001
4,4,0001
...,...,...
7919995,7919995,0009
7919996,7919996,0009
7919997,7919997,0009
7919998,7919998,0009


In [12]:
#annotations made by labeler '0001', renumbered from zero
fromLabelerOne=annotations.labelerId=='0001'
usedAnnotations=annotations.loc[fromLabelerOne].reset_index(drop=True)

In [13]:
#labels of each subject, built by spinco's excerptAnnotationsToLabels from that subject's annotations
labels={
    row.subjectId:excerptAnnotationsToLabels(
        usedAnnotations[usedAnnotations.subjectId==row.subjectId],
        excerptDimension=row.excerptDimension
    )
    for index, row in signalsMetadata.iterrows()
}

In [14]:
#check: the labels of a recording must have as many entries as the recording has samples
#(prints one True/False per recording)
for subjectId, nSamples in zip(signalsMetadata.subjectId,signalsMetadata.excerptDimension):
    print(nSamples==len(labels[subjectId]))

True
True
True
True
True
True
True
True
True


In [15]:
#sliding windows of inputDimension samples over each padded signal
#NOTE(review): the padding width comes from spinco's window2half; presumably half a window per side - confirm
slides={}
for subjectId in signalsMetadata.subjectId:
    paddedSignal=padVectorBothSides(signals[subjectId],window2half(inputDimension),method='closest')
    slides[subjectId]=np_tricks.sliding_window_view(paddedSignal,(inputDimension,))

In [16]:
#check: there must be exactly one window per sample of each recording
#(prints one True/False per recording)
for subjectId, nSamples in zip(signalsMetadata.subjectId,signalsMetadata.excerptDimension):
    print(nSamples==len(slides[subjectId]))

True
True
True
True
True
True
True
True
True


## load data split
LOOCV with 7 subjects

In [17]:
#for now we use fixed CV data splits, loaded from a pickle file, instead
#NOTE(review): loadPickle is a spinco helper; pickle files can execute code when loaded, so only load trusted ones
dataSplits=loadPickle("dataSplits_LOOCV.pkl")
dataSplits

Unnamed: 0,train,val,test
0,"[0004, 0005, 0006, 0007]","[0003, 0008]",2
1,"[0002, 0005, 0006, 0008]","[0007, 0004]",3
2,"[0002, 0003, 0005, 0007]","[0006, 0008]",4
3,"[0002, 0003, 0007, 0008]","[0006, 0004]",5
4,"[0002, 0003, 0007, 0008]","[0004, 0005]",6
5,"[0002, 0004, 0005, 0008]","[0003, 0006]",7
6,"[0002, 0005, 0006, 0007]","[0003, 0004]",8


## define annotation criteria

In [18]:
#distinct labeler ids present in the annotations
np.unique(annotations.labelerId)

array(['0001'], dtype=object)

In [19]:
#a single annotation criterium, 'E1', that relies on labeler '0001' only
criteriumRecords=[
    {'criteriumId':'0001','criteriumName':'E1','labelerIdList':['0001']}
]
annotationCriteria=pd.DataFrame(criteriumRecords)

In [20]:
#display the annotation criteria table
annotationCriteria

Unnamed: 0,criteriumId,criteriumName,labelerIdList
0,1,E1,[0001]


In [21]:
#every annotation criterium combined with every data split: one row per model to train
experimentModels=annotationCriteria.merge(dataSplits,how='cross')

In [22]:
#display the table of models to train
experimentModels

Unnamed: 0,criteriumId,criteriumName,labelerIdList,train,val,test
0,1,E1,[0001],"[0004, 0005, 0006, 0007]","[0003, 0008]",2
1,1,E1,[0001],"[0002, 0005, 0006, 0008]","[0007, 0004]",3
2,1,E1,[0001],"[0002, 0003, 0005, 0007]","[0006, 0008]",4
3,1,E1,[0001],"[0002, 0003, 0007, 0008]","[0006, 0004]",5
4,1,E1,[0001],"[0002, 0003, 0007, 0008]","[0004, 0005]",6
5,1,E1,[0001],"[0002, 0004, 0005, 0008]","[0003, 0006]",7
6,1,E1,[0001],"[0002, 0005, 0006, 0007]","[0003, 0004]",8


## model fitting

In [34]:
def _buildSpindleModel(inputDimension):
    """Build a fresh, uncompiled convolutional network.

    The input is a single-channel sequence of unspecified length. The first
    Conv1D layer uses a kernel of inputDimension samples; every following
    Conv1D layer uses a kernel of 1 and therefore acts on each output step
    independently. The last layer has one sigmoid unit, and Flatten collapses
    every axis except the batch axis.
    """
    X_input = layers.Input((None,1))
    X = layers.Conv1D(filters=200,kernel_size=inputDimension,activation='relu')(X_input)
    for nFilters in (100,50,25,5):
        X = layers.Conv1D(filters=nFilters,kernel_size=1,activation='relu')(X)
    X = layers.Conv1D(filters=1,kernel_size=1,activation='sigmoid')(X)
    X = layers.Flatten()(X)
    return models.Model(inputs=X_input,outputs=X)

DLmodels=[]
modelIds=[]
#loss curves of every fitted model, in the same order as DLmodels and modelIds
#(previously each fit overwrote `history`, so only the last one survived)
trainingHistories=[]
for index, row in experimentModels.iterrows():
    print('**********************')
    print(str(index+1)+' of '+str(len(experimentModels)))
    #model definition: a new network for every row, so no weights are shared between data splits
    model = _buildSpindleModel(inputDimension)

    #train generator: restricted to the samples of the training subjects of this row
    trainExtended=allExtended[allExtended.subjectId.isin(row.train)].reset_index(drop=True)
    trainGenerator=spinGen(slides,labels,trainExtended)
    #val generator: same construction with the validation subjects
    valExtended=allExtended[allExtended.subjectId.isin(row.val)].reset_index(drop=True)
    valGenerator=spinGen(slides,labels,valExtended)
    #model fit
    optimizer = optimizers.Adam()
    loss_fn = tf.keras.losses.binary_crossentropy
    model.compile(optimizer, loss_fn)
    history = model.fit(trainGenerator,validation_data=valGenerator,epochs=1)

    DLmodels.append(model)
    modelIds.append(str(uuid.uuid4()))
    #keep only the plain dict of metric values, not the History object itself
    trainingHistories.append(history.history)


**********************
1 of 7
**********************
2 of 7
**********************
3 of 7
**********************
4 of 7
**********************
5 of 7
**********************
6 of 7
**********************
7 of 7


In [35]:
#attach the generated model ids, one per row, in training order
experimentModels=experimentModels.assign(modelId=modelIds)

In [36]:
#display the table of models, now including the modelId column
experimentModels

Unnamed: 0,criteriumId,criteriumName,labelerIdList,train,val,test,modelId
0,1,E1,[0001],"[0004, 0005, 0006, 0007]","[0003, 0008]",2,ca4abebe-7534-4387-9f7e-7e56aeb102df
1,1,E1,[0001],"[0002, 0005, 0006, 0008]","[0007, 0004]",3,ec345db7-d0e7-447a-a219-52693d52bf41
2,1,E1,[0001],"[0002, 0003, 0005, 0007]","[0006, 0008]",4,5efd55af-ba1f-4dd8-905e-9279a3c25d07
3,1,E1,[0001],"[0002, 0003, 0007, 0008]","[0006, 0004]",5,79b2270a-cfb8-4f96-987a-040fa3dc0386
4,1,E1,[0001],"[0002, 0003, 0007, 0008]","[0004, 0005]",6,b1cb5f16-0c62-4dd7-b5d9-16fae63b28c5
5,1,E1,[0001],"[0002, 0004, 0005, 0008]","[0003, 0006]",7,28b0a080-fdd1-4b49-97d4-f98a8671c508
6,1,E1,[0001],"[0002, 0005, 0006, 0007]","[0003, 0004]",8,acc1aab9-4594-40ce-8d0c-f2111f7a6c69


## save results

In [37]:
#create parent folder
#os.makedirs also creates a missing ".../experiments" folder, which made os.mkdir fail,
#and exist_ok=True lets this cell be run again for the same experimentId
experimentpath=datapath+"/experiments/"+experimentId
os.makedirs(experimentpath,exist_ok=True)
#save each of the models, one pickle file per model named after its modelId
#NOTE(review): the models are pickled through spinco's dumpPickle; pickle files can execute code
#when loaded, so they must only be reloaded from trusted locations
for name, model in zip(modelIds,DLmodels):
    dumpPickle(experimentpath+"/"+name+".pkl",model)
#save experiment information (the table linking every modelId to its data split)
dumpPickle(experimentpath+"/experimentModels.pkl",experimentModels)

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers\conv1d
......vars
.........0
.........1
...layers\conv1d_1
......vars
.........0
.........1
...layers\conv1d_2
......vars
.........0
.........1
...layers\conv1d_3
......vars
.........0
.........1
...layers\conv1d_4
......vars
.........0
.........1
...layers\conv1d_5
......vars
.........0
.........1
...layers\flatten
......vars
...layers\input_layer
......vars
...metrics\mean
......vars
.........0
.........1
...optimizer
......vars
.........0
.........1
.........10
.........11
.........12
.........13
.........14
.........15
.........16
.........17
.........18
.........19
.........2
.........20
.........21
.........22
.........23
.........24
.........3
.........4
.........5
.........6
.........7
.........8
.........9
...vars
Keras model archive saving:
File Name                                             Modified             Size
config.json                                    2023-09-01 14:27:40         5064
met

In [38]:
#you need to use this in the following parts of the experiment
#(it is also the name of the folder the models were saved in)
print(experimentId)

f7a9e88f-5e0f-4465-a5ff-1af9e2f93866
