The following code reads in the InterSeizureInterval.csv data file, giving us a vector of interseizure intervals (ISIs). It then partitions the ISI data into training and test data (currently 75% of the data is used for training and 25% for testing). Here our primary regressors are d-dimensional time-delay embeddings of the ISIs, where each point is a vector of d consecutive ISIs. The network is trained to predict the (d+1)st ISI from this sequence of d ISIs.

The neural network used here is a feedforward (fully connected) network built from `Dense` layers with the Keras `Sequential` API that ships with TensorFlow.

In [207]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as matplot
import math as mth

from tensorflow import keras
from keras import layers, models, optimizers
from keras.models import Sequential
from keras.layers import Dense

In [208]:
'''
#The following reads in code from the raw data file:

import numpy as np
import csv

file = open('SeizuretrackerSample.csv')
csvreader = csv.reader(file)

rows = []

#get through initial headers 
for i in range(0,25):
  nextRow = next(csvreader) 

for i in range(0, 2639-25):
  nextRow = next(csvreader)
  rows.append(nextRow)
  
'''

#The following code reads in the InterSeizureInterval csv file.
#ISIs are already parsed in that file, so there is no need for convertToDateTimes or ISICalc here.

import numpy as np
import csv

#Use a context manager so the file handle is closed even if a row fails to parse,
#and read every row instead of assuming the file holds exactly 2613 of them.
with open('InterSeizureInterval.csv', newline='') as file:
  csvreader = csv.reader(file)
  #the first column of each non-empty row holds one ISI
  isiValues = [float(row[0]) for row in csvreader if row]

#column vector of shape (number of ISIs, 1), the layout the later cells slice and transpose
ISI = np.array(isiValues, dtype=float).reshape(-1, 1)


In [209]:
import numpy as np
from datetime import datetime

def convertToDateTimes(list):
  """Parse the timestamp column of raw CSV rows into datetime objects.

  Parameters
  ----------
  list : iterable of sequences
      One entry per CSV row; element [1] of each row is a timestamp string
      in the form '%Y-%m-%d %X' (e.g. '2020-01-31 14:05:00').

  Returns
  -------
  list of datetime
      One datetime per input row, in input order (empty for empty input).

  Raises
  ------
  ValueError
      Propagated from strptime when a timestamp does not match the format.
  IndexError
      When a row has fewer than two columns.
  """
  #Iterate over the argument itself rather than a global named `rows`, so the
  #function works on whatever the caller passes in.
  #The parameter keeps its name `list` for backward compatibility; it shadows the
  #builtin inside this function, so the builtin is deliberately not used here.
  return [datetime.strptime(row[1], '%Y-%m-%d %X') for row in list]

In [210]:
import numpy as np
from datetime import datetime

def ISICalc(dateList):
  """Compute interseizure intervals, in days, between consecutive datetimes.

  Parameters
  ----------
  dateList : list of datetime
      Event times in the order they should be differenced.

  Returns
  -------
  numpy.ndarray
      Column vector of shape (len(dateList)-1, 1); entry k is
      dateList[k+1] - dateList[k] expressed in days.
  """
  secondsPerDay = 60*60*24
  intervals = np.zeros((len(dateList)-1, 1))

  #walk over neighbouring (earlier, later) pairs
  for k, (earlier, later) in enumerate(zip(dateList[:-1], dateList[1:])):
    elapsed = later - earlier
    intervals[k] = elapsed.total_seconds()/secondsPerDay #express ISIs in days
  return intervals


In [211]:
'''
def timeDelayEmbedding(array, d, n):
  #m = mth.floor(n/(tau+1)) #number of embeddings + correct one forecast into the future predictions
  m = n-d+1 
  embeddings = np.zeros([m,d])
  answers = np.zeros([m,1])

  indx=0
  for i in range(0,m):
    embeddings[i,0:d] = array[0,indx:indx+d]
    #print('index: ',i)
    #indx = indx+tau #yields disjoint embeddings- no overlapping points
    indx = indx + 1

  
  return[embeddings, answers, d, m]
  '''

"\ndef timeDelayEmbedding(array, d, n):\n  #m = mth.floor(n/(tau+1)) #number of embeddings + correct one forecast into the future predictions\n  m = n-d+1 \n  embeddings = np.zeros([m,d])\n  answers = np.zeros([m,1])\n\n  indx=0\n  for i in range(0,m):\n    embeddings[i,0:d] = array[0,indx:indx+d]\n    #print('index: ',i)\n    #indx = indx+tau #yields disjoint embeddings- no overlapping points\n    indx = indx + 1\n\n  \n  return[embeddings, answers, d, m]\n  "

In [212]:
def TimeEmbedding(array, d, n):
  """Cut a 1-by-N series into disjoint samples of d predictors plus one target.

  Consecutive samples do not overlap: sample k uses array[0, k*(d+1) : k*(d+1)+d]
  as predictors and array[0, k*(d+1)+d] as the value to forecast.

  Parameters
  ----------
  array : numpy.ndarray
      Row vector (indexed as array[0, j]) holding the series.
  d : int
      Embedding dimension, i.e. number of consecutive values per predictor row.
  n : number
      Number of series values to use; only floor(n/(d+1)) full samples are built.

  Returns
  -------
  list
      [embeddings (m-by-d), answers (m-by-1), d, m] with m = floor(n/(d+1)).
  """
  stride = d+1 #values consumed per sample: d predictors and the one that follows them
  m = mth.floor(n/stride) #number of embeddings + correct one forecast into the future predictions
  embeddings = np.zeros([m,d])
  answers = np.zeros([m,1])

  for row in range(m):
    start = row*stride
    embeddings[row,:] = array[0,start:start+d]
    answers[row,0] = array[0,start+d]

  return [embeddings, answers, d, m]


In [213]:
'''
dateList = convertToDateTimes(rows)
ISI = ISICalc(dateList)
'''
#sanity check: show the ISI column vector that the rest of the notebook works from
print(ISI)


[[ 2.91666667]
 [ 3.99305556]
 [ 2.98611111]
 ...
 [16.98055556]
 [19.        ]
 [17.96319444]]


In [214]:
'''
#this code generates uniform random data between the min and max of the normal ISI training data matrix: 
import numpy as np
import numpy.random as random


ISI = np.zeros((2613, 1))

for i in range(0, 2613):
  ISI[i] = random.uniform(low=0.0, high=7.0625, size=None)

'''
n = len(ISI)
proportionTraining = 3/4 #This parameter sets what fraction of our data will be used to train the network

#number of ISIs in the training partition; computed once so the split point lives in one place
nTrain = mth.floor(n*proportionTraining)

#The first nTrain ISIs as a 1-by-nTrain row vector, the layout TimeEmbedding indexes (array[0, j]).
#No np.zeros pre-allocation is needed: the transpose below creates the array directly.
trainArray = np.transpose(ISI[0:nTrain])

print(trainArray)
print(ISI)


[[2.91666667 3.99305556 2.98611111 ... 0.00694444 0.01041667 0.01041667]]
[[ 2.91666667]
 [ 3.99305556]
 [ 2.98611111]
 ...
 [16.98055556]
 [19.        ]
 [17.96319444]]


In [215]:
#three dimensional embedding (d = 3): each sample is 3 consecutive ISIs plus the 4th as its target.
#The length passed is the actual number of training ISIs, so it stays correct if the split proportion changes.
[embeddingsX, answersX, dX, mX] = TimeEmbedding(trainArray, 3, trainArray.shape[1])

In [216]:
#each row is one training sample: d = 3 consecutive ISIs taken from trainArray
print(embeddingsX)

[[2.91666667 3.99305556 2.98611111]
 [1.20138889 2.84027778 1.20833333]
 [0.5625     1.61458333 2.70833333]
 ...
 [0.00694444 0.01041667 0.00694444]
 [0.00694444 1.19097222 0.01388889]
 [0.09375    0.01041667 0.00694444]]


In [217]:
#check that training embedding is correct:
#with d = 3 the samples are disjoint blocks of 4 values, so answersX[i] should equal ISI[4*i + 3]
#(answersX[0] == ISI[3], answersX[1] == ISI[7], ...)
print(ISI[0:10])
print(answersX[0:10])

[[2.91666667]
 [3.99305556]
 [2.98611111]
 [1.83333333]
 [1.20138889]
 [2.84027778]
 [1.20833333]
 [0.77083333]
 [0.5625    ]
 [1.61458333]]
[[1.83333333e+00]
 [7.70833333e-01]
 [4.33333333e+00]
 [7.06250000e+00]
 [1.73611111e-02]
 [6.94444444e-03]
 [2.15625000e+00]
 [1.38888889e-02]
 [1.38888889e-02]
 [1.03125000e+00]]


In [218]:
#Everything after the training slice is test data. The split index is derived from
#proportionTraining so the two partitions stay complementary if the proportion is changed.
nTrainSplit = mth.floor(n*proportionTraining)
testArray = np.transpose(ISI[nTrainSplit:n])
#Pass the real number of test ISIs (n - nTrainSplit); floor(n/4) can be one short of it
#and would then drop a usable sample at the end of the series.
[embeddingsTestX, answersTestX, dTest, mTestX] = TimeEmbedding(testArray, 3, testArray.shape[1])

In [219]:
#Check that test embedding is correct:
#answersTestX[i] should equal testArray[0, 4*i + 3] (answersTestX[0] == testArray[0, 3], answersTestX[1] == testArray[0, 7], ...)
print(testArray[0,0:10])
print(answersTestX[0:10])

[0.01041667 0.02083333 0.01041667 0.01041667 0.00694444 0.00694444
 0.01388889 0.43402778 0.01388889 0.01041667]
[[0.01041667]
 [0.43402778]
 [0.01041667]
 [0.01736111]
 [0.01388889]
 [0.01041667]
 [0.41319444]
 [0.02430556]
 [0.00694444]
 [0.02430556]]


In [220]:
#The purpose of the concatenate is to format the data when we have more than one vector of predictors:
#additional regressors can be appended to each list and are joined column-wise (axis = 1).
#Not relevant now (each list holds a single array, so the result is just a copy of it),
#but this will be helpful when we start incorporating other regressors.

inputs = np.concatenate([embeddingsX,], axis =1)
correctOutputs = np.concatenate([answersX,], axis=1)

inputsTest = np.concatenate([embeddingsTestX, ], axis = 1)
correctTestOutputs = np.concatenate([answersTestX, ], axis = 1)

In [221]:
#empty Keras Sequential model; the Dense layers are stacked onto it with .add() in the following cell
function_approximater_A = Sequential()

In [222]:
#Input layer: receives the d = 3 embedded ISIs. Only this first layer declares an input shape.
function_approximater_A.add(Dense(units = 3, activation = 'linear', input_shape=(3,)))
#Hidden layers, halving in width. Each Dense layer takes its input size from the layer before it,
#so no input_dim is passed (a value of 1 would not match the real input widths 3, 256, 128, ...).
#function_approximater_A.add(Dense(units = 2048, activation = 'sigmoid'))
#function_approximater_A.add(Dense(units = 1024, activation = 'sigmoid'))
#function_approximater_A.add(Dense(units = 512, activation = 'sigmoid'))
function_approximater_A.add(Dense(units = 256, activation = 'sigmoid'))
function_approximater_A.add(Dense(units = 128, activation = 'sigmoid'))
function_approximater_A.add(Dense(units = 64, activation = 'sigmoid'))
function_approximater_A.add(Dense(units = 32, activation = 'sigmoid'))
function_approximater_A.add(Dense(units = 16, activation = 'sigmoid'))
function_approximater_A.add(Dense(units = 8, activation = 'sigmoid'))
#function_approximater_A.add(Dense(units = 4, activation = 'sigmoid'))
#Output layer: a single linear unit giving the predicted next ISI.
function_approximater_A.add(Dense(units = 1, activation = 'linear'))

In [223]:
#plain stochastic gradient descent with an explicit learning rate
sgd_1 = tf.keras.optimizers.SGD(learning_rate=0.05)
#Pass the optimizer object itself. The string 'SGD' makes Keras build a fresh optimizer with its
#default learning rate, which would leave sgd_1 (and its 0.05) unused.
function_approximater_A.compile(optimizer = sgd_1, loss = 'mse')

In [224]:
#running this trains the network

#Batch size equal to dataset length - all data is used for every weight update (classic, full-batch gradient descent):
#function_approximater_A.fit(inputs, correctOutputs, batch_size = len(answersX), epochs = 20)

#Batch size equal to 1 - only one datapoint is used for every weight update (stochastic gradient descent):
function_approximater_A.fit(inputs, correctOutputs, batch_size = 1, epochs = 20)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f7852efc310>

In [225]:
#run the trained network on the held-out test embeddings; one predicted ISI per row of inputsTest
y_pred = function_approximater_A.predict(inputsTest)

In [226]:
#inspect the test predictions
#NOTE(review): this prints every prediction; a slice such as y_pred[:10] would be enough to see the pattern
print(y_pred)

[[0.41971576]
 [0.41971582]
 [0.41971576]
 [0.4197158 ]
 [0.41971445]
 [0.41971335]
 [0.41971382]
 [0.41971582]
 [0.41971576]
 [0.4197138 ]
 [0.41971532]
 [0.41971582]
 [0.41971457]
 [0.41971558]
 [0.41971338]
 [0.41971582]
 [0.41971353]
 [0.41971576]
 [0.41971576]
 [0.4197157 ]
 [0.41971374]
 [0.41971582]
 [0.41971582]
 [0.41971582]
 [0.41971576]
 [0.41971356]
 [0.41971582]
 [0.41971576]
 [0.41971576]
 [0.41971713]
 [0.41971526]
 [0.41971582]
 [0.41971582]
 [0.41971302]
 [0.41971588]
 [0.41971582]
 [0.41971475]
 [0.4197158 ]
 [0.41970378]
 [0.41971588]
 [0.41971582]
 [0.41971487]
 [0.41971582]
 [0.41971526]
 [0.41971517]
 [0.41971582]
 [0.41971228]
 [0.4197154 ]
 [0.41971487]
 [0.4197132 ]
 [0.41971576]
 [0.41971582]
 [0.4197123 ]
 [0.41971505]
 [0.41971323]
 [0.41971415]
 [0.4197146 ]
 [0.4197158 ]
 [0.41971576]
 [0.4197151 ]
 [0.41971564]
 [0.41971344]
 [0.41971564]
 [0.41971332]
 [0.41971377]
 [0.41971576]
 [0.419715  ]
 [0.4197146 ]
 [0.41971526]
 [0.41971332]
 [0.41971374]
 [0.41

In [227]:
#mean of the training ISIs: transpose the 1-by-N trainArray to N-by-1, then average over its rows
temp= np.transpose(trainArray)
sum(temp)/len(temp)

#The outputs are almost constant across all test inputs, i.e. the network has collapsed to
#predicting a single value near the average of the training data. This is a sign that the neural
#network is not adequately modelling the complexity of the time series.
#NOTE(review): the predictions printed above are about 0.4197 while this mean is about 0.2965. The constant that
#minimises MSE is the mean of the training targets (answersX), so answersX.mean() is the tighter comparison - confirm.
#I'm intrigued to see to what extent adding the discrete derivatives as regressors will help

array([0.2965338])