### Implementing Long Short-Term Memory to detect and classify Parkinsons' Freezing of Gait types in time series data

In [1]:
using Pkg

# Pkg.add("NNlib")
# Pkg.add("DataFrames")
# Pkg.add("ResumableFunctions")

In [2]:
using Flux
using Flux: @epochs, batch, throttle

using CSV
using NNlib
using DataFrames
using Distributions
using ResumableFunctions

[91m[1m┌ [22m[39m[91m[1mError: [22m[39mThis version of CUDA.jl only supports NVIDIA drivers for CUDA 11.x or higher (yours is for CUDA 9.1.0)
[91m[1m└ [22m[39m[90m@ CUDA C:\Users\hurub\.julia\packages\CUDA\s0e3j\src\initialization.jl:64[39m


In [3]:
parkinson = CSV.read("./filtered_data.csv", DataFrame)

Row,Id,Subject,Visit,Test,Medication,Time,AccV,AccML,AccAP,StartHesitation,Turn,Walking,event
Unnamed: 0_level_1,String15,String7,Int64,Int64,String3,Int64,Float64,Float64,Float64,Int64,Int64,Int64,String7
1,009ee11563,f62eec,4,2,on,0,-9.4173,0.767819,-1.75824,0,0,0,Normal
2,009ee11563,f62eec,4,2,on,1,-9.4251,0.768246,-1.75058,0,0,0,Normal
3,009ee11563,f62eec,4,2,on,2,-9.41995,0.779039,-1.74259,0,0,0,Normal
4,009ee11563,f62eec,4,2,on,3,-9.42127,0.772523,-1.74651,0,0,0,Normal
5,009ee11563,f62eec,4,2,on,4,-9.42811,0.777142,-1.75555,0,0,0,Normal
6,009ee11563,f62eec,4,2,on,5,-9.42602,0.774812,-1.76021,0,0,0,Normal
7,009ee11563,f62eec,4,2,on,6,-9.42569,0.768126,-1.7736,0,0,0,Normal
8,009ee11563,f62eec,4,2,on,7,-9.43509,0.765923,-1.76937,0,0,0,Normal
9,009ee11563,f62eec,4,2,on,8,-9.43209,0.770584,-1.77557,0,0,0,Normal
10,009ee11563,f62eec,4,2,on,9,-9.4295,0.775137,-1.77076,0,0,0,Normal


In [4]:
@resumable function data_loader(parkinson_dataframe, batch_size ; labels=["StartHesitation", "Turn", "Walking", "Normal"])
    pdf = deepcopy(parkinson_dataframe)

    for i in 1:batch_size:size(pdf, 1)
        if i > size(pdf, 1)
            break
        end
        x = hcat(
            pdf[!, "AccV"][i:i+batch_size],
            pdf[!, "AccML"][i:i+batch_size],
            pdf[!, "AccAP"][i:i+batch_size]
        )
    
        y = Flux.onehotbatch(pdf[!, "event"][i:i+batch_size], labels)
        
        @yield x, y
    end
end

data_loader (generic function with 1 method)

In [14]:
# # initialize weights with Gaussian distribution
function init_params(in::Integer, out::Integer ; mean=0.0, std=1.0)
    [
        in, out,
        rand(Truncated(Normal(mean, std), 0, 1), (out, in)), # Wf
        rand(Truncated(Normal(mean, std), 0, 1), (out, in)), # Wi
        rand(Truncated(Normal(mean, std), 0, 1), (out, in)), # Wc
        rand(Truncated(Normal(mean, std), 0, 1), (out, in)), # Wo
        rand(Truncated(Normal(mean, std), 0, 1), out), # bf
        rand(Truncated(Normal(mean, std), 0, 1), out), # bi
        rand(Truncated(Normal(mean, std), 0, 1), out), # bc
        rand(Truncated(Normal(mean, std), 0, 1), out), # bo

        # both the Long-Term and Short-Term memories are initialized with 0 values
        zeros(out, in), # c
        zeros(out, in)  # h
    ]
end

init_params (generic function with 1 method)

In [18]:
# implementing the forwarding method which is used in the Chaining process
function forward(x, lstm)
    # @show "FORWARD CALLED"
    # @assert typeof(lstm.Wf) == Matrix{Float64}
    # @assert typeof(lstm.bf) == Vector{Float64}
    
    # calculating the Memory modifier values
    f = NNlib.sigmoid_fast(lstm.Wf * x .+ lstm.bf) #
    i = NNlib.sigmoid_fast(lstm.Wi * x .+ lstm.bi) #
    o = NNlib.sigmoid_fast(lstm.Wo * x .+ lstm.bo) #

    # @show "f-i-o was calculated"

    # calculating the new memory values
    c = f .* lstm.c .+ i .* NNlib.tanh_fast(lstm.Wc * x .+ lstm.bc) # new Long-Term Memory
    h = o .* NNlib.tanh_fast(c) # new Short-Term Memory

    # @show "c-h was calculated"

    # updating the memory
    lstm.c, lstm.h = c, h

    # @show "memory was updated!"

    # returning the hidden parameters for the next layer
    # @show "assert H"
    # @assert typeof(h) == Matrix{Float64}
    # @show size(h)
    h
end

forward (generic function with 1 method)

In [7]:
# custom Long Short-Term Memory layer
mutable struct LSTM
    # input and output size of the layer
    in::Integer
    out::Integer

    Wf::Matrix{Float64} # params of the Forget Gate
    Wi::Matrix{Float64} # params of the Input Gate
    Wc::Matrix{Float64} # params of the Input Modulation Gate
    Wo::Matrix{Float64} # params of the Output Gate

    # biases of the Gates above
    bf::Vector{Float64}
    bi::Vector{Float64}
    bc::Vector{Float64}
    bo::Vector{Float64}

    # cell state (aka. long-term memory) and hidden state (aka. short-term memory)
    c::Matrix{Float64}
    h::Matrix{Float64}
end

In [8]:
# defining the constructor
LSTM(in::Integer, out::Integer) = LSTM(init_params(in, out)...)

LSTM

In [9]:
# Overload call, so the object can be used as a function
(lstm::LSTM)(x) = forward(x, lstm)

In [10]:
# creating a functor from the struct, so that the training can optimize its parameters
Flux.@functor LSTM

In [11]:
# creating the Long Short-Term Memory layer
function LSTM((in, out)::Pair)
    LSTM(in, out) # constructor
end

LSTM

In [12]:
# explicitely defining the trainable parameters of the layer
# all the Wrights and Biases are trainable
# exceptions >> Cell State and Hidden State
Flux.trainable(lstm::LSTM) = (lstm.Wf, lstm.Wi, lstm.Wc, lstm.Wo, lstm.bf, lstm.bi, lstm.bc, lstm.bo,)

In [15]:
input_size = 128
hidden_size = 20
num_classes = 4 

model = Chain(
    LSTM(input_size => hidden_size),
    Dense(hidden_size => num_classes),
    softmax
)

;

In [16]:
loss(x, y) = Flux.crossentropy(model(x), y)
optimizer = ADAM(0.001)
epochs = 10
batch_size = 127 # data loader returns batch_size + 1 samples

;

In [19]:
for epoch in 1:epochs
  for (input, output) in data_loader(parkinson, batch_size)
    grads = Flux.gradient(Flux.params(model)) do
      loss(input, output)
    end
    Flux.update!(optimizer, Flux.params(model), grads)

    break
  end

  break
end

LoadError: DimensionMismatch: arrays could not be broadcast to a common size; got a dimension with lengths 3 and 128