### Implementing Long Short-Term Memory to detect and classify Parkinson's Freezing of Gait types in time series data

In [None]:
using Pkg

# Pkg.add("NNlib")
# Pkg.add("DataFrames")
# Pkg.add("ResumableFunctions")

: 

In [None]:
using Flux
using Flux: @epochs, batch, throttle

using CSV
using NNlib
using DataFrames
using Distributions
using ResumableFunctions

In [None]:
# Load the recordings from `./filtered_data.csv` (relative to the working directory)
# into a DataFrame bound to the global `parkinson`.
parkinson = CSV.read("./filtered_data.csv", DataFrame)

In [None]:
# Lazily iterate over `parkinson_dataframe` in consecutive windows of rows.
#
# Arguments
#   parkinson_dataframe : table providing the columns "AccV", "AccML", "AccAP" and "event"
#   batch_size          : stride between window starts. A full window covers rows
#                         `i:i+batch_size`, i.e. `batch_size + 1` rows, so neighbouring
#                         windows share one row.
#
# Keywords
#   labels  : class names handed to `Flux.onehotbatch` for the "event" column
#   partial : when `true`, the final window is shortened to fit the table and still
#             yielded; by default (`false`) an incomplete trailing window is skipped so
#             that every yielded batch has the same number of rows
#
# Yields `(x, y)`: `x` is the `hcat` of the three acceleration columns (one row per
# sample, one column per axis) and `y` is the one-hot encoding of the matching
# "event" values.
@resumable function data_loader(parkinson_dataframe, batch_size ; labels=["StartHesitation", "Turn", "Walking", "Normal"], partial=false)
    # The frame is only read, never modified, so no defensive copy is taken.
    nrows = size(parkinson_dataframe, 1)

    for i in 1:batch_size:nrows
        # Clamp the window to the table; indexing `i:i+batch_size` unconditionally
        # runs past the last row on the final window and raises a BoundsError.
        last_row = min(i + batch_size, nrows)
        if !partial && last_row - i < batch_size
            break
        end
        rows = i:last_row

        x = hcat(
            parkinson_dataframe[!, "AccV"][rows],
            parkinson_dataframe[!, "AccML"][rows],
            parkinson_dataframe[!, "AccAP"][rows]
        )

        y = Flux.onehotbatch(parkinson_dataframe[!, "event"][rows], labels)

        @yield x, y
    end
end

In [None]:
"""
    init_params(in::Integer, out::Integer; mean=0.0, std=1.0)

Create the initial field values of an LSTM layer, in the order expected by the
positional `LSTM` constructor: `in`, `out`, the four weight matrices (`Wf`, `Wi`, `Wc`,
`Wo`), the four bias vectors (`bf`, `bi`, `bc`, `bo`) and the two memories (`c`, `h`).

# Arguments
- `in`: number of rows of the input the layer multiplies its weights with.
- `out`: number of hidden units.

# Keywords
- `mean`, `std`: parameters of the normal distribution the weights and biases are drawn
  from. Samples are restricted to the interval `[-1, 1]`.

# Returns
A `Vector{Any}` with twelve entries. Weight matrices have size `(out, in)`, biases have
length `out`. Both memories start as `(out, 1)` zero matrices: the state belongs to the
hidden units, not to the input features, and a single column broadcasts against any
number of samples.
"""
function init_params(in::Integer, out::Integer ; mean=0.0, std=1.0)
    # one shared distribution: a normal restricted to [-1, 1]
    dist = truncated(Normal(mean, std), -1, 1)
    weight() = rand(dist, (out, in))
    bias() = rand(dist, out)

    return Any[
        in, out,
        weight(), # Wf
        weight(), # Wi
        weight(), # Wc
        weight(), # Wo
        bias(), # bf
        bias(), # bi
        bias(), # bc
        bias(), # bo

        # both the Long-Term and Short-Term memories are initialized with 0 values
        zeros(out, 1), # c
        zeros(out, 1)  # h
    ]
end

In [None]:
"""
    forward(x, lstm)

Run one step of the layer on `x` and return the new hidden state.

`lstm` must provide the fields `Wf`, `Wi`, `Wc`, `Wo`, `bf`, `bi`, `bc`, `bo`, `c` and
`h`. `x` is multiplied from the left by the weight matrices, so its first dimension has
to match their second dimension.

The step computes

    f = sigmoid(Wf * x + bf)          (forget gate)
    i = sigmoid(Wi * x + bi)          (input gate)
    o = sigmoid(Wo * x + bo)          (output gate)
    c = f .* c_prev + i .* tanh(Wc * x + bc)
    h = o .* tanh(c)

where every product between gates and states is element-wise.

Side effect: `lstm.c` and `lstm.h` are overwritten with the new `c` and `h`.
If the stored cell state cannot be combined element-wise with the gates (different
number of hidden units, or a column count that is neither 1 nor that of the current
input), it is treated as empty and the step starts from zeros.

Note: the gates are computed from `x` only; `lstm.h` is stored but not read here.
"""
function forward(x, lstm)
    # Gate activations. The NNlib activation functions operate on scalars, so they
    # are broadcast over the pre-activations.
    f = NNlib.sigmoid_fast.(lstm.Wf * x .+ lstm.bf) # forget gate
    i = NNlib.sigmoid_fast.(lstm.Wi * x .+ lstm.bi) # input gate
    o = NNlib.sigmoid_fast.(lstm.Wo * x .+ lstm.bo) # output gate

    # candidate values for the long-term memory
    candidate = NNlib.tanh_fast.(lstm.Wc * x .+ lstm.bc)

    # Use the stored cell state only when it lines up with the gates; otherwise
    # start from an all-zero state of the right shape.
    c_prev = lstm.c
    ncols = size(c_prev, 2)
    compatible = size(c_prev, 1) == size(f, 1) && (ncols == 1 || ncols == size(f, 2))
    c_prev = compatible ? c_prev : zeros(eltype(f), size(f))

    # The forget gate scales the old memory element-wise (not a matrix product).
    c = f .* c_prev .+ i .* candidate # new Long-Term Memory
    h = o .* NNlib.tanh_fast.(c)      # new Short-Term Memory

    # updating the memory
    lstm.c, lstm.h = c, h

    # returning the hidden state for the next layer
    return h
end

In [None]:
"""
    LSTM(in, out, Wf, Wi, Wc, Wo, bf, bi, bc, bo, c, h)

Custom Long Short-Term Memory layer.

The struct is mutable because the two memories `c` and `h` are replaced on every
forward step. All fields have concrete types; integer sizes of any `Integer` type are
converted to `Int` on construction.

NOTE(review): Flux also exports a layer called `LSTM`; presumably this definition is
meant to take its place in this notebook — confirm.
"""
mutable struct LSTM
    # input and output size of the layer
    in::Int
    out::Int

    Wf::Matrix{Float64} # params of the Forget Gate
    Wi::Matrix{Float64} # params of the Input Gate
    Wc::Matrix{Float64} # params of the Input Modulation Gate
    Wo::Matrix{Float64} # params of the Output Gate

    # biases of the Gates above
    bf::Vector{Float64}
    bi::Vector{Float64}
    bc::Vector{Float64}
    bo::Vector{Float64}

    # cell state (aka. long-term memory) and hidden state (aka. short-term memory)
    c::Matrix{Float64}
    h::Matrix{Float64}
end

In [None]:
# Size-based constructor: obtain the initial field values from `init_params`
# and pass them on, in order, to the positional constructor.
function LSTM(in::Integer, out::Integer)
    fields = init_params(in, out)
    return LSTM(fields...)
end

In [None]:
# Make layer instances callable: `lstm(x)` delegates to `forward(x, lstm)`.
function (lstm::LSTM)(x)
    return forward(x, lstm)
end

In [None]:
# Register the struct as a functor with Flux, so that the training can reach and
# optimize its parameters.
Flux.@functor LSTM

In [None]:
# Pair-based constructor, enabling the `LSTM(in => out)` spelling; it forwards the
# two halves of the pair to the size-based constructor.
LSTM((in, out)::Pair) = LSTM(in, out)

In [None]:
# Explicitly define the trainable parameters of the layer.
# All the weights and biases are trainable; the cell state `c` and the hidden state `h`
# are deliberately left out.
# The result is a NamedTuple keyed by field name: that is the form current
# Flux/Optimisers expect, and iterating it still yields the arrays for `Flux.params`.
Flux.trainable(lstm::LSTM) = (
    Wf = lstm.Wf, Wi = lstm.Wi, Wc = lstm.Wc, Wo = lstm.Wo,
    bf = lstm.bf, bi = lstm.bi, bc = lstm.bc, bo = lstm.bo,
)

In [None]:
# Layer sizes of the network.
# NOTE(review): `data_loader` builds `x` with one row per sample and one column per
# acceleration axis, so 128 equals the number of rows in a batch (batch_size + 1) rather
# than the number of features — confirm the intended layout.
input_size, hidden_size, num_classes = 128, 3, 4

# custom LSTM layer -> dense read-out -> class probabilities
model = Chain(
    LSTM(input_size => hidden_size),
    Dense(hidden_size => num_classes),
    softmax,
)

;

In [None]:
# Cross-entropy between the predictions of the global `model` and the targets `y`.
function loss(x, y)
    prediction = model(x)
    return Flux.crossentropy(prediction, y)
end

# training hyper-parameters
optimizer = ADAM(0.001)
epochs = 10
batch_size = 127 # data loader returns batch_size + 1 samples

;

In [None]:
# Training loop: for each batch, differentiate the loss with respect to the model
# parameters and let the optimizer apply the update.
# NOTE(review): both loops exit after their first iteration, so exactly one update step
# is performed regardless of `epochs` — presumably a debugging leftover; confirm before
# removing the `break`s.
for epoch in 1:epochs
    for (input, output) in data_loader(parkinson, batch_size)
        ps = Flux.params(model)
        grads = Flux.gradient(() -> loss(input, output), ps)
        Flux.update!(optimizer, ps, grads)

        break
    end

    break
end