### Implementing Long Short-Term Memory to detect and classify Parkinsons' Freezing of Gait types in time series data

In [31]:
using Pkg

# Pkg.add("CSV")
# Pkg.add("NNlib")
# Pkg.add("DataFrames")
# Pkg.add("Distributions")
# Pkg.add("ResumableFunctions")
# Pkg.add("Flux")

In [32]:
using Flux
using Flux: @epochs, batch, throttle

using CSV
using NNlib
using DataFrames
using Distributions
using ResumableFunctions

In [33]:
parkinson = CSV.read("./filtered_data.csv", DataFrame)

Row,Id,Subject,Visit,Test,Medication,Time,AccV,AccML,AccAP,StartHesitation,Turn,Walking,event
Unnamed: 0_level_1,String15,String7,Int64,Int64,String3,Int64,Float64,Float64,Float64,Int64,Int64,Int64,String7
1,009ee11563,f62eec,4,2,on,0,-9.4173,0.767819,-1.75824,0,0,0,Normal
2,009ee11563,f62eec,4,2,on,1,-9.4251,0.768246,-1.75058,0,0,0,Normal
3,009ee11563,f62eec,4,2,on,2,-9.41995,0.779039,-1.74259,0,0,0,Normal
4,009ee11563,f62eec,4,2,on,3,-9.42127,0.772523,-1.74651,0,0,0,Normal
5,009ee11563,f62eec,4,2,on,4,-9.42811,0.777142,-1.75555,0,0,0,Normal
6,009ee11563,f62eec,4,2,on,5,-9.42602,0.774812,-1.76021,0,0,0,Normal
7,009ee11563,f62eec,4,2,on,6,-9.42569,0.768126,-1.7736,0,0,0,Normal
8,009ee11563,f62eec,4,2,on,7,-9.43509,0.765923,-1.76937,0,0,0,Normal
9,009ee11563,f62eec,4,2,on,8,-9.43209,0.770584,-1.77557,0,0,0,Normal
10,009ee11563,f62eec,4,2,on,9,-9.4295,0.775137,-1.77076,0,0,0,Normal


In [34]:
@resumable function data_loader(parkinson_dataframe, batch_size ; labels=["StartHesitation", "Turn", "Walking", "Normal"])
    pdf = deepcopy(parkinson_dataframe)

    for i in 1:batch_size:size(pdf, 1)
        if i > size(pdf, 1)
            break
        end
        x = hcat(
            pdf[!, "AccV"][i:i+batch_size],
            pdf[!, "AccML"][i:i+batch_size],
            pdf[!, "AccAP"][i:i+batch_size]
        )
    
        y = Flux.onehotbatch(pdf[!, "event"][i:i+batch_size], labels)
        
        @yield x, y
    end
end

data_loader (generic function with 1 method)

In [35]:
function init_params(in::Integer, out::Integer, train::Bool ; mean=0.0, std=1.0)
    [
        in, out,
        rand(Truncated(Normal(mean, std), -1, 1), 1)[1], # Wf
        rand(Truncated(Normal(mean, std), -1, 1), 1)[1], # Wi
        rand(Truncated(Normal(mean, std), -1, 1), 1)[1], # Wc
        rand(Truncated(Normal(mean, std), -1, 1), 1)[1], # Wo
        rand(Truncated(Normal(mean, std), -1, 1), 1)[1], # bf
        rand(Truncated(Normal(mean, std), -1, 1), 1)[1], # bi
        rand(Truncated(Normal(mean, std), -1, 1), 1)[1], # bc
        rand(Truncated(Normal(mean, std), -1, 1), 1)[1], # bo
        rand(Truncated(Normal(mean, std), -1, 1), 1)[1], # bo
        rand(Truncated(Normal(mean, std), -1, 1), 1)[1], # bo
        rand(Truncated(Normal(mean, std), -1, 1), 1)[1], # bo
        rand(Truncated(Normal(mean, std), -1, 1), 1)[1], # bo

        # both the Long-Term and Short-Term memories are initialized with 0 values
        0,  # c
        0,  # h
        true # train
    ]
end


init_params (generic function with 1 method)

In [36]:
# implementing the forwarding method which is used in the Chaining process
function forward(x, lstm)
    @show "FORWARD CALLED"
    long_remember_percent = NNlib.sigmoid_fast((lstm.c * lstm.wlr1) + (x * lstm.wlr2) + lstm.blr1)
    potential_remember_percent = NNlib.sigmoid_fast((lstm.h * lstm.wpr1) + (x * lstm.wpr2) + lstm.bpr1)
    potential_memory = NNlib.tanh_fast((lstm.c * lstm.wp1) + (x * lstm.wp2) + lstm.bp1)
    updated_long_memory = (lstm.c * long_remember_percent) + (potential_memory * potential_remember_percent)
    output_percent = NNlib.sigmoid_fast((lstm.h * lstm.wo1) + (x * lstm.wo2) + lstm.bo1)
    updated_short_memory = NNlib.tanh_fast(updated_long_memory) * output_percent

    # updating the memory
    if lstm.train
        lstm.c, lstm.h = updated_long_memory, updated_short_memory
    end

    [updated_short_memory]
end

forward (generic function with 1 method)

In [37]:
# custom Long Short-Term Memory layer
mutable struct LSTM
    # input and output size of the layer
    in::Integer
    out::Integer

    # Wf::Matrix{Float64} # params of the Forget Gate
    # Wi::Matrix{Float64} # params of the Input Gate
    # Wc::Matrix{Float64} # params of the Input Modulation Gate
    # Wo::Matrix{Float64} # params of the Output Gate

    # # biases of the Gates above
    # bf::Vector{Float64}
    # bi::Vector{Float64}
    # bc::Vector{Float64}
    # bo::Vector{Float64}

    wlr1::Float64
    wlr2::Float64
    blr1::Float64

    wpr1::Float64
    wpr2::Float64
    bpr1::Float64

    wp1::Float64
    wp2::Float64
    bp1::Float64

    wo1::Float64
    wo2::Float64
    bo1::Float64


    # cell state (aka. long-term memory) and hidden state (aka. short-term memory)
    c::Float64
    h::Float64

    train::Bool
end

In [38]:
# defining the constructor
LSTM(in::Integer, out::Integer, train::Bool) = LSTM(init_params(in, out, train)...)

LSTM

In [39]:
# Overload call, so the object can be used as a function
(lstm::LSTM)(x; train = true) = forward(x, lstm, train)

In [40]:
# creating a functor from the struct, so that the training can optimize its parameters
Flux.@functor LSTM

In [41]:
# creating the Long Short-Term Memory layer
function LSTM((in, out)::Pair, train)
    LSTM(in, out, train) # constructor
end

LSTM

In [42]:
# explicitely defining the trainable parameters of the layer
# all the Wrights and Biases are trainable
# exceptions >> Cell State and Hidden State
# Flux.trainable(lstm::LSTM) = (lstm.Wf, lstm.Wi, lstm.Wc, lstm.Wo, lstm.bf, lstm.bi, lstm.bc, lstm.bo,)
Flux.trainable(lstm::LSTM) = (lstm.wlr1, lstm.wlr2, lstm.blr1, lstm.wpr1, lstm.wpr2, lstm.bpr1, lstm.wp1, lstm.wp2, lstm.bp1, lstm.wo1, lstm.wo2, lstm.bo1)

In [43]:
input_size = 128
hidden_size = 1
num_classes = 4 

model = Chain(
    LSTM(input_size => hidden_size, true),
    Dense(hidden_size => num_classes),
    softmax
)

;

In [44]:
loss(x, y) = Flux.crossentropy(model(x, false), y)
optimizer = ADAM(0.001)
epochs = 10
batch_size = 127 # data loader returns batch_size + 1 samples

;

In [45]:
for epoch in 1:epochs
  for (input, output) in [(1, [0,0,1,0])] #data_loader(parkinson, batch_size)
    grads = Flux.gradient(Flux.params(model)) do
      loss(input, output)
    end
    Flux.update!(optimizer, Flux.params(model), grads)

    break
  end

  break
end

LoadError: MethodError: no method matching (::Chain{Tuple{LSTM, Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}, typeof(softmax)}})(::Int64, ::Bool)

[0mClosest candidates are:
[0m  (::Chain)(::Any)
[0m[90m   @[39m [32mFlux[39m [90m~/.julia/packages/Flux/n3cOc/src/layers/[39m[90m[4mbasic.jl:51[24m[39m


In [None]:
model(3, false)

LoadError: MethodError: no method matching (::Chain{Tuple{LSTM, Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}, typeof(softmax)}})(::Int64, ::Bool)

[0mClosest candidates are:
[0m  (::Chain)(::Any)
[0m[90m   @[39m [35mFlux[39m [90m~/.julia/packages/Flux/n3cOc/src/layers/[39m[90m[4mbasic.jl:51[24m[39m
