# Dynamic Models - Final Model Building and Training

Import Julia machine learning packages, plotting package, and file loader

In [1]:
using Flux,Statistics,Plots,MLDataUtils,DelimitedFiles
using Flux.Data: DataLoader
include("utilityfunc.jl");

Define the upper and lower bounds on the features and define the scaling and unscaling functions

In [3]:
# Scaler functions used to preprocess the data (min-max normalization).
# conc_ub scales the target and is also the upper bound of the fifth feature (ci).
conc_ub = Float32(.6)
# Lower and upper bounds on the features, in row order:
# Lpt (1st), Kt (2nd), Rs (3rd), ti (4th), ci (5th).
i_lb = Float32[5e-7, 7e-7, 5.0, 0.0, 0.0]
i_ub = Float32[5e-6, 5e-6, 30, (300/3600), conc_ub]
# Min-max normalization of the features: maps i_lb -> 0 and i_ub -> 1.
# The subtraction is broadcast (like inv_iscaler), so x may be either one
# 5-element feature vector or a 5-by-N matrix with one sample per column.
iscaler(x) = (x .- i_lb) ./ (i_ub .- i_lb)
# Inverse of iscaler: maps normalized features back to their original ranges.
inv_iscaler(x) = x .* (i_ub .- i_lb) .+ i_lb
# Normalization of the target: divides by conc_ub (the lower bound is taken as 0).
oscaler(x) = x ./ conc_ub
# Inverse of oscaler.
inv_oscaler(x) = x .* conc_ub

inv_oscaler (generic function with 1 method)

In [4]:
# Apply the feature scaler `iscaler` to every column of the matrix `x`
# and return the results as a Float32 matrix of the same size.
function iscalerbatch(x)
    nrows, ncols = size(x)
    scaled = Matrix{Float32}(undef, nrows, ncols)
    # Each column is scaled independently and written into the output.
    for (j, col) in enumerate(eachcol(x))
        scaled[:, j] = iscaler(col)
    end
    return scaled
end

iscalerbatch (generic function with 1 method)

In [5]:
# Apply the inverse feature scaler `inv_iscaler` to every column of the
# matrix `x` and return the results as a Float32 matrix of the same size.
function inv_iscalerbatch(x)
    nrows, ncols = size(x)
    restored = Matrix{Float32}(undef, nrows, ncols)
    # Each column is un-scaled independently and written into the output.
    for (j, col) in enumerate(eachcol(x))
        restored[:, j] = inv_iscaler(col)
    end
    return restored
end

inv_iscalerbatch (generic function with 1 method)

In [6]:
# Problem dimensions used throughout the notebook.
n_samp = Int64(10_000);  # number of samples
nbatch = n_samp;         # set equal to the number of samples
nfeat = 5;               # 5 features
ntarg = 1;               # 1 target value
sl = 20;                 # sequence length of 20

Load in the data and scale the features and output

In [7]:
# Read the entire comma-delimited dataset as Float32 values.
d = readdlm("MLFinalData_v3.csv", ',', Float32)
# Rows 1-5 hold the inputs; scale them column by column.
X = iscalerbatch(d[1:5, :]);
# Row 6 holds the output; scale each entry.
Y = map(oscaler, d[6, :]);

Split the dataset into training, validation, and test sets using an 80/10/10 split

In [8]:
# Split the (X, Y) observations at 80 % and a further 10 %; the remainder
# forms the third (test) part.
(x_train, y_train), (x_val, y_val), (x_test, y_test) =
    splitobs((X, Y); at=(0.8, 0.1));

In [9]:
# Copy the validation and test targets out of the split results by indexing
# over their full range.
YVAL = y_val[:]
YTEST = y_test[:];

In [10]:
# Package the training inputs and targets in Flux.jl's DataLoader.
# No batch size is passed, so the loader's default applies.
data = DataLoader((x_train, y_train));

Set up learning rate modes and number of epochs. Here we use an exponentially decaying learning rate for the MLP model and a constant learning rate for the vanilla RNN model (the LSTM model uses its own exponentially decaying rate, defined in the LSTM section). We train each model for up to 100 epochs.

In [11]:
# Exponentially decaying learning rate: 1e-3 at epoch 0, shrinking by a
# factor of exp(-0.023) per epoch.
function decay(epoch)
    return 1e-3 * exp(-.023 * epoch)
end
# Constant learning rate.
lr = 5e-5;
# Maximum number of training epochs.
n_epochs = 100;

# MLP Code

Set up MLP model - Here we use 1 hidden layer with 8 nodes, the ReLU activation function, and the MSE loss function

In [34]:
# MLP configuration: layer width and the activation of the middle layer.
n_hidden_mlp = 8;
activation_func = Flux.relu;
# Three dense layers: nfeat -> 8 (no activation passed), 8 -> 8 with
# activation_func, and 8 -> ntarg (no activation passed).
m_MLP = Chain(
    Dense(nfeat, n_hidden_mlp),
    Dense(n_hidden_mlp, n_hidden_mlp, activation_func),
    Dense(n_hidden_mlp, ntarg),
);
# Mean squared error between the MLP prediction for x and the target y.
loss_MLP(x, y) = Flux.mse(m_MLP(x), y)
# Trainable parameters of the MLP, collected for Flux.train!.
ps_MLP = params(m_MLP);

Train the model using the ADAM optimizer for up to 100 epochs with early stopping

In [None]:
# Per-epoch log of the value returned by cust_mse on the validation split;
# entries for epochs that never run stay at zero.
e_log_m = zeros(n_epochs);
for i in 1:n_epochs
    # A new ADAM optimizer is constructed each epoch with the decayed rate.
    Flux.train!(loss_MLP, ps_MLP, data, ADAM(decay(i)))
    e_log_m[i] = cust_mse(x_val, YVAL, m_MLP)
    # Report progress after every epoch.
    @show(i, e_log_m[i])
    # Early stopping criterion of 1e-7.
    e_log_m[i] < 1e-7 && break
end

Evaluate performance metrics on the test set

In [None]:
# Print the three test-set metrics for the MLP, one per line.
@show(
    cust_mse(x_test, YTEST, m_MLP),
    mean_error_func(x_test, YTEST, m_MLP),
    RMSE(x_test, YTEST, m_MLP),
)

# RNN Code

Set up the vanilla RNN model - Here we use 1 hidden layer with 32 nodes and the MSE loss function

In [38]:
# Width of the recurrent layer and of the dense layer feeding it.
n_hidden_rnn = 32;
# Dense input layer, one recurrent layer, and a dense output layer.
# No activation function is passed to any layer.
m_RNN = Chain(
    Dense(nfeat, n_hidden_rnn),
    RNN(n_hidden_rnn, n_hidden_rnn),
    Dense(n_hidden_rnn, ntarg),
)
# Mean squared error between the RNN prediction for x and the target y.
# The model is reset (Flux.reset!) before every evaluation.
function loss_RNN(x, y)
    Flux.reset!(m_RNN)
    return Flux.mse(m_RNN(x), y)
end
# Trainable parameters of the RNN, collected for Flux.train!.
ps_RNN = params(m_RNN);

Train the model using the ADAM optimizer for up to 100 epochs with early stopping

In [None]:
# Per-epoch log of the value returned by cust_mse on the validation split;
# entries for epochs that never run stay at zero.
e_log_r = zeros(n_epochs)
for i in 1:n_epochs
    # Constant learning rate `lr` (5e-5) for the vanilla RNN.
    Flux.train!(loss_RNN, ps_RNN, data, ADAM(lr))
    # Store the validation error BEFORE testing the stopping criterion.
    # Without this assignment e_log_r[i] stays at 0.0, the check below is
    # always true, and training would stop after the very first epoch.
    e_log_r[i] = cust_mse(x_val, YVAL, m_RNN)
    # Report progress after every epoch.
    @show(i, e_log_r[i])
    # Early stopping criterion of 5e-7.
    if e_log_r[i] < 5e-7
        break
    end
end

Evaluate performance metrics on the test set

In [None]:
# Print the three test-set metrics for the RNN, one per line.
@show(
    cust_mse(x_test, YTEST, m_RNN),
    mean_error_func(x_test, YTEST, m_RNN),
    RMSE(x_test, YTEST, m_RNN),
)

# LSTM Code

Set up the LSTM model - Here we use 1 hidden layer with 8 nodes and the MSE loss function

In [12]:
# Width of the LSTM layer and of the dense layer feeding it.
n_hidden_lstm = 8;
# Dense input layer, one LSTM layer, and a dense output layer.
m_LSTM = Chain(
    Dense(nfeat, n_hidden_lstm),
    LSTM(n_hidden_lstm, n_hidden_lstm),
    Dense(n_hidden_lstm, ntarg),
)
# Mean squared error between the LSTM prediction for x and the target y.
# The model is reset (Flux.reset!) before every evaluation.
function loss_LSTM(x, y)
    Flux.reset!(m_LSTM)
    return Flux.mse(m_LSTM(x), y)
end

loss_LSTM (generic function with 1 method)

In [31]:
# Exponentially decaying learning rate: 5e-4 at epoch 0, shrinking by a
# factor of exp(-0.016) per epoch.
function dr(e)
    return 5e-4 * exp(-.016 * e)
end

dr (generic function with 1 method)

Train the model using the ADAM optimizer for up to 100 epochs with early stopping

In [None]:
# Trainable parameters of the LSTM, collected for Flux.train!.
ps_LSTM = params(m_LSTM)
# Per-epoch log of the value returned by cust_mse on the validation split;
# entries for epochs that never run stay at zero.
e_log_l = zeros(n_epochs)
for i in 1:n_epochs
    # A new ADAM optimizer is constructed each epoch with the decayed rate.
    Flux.train!(loss_LSTM, ps_LSTM, data, ADAM(dr(i)))
    e_log_l[i] = cust_mse(x_val, YVAL, m_LSTM)
    # Report progress after every epoch.
    @show(i, e_log_l[i])
    # Early stopping criterion of 1e-7.
    e_log_l[i] < 1e-7 && break
end

In [21]:
# Print the three test-set metrics for the LSTM, one per line.
@show(
    cust_mse(x_test, YTEST, m_LSTM),
    mean_error_func(x_test, YTEST, m_LSTM),
    RMSE(x_test, YTEST, m_LSTM),
)

cust_mse(x_test, YTEST, m_LSTM) = 2.212111f-7
mean_error_func(x_test, YTEST, m_LSTM) = 10.310354f0
RMSE(x_test, YTEST, m_LSTM) = 0.00047033082f0


0.00047033082f0

Export the models (Uncomment lines to save models)

In [15]:
using BSON: @save
#@save "m_MLP.bson" m_MLP
#@save "m_RNN_f.bson" m_RNN
#@save "m_LSTM.bson" m_LSTM