-
Notifications
You must be signed in to change notification settings - Fork 282
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
mnist_hierarchical_rnn.R example (#11)
* initial work on mnist_hierarchical_rnn.R example * add note to script indicating the example doesn't work yet
- Loading branch information
Showing
1 changed file
with
109 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,110 @@ | ||
#' This is an example of using Hierarchical RNN (HRNN) to classify MNIST digits.
#'
#' HRNNs can learn across multiple levels of temporal hierarchy over a complex sequence.
#' Usually, the first recurrent layer of an HRNN encodes a sentence (e.g. of word vectors)
#' into a sentence vector. The second recurrent layer then encodes a sequence of
#' such vectors (encoded by the first layer) into a document vector. This
#' document vector is considered to preserve both the word-level and
#' sentence-level structure of the context.
#'
#' References:
#'
#' - [A Hierarchical Neural Autoencoder for Paragraphs and Documents](https://arxiv.org/abs/1506.01057)
#'   Encodes paragraphs and documents with HRNN.
#'   Results have shown that HRNN outperforms standard
#'   RNNs and may play some role in more sophisticated generation tasks like
#'   summarization or question answering.
#' - [Hierarchical recurrent neural network for skeleton based action recognition](http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7298714)
#'   Achieved state-of-the-art results on skeleton based action recognition with 3 levels
#'   of bidirectional HRNN combined with fully connected layers.
#'
#' In the below MNIST example the first LSTM layer first encodes every
#' column of pixels of shape (28, 1) to a column vector of shape (128,). The second LSTM
#' layer then encodes these 28 column vectors of shape (28, 128) to an image vector
#' representing the whole image. A final Dense layer is added for prediction.
#'
#' After 5 epochs: train acc: 0.9858, val acc: 0.9864
#'

#'
#' IMPORTANT NOTE: This example does not work correctly with the version of
#' Keras integrated with TensorFlow (the Python variation doesn't work
#' either). Therefore, we shouldn't yet add this to the list of published
#' examples.
#'
library(keras) | ||
|
||
# Training parameters.
batch_size <- 32   # samples per gradient update
num_classes <- 10  # MNIST digits 0-9
epochs <- 5        # full passes over the training set

# Embedding dimensions: hidden units of the row-level and column-level LSTMs.
row_hidden <- 128
col_hidden <- 128
|
||
# The data, shuffled and split between train and test sets.
mnist <- dataset_mnist()
x_train <- mnist$train$x
y_train <- mnist$train$y
x_test <- mnist$test$x
y_test <- mnist$test$y

# Reshape to 4D (samples, rows, cols, pixels) for the Hierarchical RNN,
# then rescale pixel intensities from [0, 255] to [0, 1].
x_train <- array(as.numeric(x_train), dim = c(dim(x_train)[[1]], 28, 28, 1))
x_test <- array(as.numeric(x_test), dim = c(dim(x_test)[[1]], 28, 28, 1))
x_train <- x_train / 255
x_test <- x_test / 255

# Report dataset shapes. Note: cat() does not append a newline on its own,
# so each message terminates itself explicitly.
dim_x_train <- dim(x_train)
cat('x_train_shape:', dim_x_train, '\n')
cat(dim_x_train[[1]], 'train samples\n')
cat(dim(x_test)[[1]], 'test samples\n')

# Converts integer class vectors to binary (one-hot) class matrices.
y_train <- to_categorical(y_train, num_classes)
y_test <- to_categorical(y_test, num_classes)

# Per-sample input dimensions fed to the model: 28 rows, 28 cols, 1 channel.
row <- dim_x_train[[2]]
col <- dim_x_train[[3]]
pixel <- dim_x_train[[4]]
|
||
# Model input: one 4D sample of shape (row, col, pixel).
input <- layer_input(shape = c(row, col, pixel))

# Row-level encoder: apply the same LSTM to every row of pixels via the
# TimeDistributed wrapper, producing one row_hidden-length vector per row.
encoded_rows <- time_distributed(input, layer_lstm(units = row_hidden))

# Column-level encoder: a second LSTM consumes the sequence of row
# encodings and emits a single vector representing the whole image.
encoded_columns <- layer_lstm(encoded_rows, units = col_hidden)

# Classification head: softmax over the digit classes.
prediction <- layer_dense(
  encoded_columns,
  units = num_classes,
  activation = 'softmax'
)

# Assemble and compile the model.
model <- keras_model(input, prediction)
compile(
  model,
  loss = 'categorical_crossentropy',
  optimizer = 'rmsprop',
  metrics = c('accuracy')
)
|
||
# Training: fit on the training set, validating against the held-out
# test set after each epoch (verbose = 1 prints a per-epoch progress bar).
fit(
  model,
  x_train, y_train,
  batch_size = batch_size,
  epochs = epochs,
  verbose = 1,
  validation_data = list(x_test, y_test)
)
|
||
# Evaluation on the held-out test set.
# Use `[[` for both extractions: `scores[2]` on a list returns a one-element
# sub-list, which cat() cannot print; `[[` extracts the numeric value itself
# (and is consistent with the loss extraction on the previous line).
scores <- model %>% evaluate(x_test, y_test, verbose = 0)
cat('Test loss:', scores[[1]], '\n')
cat('Test accuracy:', scores[[2]], '\n')
|
||
|
||
|
||
|