In [None]:
library(keras)
library(tidyverse)

`train_images` and `train_labels` form the training set: the data that the model will
learn from. The model will then be tested on the test set, `test_images` and `test_labels`.

In [None]:
# Load the MNIST dataset through keras and pull out its four components.
mnist <- dataset_mnist()

# Training split: images (x) and their labels (y)
train_images <- mnist[["train"]][["x"]]
train_labels <- mnist[["train"]][["y"]]

# Test split: images (x) and their labels (y)
test_images <- mnist[["test"]][["x"]]
test_labels <- mnist[["test"]][["y"]]

we’ll feed the neural network the training data,
train_images and train_labels

In [None]:
# Two-layer fully connected classifier, built one layer at a time:
# a 512-unit ReLU layer over flattened 28 * 28 inputs, then a 10-unit softmax.
network <- keras_model_sequential()
network <- layer_dense(
  network,
  units = 512,
  activation = "relu",
  input_shape = c(28 * 28)
)
network <- layer_dense(network, units = 10, activation = "softmax")

The compilation step

In [None]:
# Configure training: RMSprop optimizer, categorical cross-entropy loss,
# and accuracy reported as the monitoring metric.
compile(
  network,
  optimizer = "rmsprop",
  loss = "categorical_crossentropy",
  metrics = "accuracy"
)

Previously, the training array had shape (60000, 28, 28) with values in [0, 255]. We reshape it to (60000, 28 * 28) and rescale the values to lie between 0 and 1; the test array is transformed the same way.

In [None]:
# Flatten every image into a single row of 28 * 28 values, then divide by 255
# so that the stored values are rescaled by the maximum pixel intensity.
train_images <- array_reshape(train_images, c(60000, 28 * 28)) / 255
test_images <- array_reshape(test_images, c(10000, 28 * 28)) / 255

In [None]:
# Categorically encode both label vectors so they match the
# "categorical_crossentropy" loss the network is compiled with.
train_labels <- train_labels %>% to_categorical()
test_labels <- test_labels %>% to_categorical()

train the model

In [None]:
# Train for 5 epochs on the training set, in mini-batches of 128 samples.
fit(
  network,
  train_images,
  train_labels,
  epochs = 5,
  batch_size = 128
)

In [None]:
# Evaluate the trained network on the test set and display the result.
metrics <- evaluate(network, test_images, test_labels)
metrics

In [None]:
# 1D tensor: a plain numeric vector
x <- c(12, 3, 6, 14, 10)

# Matrices (2D tensors): 3 rows by 5 columns, filled with zeros
x <- matrix(0, nrow = 3, ncol = 5)
x

# 3D tensors: a 2 x 3 x 2 array of zeros
x <- array(0, dim = c(2, 3, 2))
dim(x)

A tensor is defined by three key attributes:

- Number of axes
- Shape: an integer vector that describes how many dimensions the tensor has
along each axis.
- Data type: the type of the values stored in the tensor (for example, integer or double).

The following selects digits #10 to #99 and puts them in an array of shape (90, 28,
28):

In [None]:
# Select digits #10 to #99 from the raw 3D image tensor.
# `train_images` is flattened to a 2D matrix by the earlier reshape cell, so
# indexing it with three subscripts would fail once that cell has run; slicing
# `mnist$train$x` (the array `train_images` was originally taken from) works
# regardless of cell execution order.
my_slice <- mnist$train$x[10:99, , ]
dim(my_slice)

Deep-learning models don’t process an entire dataset at once; rather, they
break the data into small batches. Concretely, here’s one batch of our MNIST digits, with
a batch size of 128, followed by the next batch:

In [None]:
# First batch of 128 digits, taken from the raw 3D image tensor.
# `train_images` is reshaped to 2D by an earlier cell, so the three-subscript
# slice is applied to `mnist$train$x` instead.
batch <- mnist$train$x[1:128, , ]

# The next batch of 128 digits (this overwrites the previous `batch`).
batch <- mnist$train$x[129:256, , ]

# 6.1 Working with text data

Vectorizing text can be done in multiple ways:

- Segment text into words, and transform each word into a vector.
- Segment text into characters, and transform each character into a vector.
- Extract N-grams of words or characters, and transform each N-gram into a vector.

In [None]:
## one hot encoding
# Word-level one-hot encoding written out by hand.
samples <- c("The cat sat on the mat.", "The dog ate my homework.")

# Build the vocabulary: every distinct space-separated token gets an integer
# index. Indices start at 2, so index 1 is never assigned to any word.
token_index <- list()
for (sentence in samples) {
  for (word in strsplit(sentence, " ")[[1]]) {
    if (!word %in% names(token_index)) {
      token_index[[word]] <- length(token_index) + 2
    }
  }
}

# Only the first `max_length` words of each sample are encoded.
max_length <- 10

# results[i, j, k] is 1 when the j-th word of sample i has vocabulary index k.
results <- array(
  0,
  dim = c(length(samples), max_length, max(as.integer(token_index)))
)

# seq_along() instead of 1:length() so a zero-length vector yields no
# iterations rather than the sequence c(1, 0).
for (i in seq_along(samples)) {
  words <- head(strsplit(samples[[i]], " ")[[1]], n = max_length)
  for (j in seq_along(words)) {
    index <- token_index[[words[[j]]]]
    results[[i, j, index]] <- 1
  }
}
    
             

In [None]:
# using keras for word-level one-hot encoding

library(keras)
samples <- c("The cat sat on the mat.", "The dog ate my homework.")

# Creates a tokenizer, configured to only take into account the 1,000 most
# common words, and fits it on the samples.
tokenizer <- text_tokenizer(num_words = 1000) %>%
  fit_text_tokenizer(samples)

# Convert each sample into a sequence of integer word indices.
# (The function is `texts_to_sequences`; `texts_to_sequence` does not exist.)
sequences <- texts_to_sequences(tokenizer, samples)

# Binary one-hot style matrix representation of the samples.
one_hot_results <- texts_to_matrix(tokenizer, samples, mode = "binary")

# Word-to-index mapping kept by the fitted tokenizer.
word_index <- tokenizer$word_index
cat("Found", length(word_index), "unique tokens.\n")


# using word embeddings

## LEARNING WORD EMBEDDINGS WITH AN EMBEDDING LAYER

In [None]:
# Embedding layer definition.
# input_dim: the number of possible tokens
# output_dim: the dimensionality of the embeddings
embbedding_layer <- layer_embedding(
  input_dim = 1000,
  output_dim = 64
)

In [None]:
# Load the IMDB data restricted to `max_features` words and pad every review
# to `maxlen` entries.
max_features <- 10000
maxlen <- 20

# The argument is spelled out in full (`num_words`) instead of relying on
# R's partial argument matching of `num_word`.
imdb <- dataset_imdb(num_words = max_features)
c(c(x_train, y_train), c(x_test, y_test)) %<-% imdb

# Turn the lists of integer sequences into matrices with `maxlen` columns.
x_train <- pad_sequences(x_train, maxlen = maxlen)
x_test <- pad_sequences(x_test, maxlen = maxlen)

In [None]:
# Embedding -> flatten -> single sigmoid unit, for binary classification.
# The pipe after layer_flatten() is required: without it the dense layer is a
# separate statement and never becomes part of the model.
model <- keras_model_sequential() %>%
  layer_embedding(input_dim = 10000, output_dim = 8, input_length = maxlen) %>%
  layer_flatten() %>%
  layer_dense(units = 1, activation = "sigmoid")

model %>% compile(
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = c("acc")
)

summary(model)

# Hold out 20% of the training data for validation while fitting.
history <- model %>%
  fit(x_train, y_train, epochs = 10, batch_size = 32, validation_split = 0.2)

In [None]:
# using pretrained word embedding
# Read the raw IMDB training reviews from disk: every *.txt file found under
# <imdb_dir>/train/neg and <imdb_dir>/train/pos is read in full.
imdb_dir <- "~/Downloads/aclImdb"
train_dir <- file.path(imdb_dir, "train")

# Numeric label attached to every review found in each sub-directory.
label_codes <- c(neg = 0, pos = 1)

# One character vector of file paths per label, in the order of `label_codes`.
review_files <- lapply(names(label_codes), function(label_type) {
  list.files(
    file.path(train_dir, label_type),
    pattern = glob2rx("*.txt"),
    full.names = TRUE
  )
})

# Build both result vectors in a single pass instead of growing them with c()
# inside a loop, which copies the whole vector on every append.
texts <- vapply(
  unlist(review_files),
  function(fname) readChar(fname, file.info(fname)$size),
  character(1),
  USE.NAMES = FALSE
)
labels <- rep(unname(label_codes), times = lengths(review_files))
# Tokenizing the text of the raw IMDB data

In [None]:
# Tokenize the raw reviews, pad them to a fixed length, and draw a random
# training / validation split.
maxlen <- 100               # length every sequence is padded/truncated to
training_samples <- 200     # number of rows used for training
validation_samples <- 10000 # number of rows used for validation
max_words <- 10000          # vocabulary size passed to the tokenizer

tokenizer <- text_tokenizer(num_words = max_words) %>%
  fit_text_tokenizer(texts)
sequences <- texts_to_sequences(tokenizer, texts)

word_index <- tokenizer$word_index
cat("Found", length(word_index), "unique tokens.\n")

data <- pad_sequences(sequences, maxlen = maxlen)
labels <- as.array(labels)
cat("Shape of data tensor:", dim(data), "\n")
cat("Shape of label tensor:", dim(labels), "\n")

# Shuffle the row indices before splitting. seq_len() is used so that an
# empty `data` produces an empty index vector rather than c(1, 0).
indices <- sample(seq_len(nrow(data)))
training_indices <- indices[1:training_samples]
validation_indices <- indices[
  (training_samples + 1):(training_samples + validation_samples)
]

x_train <- data[training_indices, ]
y_train <- labels[training_indices]
x_val <- data[validation_indices, ]
y_val <- labels[validation_indices]

# using glove embedding 
1. Download the pre-computed GloVe embeddings (the `glove.6B` archive) from nlp.stanford.edu/projects/glove and unzip them.

In [None]:
# using glove embedding
# Parse the GloVe text file into an environment mapping word -> numeric vector.
glove_dir <- "~/Downloads/glove.6B"
lines <- readLines(file.path(glove_dir, "glove.6B.100d.txt"))

# A hashed environment is used as the word -> vector lookup table.
embeddings_index <- new.env(hash = TRUE, parent = emptyenv())

# Each line is split on spaces: the first field is the word, the remaining
# fields are converted to doubles. Iterating over `lines` directly (rather
# than 1:length(lines)) performs no iterations when the file is empty.
for (line in lines) {
  values <- strsplit(line, " ")[[1]]
  word <- values[[1]]
  embeddings_index[[word]] <- as.double(values[-1])
}
cat("Found", length(embeddings_index), "word vectors.\n")

# prepare the glove word-embedding matrix
# One row per word index (shifted by one), one column per embedding dimension.
embedding_dim <- 100
embedding_matrix <- matrix(0, nrow = max_words, ncol = embedding_dim)

for (word in names(word_index)) {
  index <- word_index[[word]]
  # Skip words whose index falls outside the `max_words` vocabulary.
  if (index >= max_words) next
  embedding_vector <- embeddings_index[[word]]
  # Words without a GloVe vector keep their all-zero row.
  if (is.null(embedding_vector)) next
  embedding_matrix[index + 1, ] <- embedding_vector
}

# understanding recurrent neural networks
A recurrent neural network loops over timesteps, and at each timestep it considers its current state at t and the input at t.


In [None]:
# Sketch of the recurrent loop: the output computed at one timestep becomes
# the state used at the next one.
# NOTE(review): `input_sequence`, `activation`, `dot`, `W`, `U` and `b` are
# not defined anywhere in the visible notebook — this reads as pseudocode
# rather than a runnable cell; confirm before executing.
state_t <- 0
for (input_t in input_sequence) {
# Combine the current input and the current state into this step's output.
output_t <- activation(dot(W, input_t) + dot(U, state_t) + b)
# Carry the output forward as the state for the next iteration.
state_t <- output_t
}

# The same step written with concrete operations: matrix products for the two
# dot() calls and tanh as the activation.
output_t <- tanh(as.numeric((W %*% input_t) + (U %*% state_t) + b))

In [None]:
# layer_simple_rnn can be run in two different modes: it can return either the
# full sequences of successive outputs for each timestep (a 3D tensor of shape
# (batch_size, timesteps, output_features)) or only the last output for each
# input sequence.
library(keras)

# No return_sequences argument is passed to the RNN layer here.
model <- keras_model_sequential() %>%
  layer_embedding(input_dim = 10000, output_dim = 32) %>%
  layer_simple_rnn(units = 32)

# The stray console prompt (">") that preceded this call was a syntax error.
summary(model)


In [None]:
# we can stack several recurrent layers one after the other
# Every recurrent layer except the last is created with
# return_sequences = TRUE.
model <- keras_model_sequential()
model <- layer_embedding(model, input_dim = 10000, output_dim = 32)
model <- layer_simple_rnn(model, units = 32, return_sequences = TRUE)
model <- layer_simple_rnn(model, units = 32, return_sequences = TRUE)
model <- layer_simple_rnn(model, units = 32, return_sequences = TRUE)
model <- layer_simple_rnn(model, units = 32)

# IMDB movie-review-classification problem

In [None]:
# Load the IMDB reviews and pad them into fixed-width matrices for the RNN.
library(keras)

max_features <- 10000 # vocabulary size passed to dataset_imdb()
maxlen <- 500         # length every review is padded/truncated to
batch_size <- 32

cat("Loading data...\n")
imdb <- dataset_imdb(num_words = max_features)
c(c(input_train, y_train), c(input_test, y_test)) %<-% imdb
cat(length(input_train), "train sequences\n")
# Trailing newline added so the next message starts on its own line.
cat(length(input_test), "test sequences\n")

cat("Pad sequences (samples x time)\n")
input_train <- pad_sequences(input_train, maxlen = maxlen)
input_test <- pad_sequences(input_test, maxlen = maxlen)
cat("input_train shape:", dim(input_train), "\n")
cat("input_test shape:", dim(input_test), "\n")

In [None]:
# Training the model with embedding and simple RNN layers
# Architecture: embedding -> simple RNN -> single sigmoid output unit.
model <- keras_model_sequential()
model <- layer_embedding(model, input_dim = max_features, output_dim = 32)
model <- layer_simple_rnn(model, units = 32)
model <- layer_dense(model, units = 1, activation = "sigmoid")

# Binary classification set-up.
compile(
  model,
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = "acc"
)

# Fit with 20% of the training data held out for validation.
history <- fit(
  model,
  input_train, y_train,
  epochs = 10,
  batch_size = 128,
  validation_split = 0.2
)

In [None]:
# Plot the `history` object returned by the preceding fit() call
# (presumably the per-epoch training/validation curves — confirm).
plot(history)

# LSTM in Keras

In [None]:
# Embedding -> LSTM -> single sigmoid output unit.
model <- keras_model_sequential()
model <- layer_embedding(model, input_dim = max_features, output_dim = 32)
model <- layer_lstm(model, units = 32)
model <- layer_dense(model, units = 1, activation = "sigmoid")

# Binary classification set-up.
compile(
  model,
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = "acc"
)

# Fit with 20% of the training data held out for validation.
history <- fit(
  model,
  input_train, y_train,
  epochs = 10,
  batch_size = 128,
  validation_split = 0.2
)

 # 6.3 Advanced use of recurrent neural networks

## Gated recurrent unit layer

In [None]:
# A single GRU layer followed by a one-unit dense output with no activation.
# NOTE(review): `dim(data)[[-1]]` only works when `data` has exactly two
# dimensions (it then yields the second one) — confirm `data` is 2D here.
model <- keras_model_sequential() %>%
  layer_gru(units = 32, input_shape = list(NULL, dim(data)[[-1]])) %>%
  layer_dense(units = 1)

# The original cell broke off right after `compile(`, leaving the call
# unclosed, which is a syntax error.
# NOTE(review): the optimizer and loss below are assumptions ("rmsprop" as in
# the other cells, "mae" as a regression loss for the linear one-unit output)
# — confirm against the intended training set-up.
model %>% compile(
  optimizer = "rmsprop",
  loss = "mae"
)

# using dropout

In [None]:

# GRU layer with both input dropout and recurrent dropout set to 0.2,
# followed by a one-unit dense output.
model <- keras_model_sequential()
model <- layer_gru(
  model,
  units = 32,
  dropout = 0.2,
  recurrent_dropout = 0.2,
  input_shape = list(NULL, dim(data)[[-1]])
)
model <- layer_dense(model, units = 1)

# Stacking recurrent layers

In [None]:
# Stacking recurrent layers
# First GRU layer is created with return_sequences = TRUE and feeds the
# second GRU layer.
model <- keras_model_sequential()
model <- layer_gru(
  model,
  units = 32,
  dropout = 0.1,
  recurrent_dropout = 0.5,
  return_sequences = TRUE,
  input_shape = list(NULL, dim(data)[[-1]])
)
# Second GRU layer: 64 units with ReLU activation.
model <- layer_gru(
  model,
  units = 64,
  activation = "relu",
  dropout = 0.1,
  recurrent_dropout = 0.5
)
model <- layer_dense(model, units = 1)

# Using bidirectional RNNs

In [None]:
# Embedding -> bidirectional wrapper around an LSTM -> sigmoid output unit.
model <- keras_model_sequential()
model <- layer_embedding(model, input_dim = max_features, output_dim = 32)
model <- bidirectional(model, layer_lstm(units = 32))
model <- layer_dense(model, units = 1, activation = "sigmoid")

# bidirectional GRU
# Bidirectional wrapper around a GRU layer, then a one-unit dense output.
model <- keras_model_sequential()
model <- bidirectional(
  model,
  layer_gru(units = 32),
  input_shape = list(NULL, dim(data)[[-1]])
)
model <- layer_dense(model, units = 1)

# 6.4 Sequence processing with convnets

In [None]:
# 1D convnet: three conv layers (32 filters, kernel size 5, ReLU) with max
# pooling of size 3 after the first two, global max pooling after the third,
# and a one-unit dense output.
model <- keras_model_sequential()
model <- layer_conv_1d(
  model,
  filters = 32,
  kernel_size = 5,
  activation = "relu",
  input_shape = list(NULL, dim(data)[[-1]])
)
model <- layer_max_pooling_1d(model, pool_size = 3)
model <- layer_conv_1d(model, filters = 32, kernel_size = 5, activation = "relu")
model <- layer_max_pooling_1d(model, pool_size = 3)
model <- layer_conv_1d(model, filters = 32, kernel_size = 5, activation = "relu")
model <- layer_global_max_pooling_1d(model)
model <- layer_dense(model, units = 1)
