diff --git a/R/datasets.R b/R/datasets.R index d43dfc394..c1b63f4c2 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -6,8 +6,19 @@ #' Dataset of 50,000 32x32 color training images, labeled over 10 categories, #' and 10,000 test images. #' +#' @param convert When `TRUE` (default) the datasets are returned as R arrays. +#' If `FALSE`, objects are returned as NumPy arrays. +#' #' @returns Lists of training and test data: `train$x, train$y, test$x, test$y`. #' +#' ```{r cifar10-str-true} +#' str(dataset_cifar10()) +#' ``` +#' +#' ```{r cifar10-str-false} +#' str(dataset_cifar10(convert = FALSE)) +#' ``` +#' #' The `x` data is an array of RGB image data with shape (num_samples, 3, 32, #' 32). #' @@ -17,8 +28,11 @@ #' @family datasets #' #' @export -dataset_cifar10 <- function() { - dataset <- keras$datasets$cifar10$load_data() +dataset_cifar10 <- function(convert = TRUE) { + dataset <- call_dataset_loader( + loader = keras$datasets$cifar10$load_data, + convert = convert + ) as_dataset_list(dataset) } @@ -30,9 +44,18 @@ dataset_cifar10 <- function() { #' and 10,000 test images. #' #' @param label_mode one of "fine", "coarse". +#' @inheritParams dataset_cifar10 #' #' @returns Lists of training and test data: `train$x, train$y, test$x, test$y`. #' +#' ```{r cifar100-str-true} +#' str(dataset_cifar100()) +#' ``` +#' +#' ```{r cifar100-str-false} +#' str(dataset_cifar100(convert = FALSE)) +#' ``` +#' #' The `x` data is an array of RGB image data with shape (num_samples, 3, 32, 32). #' #' The `y` data is an array of category labels with shape (num_samples). 
@@ -40,9 +63,13 @@ dataset_cifar10 <- function() { #' @family datasets #' #' @export -dataset_cifar100 <- function(label_mode = c("fine", "coarse")) { - dataset <- keras$datasets$cifar100$load_data( - label_mode = match.arg(label_mode) +dataset_cifar100 <- function(label_mode = c("fine", "coarse"), convert = TRUE) { + dataset <- call_dataset_loader( + loader = keras$datasets$cifar100$load_data, + convert = convert, + args = list( + label_mode = match.arg(label_mode) + ) ) as_dataset_list(dataset) } @@ -75,9 +102,19 @@ dataset_cifar100 <- function(label_mode = c("fine", "coarse")) { #' @param oov_char Words that were cut out because of the `num_words` or #' `skip_top` limit will be replaced with this character. #' @param index_from Index actual words with this index and higher. +#' @inheritParams dataset_cifar10 #' #' @returns Lists of training and test data: `train$x, train$y, test$x, test$y`. #' +#' ``` +#' train/ +#' ├─ x +#' └─ y +#' test/ +#' ├─ x +#' └─ y +#' ``` +#' #' The `x` data includes integer sequences. If the `num_words` argument was #' specific, the maximum possible index value is `num_words-1`. If the #' `maxlen` argument was specified, the largest possible sequence length is @@ -85,6 +122,14 @@ dataset_cifar100 <- function(label_mode = c("fine", "coarse")) { #' #' The `y` data includes a set of integer labels (0 or 1). #' +#' ```{r imdb-str-true} +#' str(dataset_imdb()) +#' ``` +#' +#' ```{r imdb-str-false} +#' str(dataset_imdb(convert = FALSE)) +#' ``` +#' #' The `dataset_imdb_word_index()` function returns a list where the #' names are words and the values are integer. 
#' @@ -92,19 +137,24 @@ dataset_cifar100 <- function(label_mode = c("fine", "coarse")) { #' #' @export dataset_imdb <- function(path = "imdb.npz", num_words = NULL, skip_top = 0L, maxlen = NULL, - seed = 113L, start_char = 1L, oov_char = 2L, index_from = 3L) { - dataset <- keras$datasets$imdb$load_data( - path = path, - num_words = as_nullable_integer(num_words), - skip_top = as.integer(skip_top), - maxlen = as_nullable_integer(maxlen), - seed = as.integer(seed), - start_char = as.integer(start_char), - oov_char = as.integer(oov_char), - index_from = as.integer(index_from) + seed = 113L, start_char = 1L, oov_char = 2L, index_from = 3L, + convert = TRUE) { + dataset <- call_dataset_loader( + loader = keras$datasets$imdb$load_data, + convert = convert, + args = list( + path = path, + num_words = as_nullable_integer(num_words), + skip_top = as.integer(skip_top), + maxlen = as_nullable_integer(maxlen), + seed = as.integer(seed), + start_char = as.integer(start_char), + oov_char = as.integer(oov_char), + index_from = as.integer(index_from) + ) ) - as_sequences_dataset_list(dataset) + as_sequences_dataset_list(dataset, convert = convert) } @@ -136,30 +186,52 @@ dataset_imdb_word_index <- function(path = "imdb_word_index.json") { #' @param oov_char words that were cut out because of the `num_words` or #' `skip_top` limit will be replaced with this character. #' @param index_from index actual words with this index and higher. +#' @inheritParams dataset_cifar10 #' #' @returns Lists of training and test data: `train$x, train$y, test$x, test$y` #' with same format as [dataset_imdb()]. The `dataset_reuters_word_index()` #' function returns a list where the names are words and the values are #' integer. e.g. `word_index[["giraffe"]]` might return `1234`. 
#' +#' ``` +#' train/ +#' ├─ x +#' └─ y +#' test/ +#' ├─ x +#' └─ y +#' ``` +#' +#' ```{r reuters-str-true} +#' str(dataset_reuters()) +#' ``` +#' +#' ```{r reuters-str-false} +#' str(dataset_reuters(convert = FALSE)) +#' ``` +#' #' @family datasets #' #' @export dataset_reuters <- function(path = "reuters.npz", num_words = NULL, skip_top = 0L, maxlen = NULL, test_split = 0.2, seed = 113L, start_char = 1L, oov_char = 2L, - index_from = 3L) { - dataset <- keras$datasets$reuters$load_data( - path = path, - num_words = as_nullable_integer(num_words), - skip_top = as.integer(skip_top), - maxlen = as_nullable_integer(maxlen), - test_split = test_split, - seed = as.integer(seed), - start_char = as.integer(start_char), - oov_char = as.integer(oov_char), - index_from = as.integer(index_from) + index_from = 3L, convert = TRUE) { + dataset <- call_dataset_loader( + loader = keras$datasets$reuters$load_data, + convert = convert, + args = list( + path = path, + num_words = as_nullable_integer(num_words), + skip_top = as.integer(skip_top), + maxlen = as_nullable_integer(maxlen), + test_split = test_split, + seed = as.integer(seed), + start_char = as.integer(start_char), + oov_char = as.integer(oov_char), + index_from = as.integer(index_from) + ) ) - as_sequences_dataset_list(dataset) + as_sequences_dataset_list(dataset, convert = convert) } @@ -176,16 +248,29 @@ dataset_reuters_word_index <- function(path = "reuters_word_index.pkl") { #' Dataset of 60,000 28x28 grayscale images of the 10 digits, along with a test set of 10,000 images. #' #' @param path Path where to cache the dataset locally (relative to ~/.keras/datasets). +#' @inheritParams dataset_cifar10 #' #' @returns Lists of training and test data: `train$x, train$y, test$x, test$y`, where #' `x` is an array of grayscale image data with shape (num_samples, 28, 28) and `y` #' is an array of digit labels (integers in range 0-9) with shape (num_samples). 
#' +#' ```{r mnist-str-true} +#' str(dataset_mnist()) +#' ``` +#' +#' ```{r mnist-str-false} +#' str(dataset_mnist(convert = FALSE)) +#' ``` +#' #' @family datasets #' #' @export -dataset_mnist <- function(path = "mnist.npz") { - dataset <- keras$datasets$mnist$load_data(path) +dataset_mnist <- function(path = "mnist.npz", convert = TRUE) { + dataset <- call_dataset_loader( + loader = keras$datasets$mnist$load_data, + convert = convert, + args = list(path = path) + ) as_dataset_list(dataset) } @@ -224,18 +309,6 @@ dataset_mnist <- function(path = "mnist.npz") { #' values for block groups with few households and many empty houses, #' such as vacation resorts. #' -#' @returns -#' Nested list of arrays: `(x_train, y_train), (x_test, y_test)`. -#' -#' **`x_train`, `x_test`**: arrays with shape `(num_samples, 8)` -#' containing either the training samples (for `x_train`), -#' or test samples (for `y_train`). -#' -#' **`y_train`, `y_test`**: arrays of shape `(num_samples)` -#' containing the target scalars. The targets are float scalars -#' typically between 25,000 and 500,000 that represent -#' the home prices in dollars. -#' #' @param version #' `"small"` or `"large"`. The small version #' contains 600 samples, the large version contains @@ -254,15 +327,32 @@ dataset_mnist <- function(path = "mnist.npz") { #' Random seed for shuffling the data #' before computing the test split. #' +#' @inheritParams dataset_cifar10 +#' +#' @returns +#' Nested list of arrays: `(x_train, y_train), (x_test, y_test)`. 
+#' +#' ```{r california-housing-str-true} +#' str(dataset_california_housing()) +#' ``` +#' +#' ```{r california-housing-str-false} +#' str(dataset_california_housing(convert = FALSE)) +#' ``` +#' #' @export #' @family datasets #' @tether keras.datasets.california_housing.load_data dataset_california_housing <- function (version = "large", path = "california_housing.npz", - test_split = 0.2, seed = 113L) + test_split = 0.2, seed = 113L, convert = TRUE) { - args <- capture_args(list(seed = as_integer)) - dataset <- do.call(keras$datasets$california_housing$load_data, args) + args <- capture_args(list(seed = as_integer), ignore = "convert") + dataset <- call_dataset_loader( + loader = keras$datasets$california_housing$load_data, + convert = convert, + args = args + ) as_dataset_list(dataset) } @@ -280,6 +370,14 @@ function (version = "large", path = "california_housing.npz", #' `x` is an array of grayscale image data with shape (num_samples, 28, 28) and `y` #' is an array of article labels (integers in range 0-9) with shape (num_samples). #' +#' ```{r fashion-mnist-str-true} +#' str(dataset_fashion_mnist()) +#' ``` +#' +#' ```{r fashion-mnist-str-false} +#' str(dataset_fashion_mnist(convert = FALSE)) +#' ``` +#' #' @details Dataset of 60,000 28x28 grayscale images of 10 fashion categories, #' along with a test set of 10,000 images. This dataset can be used as a drop-in #' replacement for MNIST. 
The class labels are: @@ -294,12 +392,16 @@ function (version = "large", path = "california_housing.npz", #' * 7 - Sneaker #' * 8 - Bag #' * 9 - Ankle boot +#' @inheritParams dataset_cifar10 #' #' @family datasets #' #' @export -dataset_fashion_mnist <- function() { - dataset <- keras$datasets$fashion_mnist$load_data() +dataset_fashion_mnist <- function(convert = TRUE) { + dataset <- call_dataset_loader( + loader = keras$datasets$fashion_mnist$load_data, + convert = convert + ) as_dataset_list(dataset) } @@ -314,6 +416,7 @@ dataset_fashion_mnist <- function() { #' @param test_split fraction of the data to reserve as test set. #' @param seed Random seed for shuffling the data before computing the test #' split. +#' @inheritParams dataset_cifar10 #' #' @returns Lists of training and test data: `train$x, train$y, test$x, test$y`. #' @@ -321,14 +424,27 @@ dataset_fashion_mnist <- function() { #' the Boston suburbs in the late 1970s. Targets are the median values of the #' houses at a location (in k$). 
#' +#' ```{r boston-housing-str-true} +#' str(dataset_boston_housing()) +#' ``` +#' +#' ```{r boston-housing-str-false} +#' str(dataset_boston_housing(convert = FALSE)) +#' ``` +#' #' @family datasets #' #' @export -dataset_boston_housing <- function(path = "boston_housing.npz", test_split = 0.2, seed = 113L) { - dataset <- keras$datasets$boston_housing$load_data( - path = path, - seed = as.integer(seed), - test_split = test_split +dataset_boston_housing <- function(path = "boston_housing.npz", test_split = 0.2, seed = 113L, + convert = TRUE) { + dataset <- call_dataset_loader( + loader = keras$datasets$boston_housing$load_data, + convert = convert, + args = list( + path = path, + seed = as.integer(seed), + test_split = test_split + ) ) as_dataset_list(dataset) } @@ -336,6 +452,16 @@ dataset_boston_housing <- function(path = "boston_housing.npz", test_split = 0.2 +call_dataset_loader <- function(loader, convert, args = list()) { + if (convert) { + return(do.call(loader, args)) + } + + dataset <- do.call(r_to_py(loader), args) + iterate(dataset, iterate, simplify = FALSE) +} + + as_dataset_list <- function(dataset) { list( train = list( @@ -349,15 +475,28 @@ as_dataset_list <- function(dataset) { ) } -as_sequences_dataset_list <- function(dataset) { - list( - train = list( - x = lapply(dataset[[1]][[1]], identity), - y = as.integer(dataset[[1]][[2]]) - ), - test = list( - x = lapply(dataset[[2]][[1]], identity), - y = as.integer(dataset[[2]][[2]]) +as_sequences_dataset_list <- function(dataset, convert) { + if (convert) { + list( + train = list( + x = lapply(dataset[[1]][[1]], identity), + y = as.integer(dataset[[1]][[2]]) + ), + test = list( + x = lapply(dataset[[2]][[1]], identity), + y = as.integer(dataset[[2]][[2]]) + ) ) - ) + } else { + list( + train = list( + x = dataset[[1]][[1]], + y = dataset[[1]][[2]] + ), + test = list( + x = dataset[[2]][[1]], + y = dataset[[2]][[2]] + ) + ) + } } diff --git a/man/callback_reduce_lr_on_plateau.Rd 
b/man/callback_reduce_lr_on_plateau.Rd index dc0459cdb..cefbd6318 100644 --- a/man/callback_reduce_lr_on_plateau.Rd +++ b/man/callback_reduce_lr_on_plateau.Rd @@ -10,7 +10,7 @@ callback_reduce_lr_on_plateau( patience = 10L, verbose = 0L, mode = "auto", - min_delta = 1e-04, + min_delta = 0.0001, cooldown = 0L, min_lr = 0, ... diff --git a/man/dataset_boston_housing.Rd b/man/dataset_boston_housing.Rd index 05df5a5e5..38b151320 100644 --- a/man/dataset_boston_housing.Rd +++ b/man/dataset_boston_housing.Rd @@ -7,7 +7,8 @@ dataset_boston_housing( path = "boston_housing.npz", test_split = 0.2, - seed = 113L + seed = 113L, + convert = TRUE ) } \arguments{ @@ -18,6 +19,9 @@ dataset_boston_housing( \item{seed}{Random seed for shuffling the data before computing the test split.} + +\item{convert}{When \code{TRUE} (default) the datasets are returned as R arrays. +If \code{FALSE}, objects are returned as NumPy arrays.} } \value{ Lists of training and test data: \verb{train$x, train$y, test$x, test$y}. @@ -25,6 +29,32 @@ Lists of training and test data: \verb{train$x, train$y, test$x, test$y}. Samples contain 13 attributes of houses at different locations around the Boston suburbs in the late 1970s. Targets are the median values of the houses at a location (in k$). + +\if{html}{\out{
}}\preformatted{str(dataset_boston_housing()) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: num [1:404, 1:13] 1.2325 0.0218 4.8982 0.0396 3.6931 ... +## ..$ y: num [1:404(1d)] 15.2 42.3 50 21.1 17.7 18.5 11.3 15.6 15.6 14.4 ... +## $ test :List of 2 +## ..$ x: num [1:102, 1:13] 18.0846 0.1233 0.055 1.2735 0.0715 ... +## ..$ y: num [1:102(1d)] 7.2 18.8 19 27 22.2 24.5 31.2 22.9 20.5 23.2 ... + +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{str(dataset_boston_housing(convert = FALSE)) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: +## ..$ y: +## $ test :List of 2 +## ..$ x: +## ..$ y: + +}\if{html}{\out{
}} } \description{ Dataset taken from the StatLib library which is maintained at Carnegie Mellon diff --git a/man/dataset_california_housing.Rd b/man/dataset_california_housing.Rd index d77b77369..3ac4968ac 100644 --- a/man/dataset_california_housing.Rd +++ b/man/dataset_california_housing.Rd @@ -8,7 +8,8 @@ dataset_california_housing( version = "large", path = "california_housing.npz", test_split = 0.2, - seed = 113L + seed = 113L, + convert = TRUE ) } \arguments{ @@ -25,18 +26,38 @@ deprecated \code{boston_housing} dataset.} \item{seed}{Random seed for shuffling the data before computing the test split.} + +\item{convert}{When \code{TRUE} (default) the datasets are returned as R arrays. +If \code{FALSE}, objects are returned as NumPy arrays.} } \value{ Nested list of arrays: \verb{(x_train, y_train), (x_test, y_test)}. -\strong{\code{x_train}, \code{x_test}}: arrays with shape \verb{(num_samples, 8)} -containing either the training samples (for \code{x_train}), -or test samples (for \code{y_train}). +\if{html}{\out{
}}\preformatted{str(dataset_california_housing()) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: num [1:16512, 1:8] -118 -118 -122 -118 -123 ... +## ..$ y: num [1:16512(1d)] 252300 146900 290900 141300 500001 ... +## $ test :List of 2 +## ..$ x: num [1:4128, 1:8] -118 -120 -121 -122 -117 ... +## ..$ y: num [1:4128(1d)] 397900 227900 172100 186500 148900 ... + +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{str(dataset_california_housing(convert = FALSE)) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: +## ..$ y: +## $ test :List of 2 +## ..$ x: +## ..$ y: -\strong{\code{y_train}, \code{y_test}}: arrays of shape \code{(num_samples)} -containing the target scalars. The targets are float scalars -typically between 25,000 and 500,000 that represent -the home prices in dollars. +}\if{html}{\out{
}} } \description{ This dataset was obtained from the \href{https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html}{StatLib repository}. diff --git a/man/dataset_cifar10.Rd b/man/dataset_cifar10.Rd index 1d0c29857..dd8a77ea1 100644 --- a/man/dataset_cifar10.Rd +++ b/man/dataset_cifar10.Rd @@ -4,11 +4,41 @@ \alias{dataset_cifar10} \title{CIFAR10 small image classification} \usage{ -dataset_cifar10() +dataset_cifar10(convert = TRUE) +} +\arguments{ +\item{convert}{When \code{TRUE} (default) the datasets are returned as R arrays. +If \code{FALSE}, objects are returned as NumPy arrays.} } \value{ Lists of training and test data: \verb{train$x, train$y, test$x, test$y}. +\if{html}{\out{
}}\preformatted{str(dataset_cifar10()) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: int [1:50000, 1:32, 1:32, 1:3] 59 154 255 28 170 159 164 28 134 125 ... +## ..$ y: int [1:50000, 1] 6 9 9 4 1 1 2 7 8 3 ... +## $ test :List of 2 +## ..$ x: int [1:10000, 1:32, 1:32, 1:3] 158 235 158 155 65 179 160 83 23 217 ... +## ..$ y: int [1:10000, 1] 3 8 8 0 6 6 1 6 3 1 ... + +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{str(dataset_cifar10(convert = FALSE)) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: +## ..$ y: +## $ test :List of 2 +## ..$ x: +## ..$ y: + +}\if{html}{\out{
}} + The \code{x} data is an array of RGB image data with shape (num_samples, 3, 32, 32). diff --git a/man/dataset_cifar100.Rd b/man/dataset_cifar100.Rd index c7b429b9b..278ace00d 100644 --- a/man/dataset_cifar100.Rd +++ b/man/dataset_cifar100.Rd @@ -4,14 +4,43 @@ \alias{dataset_cifar100} \title{CIFAR100 small image classification} \usage{ -dataset_cifar100(label_mode = c("fine", "coarse")) +dataset_cifar100(label_mode = c("fine", "coarse"), convert = TRUE) } \arguments{ \item{label_mode}{one of "fine", "coarse".} + +\item{convert}{When \code{TRUE} (default) the datasets are returned as R arrays. +If \code{FALSE}, objects are returned as NumPy arrays.} } \value{ Lists of training and test data: \verb{train$x, train$y, test$x, test$y}. +\if{html}{\out{
}}\preformatted{str(dataset_cifar100()) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: int [1:50000, 1:32, 1:32, 1:3] 255 255 250 124 43 190 50 178 122 255 ... +## ..$ y: num [1:50000, 1] 19 29 0 11 1 86 90 28 23 31 ... +## $ test :List of 2 +## ..$ x: int [1:10000, 1:32, 1:32, 1:3] 199 113 61 93 80 168 37 175 233 182 ... +## ..$ y: num [1:10000, 1] 49 33 72 51 71 92 15 14 23 0 ... + +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{str(dataset_cifar100(convert = FALSE)) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: +## ..$ y: +## $ test :List of 2 +## ..$ x: +## ..$ y: + +}\if{html}{\out{
}} + The \code{x} data is an array of RGB image data with shape (num_samples, 3, 32, 32). The \code{y} data is an array of category labels with shape (num_samples). diff --git a/man/dataset_fashion_mnist.Rd b/man/dataset_fashion_mnist.Rd index ade51bb97..c0d06a4fe 100644 --- a/man/dataset_fashion_mnist.Rd +++ b/man/dataset_fashion_mnist.Rd @@ -4,12 +4,42 @@ \alias{dataset_fashion_mnist} \title{Fashion-MNIST database of fashion articles} \usage{ -dataset_fashion_mnist() +dataset_fashion_mnist(convert = TRUE) +} +\arguments{ +\item{convert}{When \code{TRUE} (default) the datasets are returned as R arrays. +If \code{FALSE}, objects are returned as NumPy arrays.} } \value{ Lists of training and test data: \verb{train$x, train$y, test$x, test$y}, where \code{x} is an array of grayscale image data with shape (num_samples, 28, 28) and \code{y} is an array of article labels (integers in range 0-9) with shape (num_samples). + +\if{html}{\out{
}}\preformatted{str(dataset_fashion_mnist()) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: int [1:60000, 1:28, 1:28] 0 0 0 0 0 0 0 0 0 0 ... +## ..$ y: int [1:60000(1d)] 9 0 0 3 0 2 7 2 5 5 ... +## $ test :List of 2 +## ..$ x: int [1:10000, 1:28, 1:28] 0 0 0 0 0 0 0 0 0 0 ... +## ..$ y: int [1:10000(1d)] 9 2 1 1 6 1 4 6 5 7 ... + +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{str(dataset_fashion_mnist(convert = FALSE)) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: +## ..$ y: +## $ test :List of 2 +## ..$ x: +## ..$ y: + +}\if{html}{\out{
}} } \description{ Dataset of 60,000 28x28 grayscale images of the 10 fashion article classes, diff --git a/man/dataset_imdb.Rd b/man/dataset_imdb.Rd index ea71f2190..fc8b5d299 100644 --- a/man/dataset_imdb.Rd +++ b/man/dataset_imdb.Rd @@ -13,7 +13,8 @@ dataset_imdb( seed = 113L, start_char = 1L, oov_char = 2L, - index_from = 3L + index_from = 3L, + convert = TRUE ) dataset_imdb_word_index(path = "imdb_word_index.json") @@ -39,10 +40,21 @@ Set to 1 because 0 is usually the padding character.} \code{skip_top} limit will be replaced with this character.} \item{index_from}{Index actual words with this index and higher.} + +\item{convert}{When \code{TRUE} (default) the datasets are returned as R arrays. +If \code{FALSE}, objects are returned as NumPy arrays.} } \value{ Lists of training and test data: \verb{train$x, train$y, test$x, test$y}. +\if{html}{\out{
}}\preformatted{train/ +├─ x +└─ y +test/ +├─ x +└─ y +}\if{html}{\out{
}} + The \code{x} data includes integer sequences. If the \code{num_words} argument was specific, the maximum possible index value is \code{num_words-1}. If the \code{maxlen} argument was specified, the largest possible sequence length is @@ -50,6 +62,46 @@ specific, the maximum possible index value is \code{num_words-1}. If the The \code{y} data includes a set of integer labels (0 or 1). +\if{html}{\out{
}}\preformatted{str(dataset_imdb()) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x:List of 25000 +## .. ..$ : int [1:218] 1 14 22 16 43 530 973 1622 1385 65 ... +## .. ..$ : int [1:189] 1 194 1153 194 8255 78 228 5 6 1463 ... +## .. ..$ : int [1:141] 1 14 47 8 30 31 7 4 249 108 ... +## .. ..$ : int [1:550] 1 4 18609 16085 33 2804 4 2040 432 111 ... +## .. ..$ : int [1:147] 1 249 1323 7 61 113 10 10 13 1637 ... +## .. ..$ : int [1:43] 1 778 128 74 12 630 163 15 4 1766 ... +## .. .. [list output truncated] +## ..$ y: int [1:25000] 1 0 0 1 0 0 1 0 1 0 ... +## $ test :List of 2 +## ..$ x:List of 25000 +## .. ..$ : int [1:68] 1 591 202 14 31 6 717 10 10 18142 ... +## .. ..$ : int [1:260] 1 14 22 3443 6 176 7 5063 88 12 ... +## .. ..$ : int [1:603] 1 111 748 4368 1133 33782 24563 4 87 1551 ... +## .. ..$ : int [1:181] 1 13 1228 119 14 552 7 20 190 14 ... +## .. ..$ : int [1:108] 1 40 49 85 84 1040 146 6 783 254 ... +## .. ..$ : int [1:132] 1 146 427 5718 14 20 218 112 2962 32 ... +## .. .. [list output truncated] +## ..$ y: int [1:25000] 0 1 1 0 1 1 1 0 0 1 ... + +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{str(dataset_imdb(convert = FALSE)) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: +## ..$ y: +## $ test :List of 2 +## ..$ x: +## ..$ y: + +}\if{html}{\out{
}} + The \code{dataset_imdb_word_index()} function returns a list where the names are words and the values are integer. } diff --git a/man/dataset_mnist.Rd b/man/dataset_mnist.Rd index 50b56f4d1..5c3815579 100644 --- a/man/dataset_mnist.Rd +++ b/man/dataset_mnist.Rd @@ -4,15 +4,44 @@ \alias{dataset_mnist} \title{MNIST database of handwritten digits} \usage{ -dataset_mnist(path = "mnist.npz") +dataset_mnist(path = "mnist.npz", convert = TRUE) } \arguments{ \item{path}{Path where to cache the dataset locally (relative to ~/.keras/datasets).} + +\item{convert}{When \code{TRUE} (default) the datasets are returned as R arrays. +If \code{FALSE}, objects are returned as NumPy arrays.} } \value{ Lists of training and test data: \verb{train$x, train$y, test$x, test$y}, where \code{x} is an array of grayscale image data with shape (num_samples, 28, 28) and \code{y} is an array of digit labels (integers in range 0-9) with shape (num_samples). + +\if{html}{\out{
}}\preformatted{str(dataset_mnist()) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: int [1:60000, 1:28, 1:28] 0 0 0 0 0 0 0 0 0 0 ... +## ..$ y: int [1:60000(1d)] 5 0 4 1 9 2 1 3 1 4 ... +## $ test :List of 2 +## ..$ x: int [1:10000, 1:28, 1:28] 0 0 0 0 0 0 0 0 0 0 ... +## ..$ y: int [1:10000(1d)] 7 2 1 0 4 1 4 9 5 9 ... + +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{str(dataset_mnist(convert = FALSE)) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: +## ..$ y: +## $ test :List of 2 +## ..$ x: +## ..$ y: + +}\if{html}{\out{
}} } \description{ Dataset of 60,000 28x28 grayscale images of the 10 digits, along with a test set of 10,000 images. diff --git a/man/dataset_reuters.Rd b/man/dataset_reuters.Rd index 7a79bc0ca..e07172675 100644 --- a/man/dataset_reuters.Rd +++ b/man/dataset_reuters.Rd @@ -14,7 +14,8 @@ dataset_reuters( seed = 113L, start_char = 1L, oov_char = 2L, - index_from = 3L + index_from = 3L, + convert = TRUE ) dataset_reuters_word_index(path = "reuters_word_index.pkl") @@ -42,12 +43,63 @@ Set to 1 because 0 is usually the padding character.} \code{skip_top} limit will be replaced with this character.} \item{index_from}{index actual words with this index and higher.} + +\item{convert}{When \code{TRUE} (default) the datasets are returned as R arrays. +If \code{FALSE}, objects are returned as NumPy arrays.} } \value{ Lists of training and test data: \verb{train$x, train$y, test$x, test$y} with same format as \code{\link[=dataset_imdb]{dataset_imdb()}}. The \code{dataset_reuters_word_index()} function returns a list where the names are words and the values are integer. e.g. \code{word_index[["giraffe"]]} might return \code{1234}. + +\if{html}{\out{
}}\preformatted{train/ +├─ x +└─ y +test/ +├─ x +└─ y +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{str(dataset_reuters()) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x:List of 8982 +## .. ..$ : int [1:87] 1 27595 28842 8 43 10 447 5 25 207 ... +## .. ..$ : int [1:56] 1 3267 699 3434 2295 56 16784 7511 9 56 ... +## .. ..$ : int [1:139] 1 53 12 284 15 14 272 26 53 959 ... +## .. ..$ : int [1:224] 1 4 686 867 558 4 37 38 309 2276 ... +## .. ..$ : int [1:101] 1 8295 111 8 25 166 40 638 10 436 ... +## .. ..$ : int [1:116] 1 4 37 38 309 213 349 1632 48 193 ... +## .. .. [list output truncated] +## ..$ y: int [1:8982] 3 4 3 4 4 4 4 3 3 16 ... +## $ test :List of 2 +## ..$ x:List of 2246 +## .. ..$ : int [1:145] 1 4 1378 2025 9 697 4622 111 8 25 ... +## .. ..$ : int [1:745] 1 2768 283 122 7 4 89 544 463 29 ... +## .. ..$ : int [1:228] 1 4 309 2276 4759 5 2015 403 1920 33 ... +## .. ..$ : int [1:172] 1 11786 13716 65 9 249 1096 8 16 515 ... +## .. ..$ : int [1:187] 1 470 354 18270 4231 62 2373 509 1687 5138 ... +## .. ..$ : int [1:80] 1 53 134 26 14 102 26 39 5150 18 ... +## .. .. [list output truncated] +## ..$ y: int [1:2246] 3 10 1 4 4 3 3 3 3 3 ... + +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{str(dataset_reuters(convert = FALSE)) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## List of 2 +## $ train:List of 2 +## ..$ x: +## ..$ y: +## $ test :List of 2 +## ..$ x: +## ..$ y: + +}\if{html}{\out{
}} } \description{ Dataset of 11,228 newswires from Reuters, labeled over 46 topics. As with diff --git a/man/layer_tfsm.Rd b/man/layer_tfsm.Rd index ba6b79213..ef9337506 100644 --- a/man/layer_tfsm.Rd +++ b/man/layer_tfsm.Rd @@ -59,8 +59,8 @@ model |> export_savedmodel("path/to/artifact") ## Output Type: ## TensorSpec(shape=(None, 10), dtype=tf.float32, name=None) ## Captures: -## 127701998122768: TensorSpec(shape=(), dtype=tf.resource, name=None) -## 127701998119504: TensorSpec(shape=(), dtype=tf.resource, name=None) +## 131333390867856: TensorSpec(shape=(), dtype=tf.resource, name=None) +## 131333390868240: TensorSpec(shape=(), dtype=tf.resource, name=None) }\if{html}{\out{}} diff --git a/man/learning_rate_schedule_polynomial_decay.Rd b/man/learning_rate_schedule_polynomial_decay.Rd index 327b60e5c..cccccea16 100644 --- a/man/learning_rate_schedule_polynomial_decay.Rd +++ b/man/learning_rate_schedule_polynomial_decay.Rd @@ -7,7 +7,7 @@ learning_rate_schedule_polynomial_decay( initial_learning_rate, decay_steps, - end_learning_rate = 1e-04, + end_learning_rate = 0.0001, power = 1, cycle = FALSE, name = "PolynomialDecay" diff --git a/man/roxygen/meta.R b/man/roxygen/meta.R index 9b0fb06f7..afb267bb7 100644 --- a/man/roxygen/meta.R +++ b/man/roxygen/meta.R @@ -13,6 +13,15 @@ local({ } if (isNamespaceLoaded('roxygen2')) register_tether_tag_parser() else setHook(packageEvent("roxygen2", "onLoad"), register_tether_tag_parser) + + options( + paged.print = FALSE, + str = utils::strOptions(list.len = 6), + width = 76, + scipen = 1, # default == 0; positive values bias toward fixed notation, negative toward scientific + keras.plot.history.theme_bw = TRUE, + pillar.print_min = 5 + ) }) local({