diff --git a/R/dataset-flickr.R b/R/dataset-flickr.R index 218bf517..d9018e57 100644 --- a/R/dataset-flickr.R +++ b/R/dataset-flickr.R @@ -1,11 +1,10 @@ #' Flickr8k Dataset #' -#' Loads the Flickr8k dataset consisting of 8,000 images with five human-annotated captions per image. -#' The images in this dataset are in RGB format and vary in spatial resolution. -#' -#' The dataset is split into: -#' - `"train"`: training subset with captions. -#' - `"test"`: test subset with captions. +#' The Flickr8k and Flickr30k collections are **image captioning** datasets +#' composed of 8,000 and 30,000 color images respectively, each paired with five +#' human-annotated captions. The images are in RGB format with varying spatial +#' resolutions, and these datasets are widely used for training and evaluating +#' vision-language models. #' #' @inheritParams fgvc_aircraft_dataset #' @param root : Root directory for dataset storage. The dataset will be stored under `root/flickr8k`. @@ -25,17 +24,27 @@ #' first_item <- flickr8k[1] #' first_item$x # image array with shape {3, H, W} #' first_item$y # character vector containing five captions. +#' +#' # Load the Flickr30k caption dataset +#' flickr30k <- flickr30k_caption_dataset(download = TRUE) +#' +#' # Access the first item +#' first_item <- flickr30k[1] +#' first_item$x # image array with shape {3, H, W} +#' first_item$y # character vector containing five captions. 
#' } #' -#' @name flickr8k_caption_dataset -#' @aliases flickr8k_caption_dataset -#' @title Flickr8k Caption Dataset +#' @name flickr_caption_dataset +#' @title Flickr Caption Datasets +#' @rdname flickr_caption_dataset +#' @family caption_dataset #' @export flickr8k_caption_dataset <- torch::dataset( name = "flickr8k", training_file = "train.rds", test_file = "test.rds", class_index_file = "classes.rds", + archive_size = "1 GB", resources = list( c("https://github.com/jbrownlee/Datasets/releases/download/Flickr8k/Flickr8k_text.zip", "bf6c1abcb8e4a833b7f922104de18627"), @@ -55,10 +64,9 @@ flickr8k_caption_dataset <- torch::dataset( self$target_transform <- target_transform self$train <- train self$split <- if (train) "train" else "test" - - cli_inform("{.cls {class(self)[[1]]}} Dataset (~1GB) will be downloaded and processed if not already cached.") if (download) + cli_inform("{.cls {class(self)[[1]]}} Dataset (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.") self$download() if (!self$check_exists()) @@ -117,7 +125,7 @@ flickr8k_caption_dataset <- torch::dataset( self$captions <- data$captions self$classes <- readRDS(file.path(self$processed_folder, self$class_index_file)) - cli_inform("Split '{self$split}' loaded with {length(self$images)} samples.") + cli_inform("{.cls {class(self)[[1]]}} dataset loaded with {length(self$images)} images across {length(self$classes)} classes.") }, download = function() { @@ -125,7 +133,8 @@ flickr8k_caption_dataset <- torch::dataset( if (self$check_exists()) return() - cli_inform("Downloading {.cls {class(self)[[1]]}} split: '{self$split}'") + cli_inform("Downloading {.cls {class(self)[[1]]}}...") + fs::dir_create(self$raw_folder) for (r in self$resources) { @@ -141,6 +150,9 @@ flickr8k_caption_dataset <- torch::dataset( utils::untar(tar_path, exdir = self$raw_folder) } } + + cli_inform("{.cls {class(self)[[1]]}} dataset downloaded and extracted successfully.") + }, check_processed_exists 
= function() { @@ -183,13 +195,6 @@ flickr8k_caption_dataset <- torch::dataset( #' Flickr30k Dataset #' -#' Loads the Flickr30k dataset consisting of 30,000 images with five human-annotated captions per image. -#' The images in this dataset are in RGB format and vary in spatial resolution. -#' -#' The dataset is split into: -#' - `"train"`: training subset with captions. -#' - `"test"`: test subset with captions. -#' #' @inheritParams flickr8k_caption_dataset #' @param root Character. Root directory where the dataset will be stored under `root/flickr30k`. #' @@ -198,24 +203,12 @@ flickr8k_caption_dataset <- torch::dataset( #' - `x`: a H x W x 3 integer array representing an RGB image. #' - `y`: a character vector containing all five captions associated with the image. #' -#' @examples -#' \dontrun{ -#' # Load the Flickr30k caption dataset -#' flickr30k <- flickr30k_caption_dataset(download = TRUE) -#' -#' # Access the first item -#' first_item <- flickr30k[1] -#' first_item$x # image array with shape {3, H, W} -#' first_item$y # character vector containing five captions. 
-#' } -#' -#' @name flickr30k_caption_dataset -#' @aliases flickr30k_caption_dataset -#' @title Flickr30k Caption Dataset +#' @rdname flickr_caption_dataset #' @export flickr30k_caption_dataset <- torch::dataset( name = "flickr30k", inherit = flickr8k_caption_dataset, + archive_size = "4.1 GB", resources = list( c("https://uofi.app.box.com/shared/static/1cpolrtkckn4hxr1zhmfg0ln9veo6jpl.gz", "985ac761bbb52ca49e0c474ae806c07c"), c("https://cs.stanford.edu/people/karpathy/deepimagesent/caption_datasets.zip", "4fa8c08369d22fe16e41dc124bd1adc2") @@ -234,9 +227,8 @@ flickr30k_caption_dataset <- torch::dataset( self$train <- train self$split <- if (train) "train" else "test" - cli_inform("{.cls {class(self)[[1]]}} Dataset (~4.1GB) will be downloaded and processed if not already cached.") - if (download) + cli_inform("{.cls {class(self)[[1]]}} Dataset (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.") self$download() if (!self$check_exists()) @@ -261,7 +253,7 @@ flickr30k_caption_dataset <- torch::dataset( self$captions <- vapply(self$filenames, function(f) caption_to_index[[f]], integer(1)) self$classes <- captions_map - cli_inform("Split '{self$split}' loaded with {length(self$images)} samples.") + cli_inform("{.cls {class(self)[[1]]}} dataset loaded with {length(self$images)} images across {length(self$classes)} classes.") }, check_exists = function() { diff --git a/man/coco_caption_dataset.Rd b/man/coco_caption_dataset.Rd index ea8a66e7..00279070 100644 --- a/man/coco_caption_dataset.Rd +++ b/man/coco_caption_dataset.Rd @@ -56,4 +56,8 @@ plot(as.raster(image_array)) title(main = y, col.main = "black") } } +\seealso{ +Other caption_dataset: +\code{\link{flickr_caption_dataset}} +} \concept{caption_dataset} diff --git a/man/flickr8k_caption_dataset.Rd b/man/flickr8k_caption_dataset.Rd deleted file mode 100644 index 32719aca..00000000 --- a/man/flickr8k_caption_dataset.Rd +++ /dev/null @@ -1,58 +0,0 @@ -% Generated by roxygen2: do 
not edit by hand -% Please edit documentation in R/dataset-flickr.R -\name{flickr8k_caption_dataset} -\alias{flickr8k_caption_dataset} -\title{Flickr8k Caption Dataset} -\usage{ -flickr8k_caption_dataset( - root = tempdir(), - train = TRUE, - transform = NULL, - target_transform = NULL, - download = FALSE -) -} -\arguments{ -\item{root}{: Root directory for dataset storage. The dataset will be stored under \code{root/flickr8k}.} - -\item{train}{: If \code{TRUE}, loads the training set. If \code{FALSE}, loads the test set. Default is \code{TRUE}.} - -\item{transform}{Optional function to transform input images after loading. Default is \code{NULL}.} - -\item{target_transform}{Optional function to transform labels. Default is \code{NULL}.} - -\item{download}{Logical. Whether to download the dataset if not found locally. Default is \code{FALSE}.} -} -\value{ -A torch dataset of class \code{flickr8k_caption_dataset}. -Each element is a named list: -\itemize{ -\item \code{x}: a H x W x 3 integer array representing an RGB image. -\item \code{y}: a character vector containing all five captions associated with the image. -} -} -\description{ -Flickr8k Dataset -} -\details{ -Loads the Flickr8k dataset consisting of 8,000 images with five human-annotated captions per image. -The images in this dataset are in RGB format and vary in spatial resolution. - -The dataset is split into: -\itemize{ -\item \code{"train"}: training subset with captions. -\item \code{"test"}: test subset with captions. -} -} -\examples{ -\dontrun{ -# Load the Flickr8k caption dataset -flickr8k <- flickr8k_caption_dataset(download = TRUE) - -# Access the first item -first_item <- flickr8k[1] -first_item$x # image array with shape {3, H, W} -first_item$y # character vector containing five captions. 
-} - -} diff --git a/man/flickr30k_caption_dataset.Rd b/man/flickr_caption_dataset.Rd similarity index 53% rename from man/flickr30k_caption_dataset.Rd rename to man/flickr_caption_dataset.Rd index c38c3773..074b89b8 100644 --- a/man/flickr30k_caption_dataset.Rd +++ b/man/flickr_caption_dataset.Rd @@ -1,9 +1,19 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dataset-flickr.R -\name{flickr30k_caption_dataset} +\name{flickr_caption_dataset} +\alias{flickr_caption_dataset} +\alias{flickr8k_caption_dataset} \alias{flickr30k_caption_dataset} -\title{Flickr30k Caption Dataset} +\title{Flickr Caption Datasets} \usage{ +flickr8k_caption_dataset( + root = tempdir(), + train = TRUE, + transform = NULL, + target_transform = NULL, + download = FALSE +) + flickr30k_caption_dataset( root = tempdir(), train = TRUE, @@ -24,6 +34,13 @@ flickr30k_caption_dataset( \item{download}{Logical. Whether to download the dataset if not found locally. Default is \code{FALSE}.} } \value{ +A torch dataset of class \code{flickr8k_caption_dataset}. +Each element is a named list: +\itemize{ +\item \code{x}: a H x W x 3 integer array representing an RGB image. +\item \code{y}: a character vector containing all five captions associated with the image. +} + A torch dataset of class \code{flickr30k_caption_dataset}. Each element is a named list: \itemize{ @@ -32,20 +49,25 @@ Each element is a named list: } } \description{ -Flickr30k Dataset +Flickr8k Dataset } \details{ -Loads the Flickr30k dataset consisting of 30,000 images with five human-annotated captions per image. -The images in this dataset are in RGB format and vary in spatial resolution. - -The dataset is split into: -\itemize{ -\item \code{"train"}: training subset with captions. -\item \code{"test"}: test subset with captions. 
-} +The Flickr8k and Flickr30k collections are \strong{image captioning} datasets +composed of 8,000 and 30,000 color images respectively, each paired with five +human-annotated captions. The images are in RGB format with varying spatial +resolutions, and these datasets are widely used for training and evaluating +vision-language models. } \examples{ \dontrun{ +# Load the Flickr8k caption dataset +flickr8k <- flickr8k_caption_dataset(download = TRUE) + +# Access the first item +first_item <- flickr8k[1] +first_item$x # image array with shape {3, H, W} +first_item$y # character vector containing five captions. + # Load the Flickr30k caption dataset flickr30k <- flickr30k_caption_dataset(download = TRUE) @@ -56,3 +78,8 @@ first_item$y # character vector containing five captions. } } +\seealso{ +Other caption_dataset: +\code{\link{coco_caption_dataset}()} +} +\concept{caption_dataset} diff --git a/tests/testthat/test-dataset-flickr.R b/tests/testthat/test-dataset-flickr.R index 0a3cf81e..3fad0b6a 100644 --- a/tests/testthat/test-dataset-flickr.R +++ b/tests/testthat/test-dataset-flickr.R @@ -5,10 +5,9 @@ t <- withr::local_tempdir() test_that("tests for the flickr8k dataset for train split", { skip_on_cran() - expect_error( - flickr8k <- flickr8k_caption_dataset(root = tempfile()), - class = "rlang_error" - ) + skip_if(Sys.getenv("TEST_LARGE_DATASETS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_DATASETS=1 to enable tests requiring large downloads.") + flickr8k <- flickr8k_caption_dataset(root = t, train = TRUE, download = TRUE) expect_length(flickr8k, 6000) @@ -27,6 +26,9 @@ test_that("tests for the flickr8k dataset for train split", { test_that("tests for the flickr8k dataset for test split", { skip_on_cran() + skip_if(Sys.getenv("TEST_LARGE_DATASETS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_DATASETS=1 to enable tests requiring large downloads.") + flickr8k <- flickr8k_caption_dataset(root = t, train = FALSE) expect_length(flickr8k, 1000) first_item 
<- flickr8k[1] @@ -43,6 +45,9 @@ test_that("tests for the flickr8k dataset for test split", { test_that("tests for the flickr8k dataset for dataloader", { skip_on_cran() + + skip_if(Sys.getenv("TEST_LARGE_DATASETS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_DATASETS=1 to enable tests requiring large downloads.") flickr8k <- flickr8k_caption_dataset( root = t, @@ -70,10 +75,9 @@ test_that("tests for the flickr8k dataset for dataloader", { test_that("tests for the flickr30k dataset for train split", { skip_on_cran() - expect_error( - flickr30k <- flickr30k_caption_dataset(root = tempfile()), - class = "rlang_error" - ) + skip_if(Sys.getenv("TEST_LARGE_DATASETS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_DATASETS=1 to enable tests requiring large downloads.") + flickr30k <- flickr30k_caption_dataset(root = t, train = TRUE, download = TRUE) expect_length(flickr30k, 29000) @@ -93,6 +97,9 @@ test_that("tests for the flickr30k dataset for train split", { test_that("tests for the flickr30k dataset for test split", { skip_on_cran() + skip_if(Sys.getenv("TEST_LARGE_DATASETS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_DATASETS=1 to enable tests requiring large downloads.") + flickr30k <- flickr30k_caption_dataset(root = t, train = FALSE) expect_length(flickr30k, 1000) first_item <- flickr30k[1] @@ -110,6 +117,9 @@ test_that("tests for the flickr30k dataset for test split", { test_that("tests for the flickr30k dataset for dataloader", { skip_on_cran() + skip_if(Sys.getenv("TEST_LARGE_DATASETS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_DATASETS=1 to enable tests requiring large downloads.") + flickr30k <- flickr30k_caption_dataset( root = t, transform = function(x) {