diff --git a/NAMESPACE b/NAMESPACE
index 96c2221d..324f716e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -189,6 +189,7 @@ export(transform_to_tensor)
 export(transform_vflip)
 export(vision_make_grid)
 export(whoi_plankton_dataset)
+export(whoi_small_coralnet_dataset)
 export(whoi_small_plankton_dataset)
 importFrom(grDevices,dev.off)
 importFrom(graphics,polygon)
diff --git a/NEWS.md b/NEWS.md
index 17bb22d9..54ad9b53 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,7 +5,7 @@
 * Added `lfw_people_dataset()` and `lfw_pairs_dataset()` for loading Labelled Faces in the Wild (LFW) datasets (@DerrickUnleashed, #203).
 * Added `places365_dataset()`for loading the Places365 dataset (@koshtiakanksha, #196).
 * Added `pascal_segmentation_dataset()`, and `pascal_detection_dataset()` for loading the Pascal Visual Object Classes datasets (@DerrickUnleashed, #209).
-* Added `whoi_plankton_dataset()`, and `whoi_small_plankton_dataset()` (@cregouby, #236).
+* Added `whoi_plankton_dataset()`, `whoi_small_plankton_dataset()`, and `whoi_small_coralnet_dataset()` (@cregouby, #236).
 
 ## New models
diff --git a/R/dataset-flowers.R b/R/dataset-flowers.R
index ce4b006e..3e11fe58 100644
--- a/R/dataset-flowers.R
+++ b/R/dataset-flowers.R
@@ -99,7 +99,7 @@ flowers102_dataset <- dataset(
     meta <- readRDS(file.path(self$processed_folder, glue::glue("{self$split}.rds")))
     self$img_path <- meta$img_path
     self$labels <- meta$labels
-    cli_inform("Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} loaded with {length(self$img_path)} samples.")
+    cli_inform("Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} loaded with {self$.length()} samples.")
   },
 
   .getitem = function(index) {
diff --git a/R/dataset-plankton.R b/R/dataset-plankton.R
index a7909378..13676d9a 100644
--- a/R/dataset-plankton.R
+++ b/R/dataset-plankton.R
@@ -2,7 +2,13 @@
 #' WHOI-Plankton Dataset
 #'
 #' The WHOI-Plankton and WHOI-Plankton small are **image classification** datasets
-#' of submarine plankton small grayscale images of varying size, classified into 100 classes.
+#' of microscopic marine plankton from the Woods Hole Oceanographic Institution (WHOI)
+#' (<https://hdl.handle.net/10.1575/1912/7341>).
+#' Images were collected in situ by automated submersible imaging-in-flow cytometry
+#' with an instrument called Imaging FlowCytobot (IFCB). They are small grayscale images
+#' of varying size.
+#' Images are classified into 100 classes; an overview is available on the
+#' [project Wiki page](https://whoigit.github.io/whoi-plankton/).
 #' Dataset size is 957k and 58k respectively, and each provides a train / val / test split.
 #'
 #' @inheritParams eurosat_dataset
@@ -66,6 +72,7 @@ whoi_small_plankton_dataset <- torch::dataset(
       install.packages("prettyunits")
     }
 
+    self$split <- match.arg(split, c("train", "val", "test"))
     self$transform <- transform
     self$target_transform <- target_transform
     self$archive_url <- self$resources[self$resources$split == split,]$url
@@ -74,7 +81,7 @@ whoi_small_plankton_dataset <- torch::dataset(
     self$split_file <- sapply(self$archive_url, \(x) file.path(rappdirs::user_cache_dir("torch"), class(self)[1], sub("\\?download=.*", "", basename(x))))
 
     if (download) {
-      cli_inform("Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.")
+      cli_inform("Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.")
       self$download()
     }
 
@@ -106,7 +113,12 @@ whoi_small_plankton_dataset <- torch::dataset(
 
   .getitem = function(index) {
     df <- self$.data[index,]$to_data_frame()
-    x <- df$image$bytes %>% unlist() %>% as.raw() %>% png::readPNG()
+    x_raw <- df$image$bytes %>% unlist() %>% as.raw()
+    if (tolower(tools::file_ext(df$image$path)) == "jpg") {
+      x <- jpeg::readJPEG(x_raw)
+    } else {
+      x <- png::readPNG(x_raw)
+    }
     y <- df$label + 1L
 
     if (!is.null(self$transform))
@@ -127,7 +139,7 @@ whoi_small_plankton_dataset <- torch::dataset(
 
 #' WHOI-Plankton Dataset
 #'
-#' @inheritParams whoi_plankton_dataset#'
+#' @inheritParams whoi_plankton_dataset
 #' @rdname whoi_plankton_dataset
 #' @export
 whoi_plankton_dataset <- torch::dataset(
@@ -162,3 +174,32 @@ whoi_plankton_dataset <- torch::dataset(
     size = c(rep(450e6, 4), rep(490e6, 13), rep(450e6, 2))
   )
 )
+
+
+#' CoralNet Dataset
+#'
+#' The small CoralNet dataset is an **image classification** dataset
+#' of very large underwater coral reef images annotated into 3 classes,
+#' produced by [CoralNet](https://coralnet.ucsd.edu),
+#' a resource for benthic image classification.
+#'
+#' @inheritParams whoi_plankton_dataset
+#' @export
+whoi_small_coralnet_dataset <- torch::dataset(
+  name = "whoi_small_coralnet",
+  inherit = whoi_small_plankton_dataset,
+  archive_size = "2.1 GB",
+  resources = data.frame(
+    split = c("test", rep("train", 4), "val"),
+    url = c("https://huggingface.co/datasets/nf-whoi/coralnet-small/resolve/main/data/test-00000-of-00001.parquet?download=true",
+            paste0("https://huggingface.co/datasets/nf-whoi/coralnet-small/resolve/main/data/train-0000",0:3,"-of-00004.parquet?download=true"),
+            "https://huggingface.co/datasets/nf-whoi/coralnet-small/resolve/main/data/validation-00000-of-00001.parquet?download=true"),
+    md5 = c("f9a3ce864fdbeb5f1f3d243fe1285186",
+            "82269e2251db22ef213e438126198afd",
+            "82d2cafbad7740e476310565a2bcd44e",
+            "f4dd2d2effc1f9c02918e3ee614b85d3",
+            "d66ec691a4c5c63878a9cfff164a6aaf",
+            "7ea146b9b2f7b6cee99092bd44182d06"),
+    size = c(430e6, rep(380e6, 4), 192e6)
+  )
+)
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 2c8ee2c6..550a46e6 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -53,6 +53,7 @@ reference:
       - ends_with("1_dataset")
       - ends_with("2_dataset")
      - ends_with("5_dataset")
+      - ends_with("kton_dataset")
   - subtitle: for Object Detection
     descr: >
       Dataset having items with "y" as a named list of bounding-box and
diff --git a/inst/po/fr/LC_MESSAGES/R-torchvision.mo b/inst/po/fr/LC_MESSAGES/R-torchvision.mo
index fb3b3b54..af99fdc3 100644
Binary files a/inst/po/fr/LC_MESSAGES/R-torchvision.mo and b/inst/po/fr/LC_MESSAGES/R-torchvision.mo differ
diff --git a/man/whoi_plankton_dataset.Rd b/man/whoi_plankton_dataset.Rd
index f9f886ad..4a5ff305 100644
--- a/man/whoi_plankton_dataset.Rd
+++ b/man/whoi_plankton_dataset.Rd
@@ -45,7 +45,13 @@ WHOI-Plankton Dataset
 }
 \details{
 The WHOI-Plankton and WHOI-Plankton small are \strong{image classification} datasets
-of submarine plankton small grayscale images of varying size, classified into 100 classes.
+of microscopic marine plankton from the Woods Hole Oceanographic Institution (WHOI)
+(\url{https://hdl.handle.net/10.1575/1912/7341}).
+Images were collected in situ by automated submersible imaging-in-flow cytometry
+with an instrument called Imaging FlowCytobot (IFCB). They are small grayscale images
+of varying size.
+Images are classified into 100 classes; an overview is available on the
+\href{https://whoigit.github.io/whoi-plankton/}{project Wiki page}.
 Dataset size is 957k and 58k respectively, and each provides a train / val / test split.
 }
 \examples{
diff --git a/man/whoi_small_coralnet_dataset.Rd b/man/whoi_small_coralnet_dataset.Rd
new file mode 100644
index 00000000..bbdc3e89
--- /dev/null
+++ b/man/whoi_small_coralnet_dataset.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataset-plankton.R
+\name{whoi_small_coralnet_dataset}
+\alias{whoi_small_coralnet_dataset}
+\title{CoralNet Dataset}
+\usage{
+whoi_small_coralnet_dataset(
+  split = "val",
+  transform = NULL,
+  target_transform = NULL,
+  download = FALSE
+)
+}
+\arguments{
+\item{split}{One of \code{"train"}, \code{"val"}, or \code{"test"}. Default is \code{"val"}.}
+
+\item{transform}{Optional. A function that takes an image and returns a transformed version (e.g., normalization, cropping).}
+
+\item{target_transform}{Optional. A function that transforms the label.}
+
+\item{download}{Logical. If TRUE, downloads the dataset to \verb{root/}.
If the dataset is already present, download is skipped.}
+}
+\description{
+The small CoralNet dataset is an \strong{image classification} dataset
+of very large underwater coral reef images annotated into 3 classes,
+produced by \href{https://coralnet.ucsd.edu}{CoralNet},
+a resource for benthic image classification.
+}
diff --git a/po/R-fr.po b/po/R-fr.po
index 1dfa33f4..be964bda 100644
--- a/po/R-fr.po
+++ b/po/R-fr.po
@@ -1,8 +1,8 @@
 msgid ""
 msgstr ""
 "Project-Id-Version: torchvision 0.7.0.9000\n"
-"POT-Creation-Date: 2025-08-10 20:26+0200\n"
-"PO-Revision-Date: 2025-08-10 20:35+0200\n"
+"POT-Creation-Date: 2025-08-15 10:15+0200\n"
+"PO-Revision-Date: 2025-08-15 10:18+0200\n"
 "Last-Translator: Christophe Regouby \n"
 "Language-Team: \n"
 "Language: fr\n"
@@ -46,12 +46,12 @@ msgstr ""
 #: dataset-lfw.R:246 dataset-mnist.R:86 dataset-mnist.R:234 dataset-mnist.R:421
 #: dataset-oxfordiiitpet.R:71 dataset-oxfordiiitpet.R:283
 #: dataset-oxfordiiitpet.R:347 dataset-pascal.R:136 dataset-pascal.R:294
-#: dataset-places365.R:98 dataset-plankton.R:71
+#: dataset-places365.R:98
 msgid ""
 "Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be "
 "downloaded and processed if not already available."
 msgstr ""
-"Le jeu de données {.cls {class(self)[[1]]}} de taille (~{.emph "
+"Le jeu de données {.cls {class(self)[[1]]}} (de taille ~{.emph "
 "{self$archive_size}}) sera téléchargé et traité s'il n'est pas déjà "
 "disponible."
 
@@ -60,7 +60,7 @@ msgstr ""
 #: dataset-flowers.R:97 dataset-lfw.R:113 dataset-lfw.R:251 dataset-mnist.R:91
 #: dataset-mnist.R:239 dataset-mnist.R:426 dataset-oxfordiiitpet.R:76
 #: dataset-oxfordiiitpet.R:288 dataset-oxfordiiitpet.R:352 dataset-pascal.R:141
-#: dataset-pascal.R:299 dataset-places365.R:103 dataset-plankton.R:76
+#: dataset-pascal.R:299 dataset-places365.R:103 dataset-plankton.R:89
 msgid "Dataset not found. You can use `download = TRUE` to download it."
 msgstr ""
 "Jeu de données introuvable. Veuillez ajouter `download = TRUE` pour le "
 "télécharger."
 
 #: dataset-caltech.R:84 dataset-caltech.R:208 dataset-lfw.R:135
 #: dataset-lfw.R:286 dataset-oxfordiiitpet.R:95 dataset-oxfordiiitpet.R:302
 #: dataset-oxfordiiitpet.R:366 dataset-pascal.R:149 dataset-pascal.R:317
-#: dataset-plankton.R:83
+#: dataset-plankton.R:93
 msgid ""
 "{.cls {class(self)[[1]]}} dataset loaded with {self$.length()} images across "
 "{length(self$classes)} classes."
 msgstr ""
 
 #: dataset-caltech.R:117 dataset-coco.R:187 dataset-eurosat.R:81
 #: dataset-flickr.R:136 dataset-lfw.R:146 dataset-oxfordiiitpet.R:107
-#: dataset-pascal.R:158 dataset-plankton.R:90
+#: dataset-pascal.R:158 dataset-plankton.R:100
 msgid "Downloading {.cls {class(self)[[1]]}}..."
 msgstr "Téléchargement de {.cls {class(self)[[1]]}}..."
 
 #: dataset-caltech.R:125 dataset-cifar.R:111 dataset-coco.R:193
 #: dataset-eurosat.R:88 dataset-fer.R:129 dataset-flowers.R:136
 #: dataset-lfw.R:157 dataset-lfw.R:172 dataset-mnist.R:121 dataset-mnist.R:264
 #: dataset-mnist.R:453 dataset-oxfordiiitpet.R:115 dataset-pascal.R:168
-#: dataset-places365.R:182 dataset-plankton.R:95 models-vit.R:49
+#: dataset-places365.R:182 dataset-plankton.R:105 models-vit.R:49
 msgid "Corrupt file! Delete the file in {archive} and try again."
 msgstr "Fichier corrompu. Supprimez le fichier {archive} et recommencez."
 
@@ -160,7 +160,7 @@ msgstr "Extraction de l'archive {.cls {class(self)[[1]]}} terminée..."
 #: dataset-eurosat.R:98
 msgid "Downloading {.cls {class(self)[[1]]}} split file: {self$split_url}"
 msgstr ""
-"Téléchargement du fichier de split de {.cls {class(self)[[1]]}} depuis "
+"Téléchargement du fichier de partition de {.cls {class(self)[[1]]}} depuis "
 "{self$split_url} ..."
 
 #: dataset-eurosat.R:102
@@ -214,18 +214,18 @@ msgstr ""
 #: dataset-flowers.R:102
 msgid ""
 "Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} loaded with "
-"{length(self$img_path)} samples."
+"{self$.length()} samples."
 msgstr ""
-"Le sous-ensemble {.val {self$split}} du jeu de données {.cls {class(self)"
-"[[1]]}} chargé avec {length(self$img_path)} images."
+"La partition {.val {self$split}} du jeu de données {.cls {class(self)[[1]]}} "
+"est chargée avec {self$.length()} images."
 
 #: dataset-flowers.R:126
 msgid ""
 "Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} is already "
 "processed and cached."
 msgstr ""
-"Le sous-ensemble {.val {self$split}} du jeu de données {.cls {class(self)"
-"[[1]]}} a déjà été préparé et mis en cache."
+"La partition {.val {self$split}} du jeu de données {.cls {class(self)[[1]]}} "
+"a déjà été préparée et mise en cache."
 
 #: dataset-flowers.R:140
 msgid "{.cls {class(self)[[1]]}} Extracting images and processing dataset..."
@@ -262,9 +262,19 @@ msgstr "Partage non valide : {self$split}"
 #: dataset-places365.R:163
 msgid "Downloading {.cls {class(self)[[1]]}} split '{self$split}'..."
 msgstr ""
-"Téléchargement du fichier de partage '{self$split}' de {.cls {class(self)"
+"Téléchargement du fichier de partition '{self$split}' de {.cls {class(self)"
 "[[1]]} ..."
 
+#: dataset-plankton.R:84
+msgid ""
+"Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} (~{.emph "
+"{self$archive_size}}) will be downloaded and processed if not already "
+"available."
+msgstr ""
+"La partition {.val {self$split}} du jeu de données {.cls {class(self)[[1]]}} "
+"(de taille ~{.emph {self$archive_size}}) sera téléchargée et traitée si elle "
+"n'est pas déjà disponible."
+
 #: extension.R:2
 msgid ""
 "has_ops() Not implemented yet. 
https://github.com/pytorch/vision/blob/" diff --git a/po/R-torchvision.pot b/po/R-torchvision.pot index 3b0dbb0f..e94e2ef6 100644 --- a/po/R-torchvision.pot +++ b/po/R-torchvision.pot @@ -1,7 +1,7 @@ msgid "" msgstr "" "Project-Id-Version: torchvision 0.7.0.9000\n" -"POT-Creation-Date: 2025-08-10 20:26+0200\n" +"POT-Creation-Date: 2025-08-15 10:15+0200\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -38,27 +38,27 @@ msgstr "" msgid "deprecated" msgstr "" -#: dataset-caltech.R:61 dataset-cifar.R:52 dataset-coco.R:85 dataset-coco.R:305 dataset-eurosat.R:57 dataset-fer.R:63 dataset-fgvc.R:91 dataset-flickr.R:69 dataset-flickr.R:231 dataset-flowers.R:92 dataset-lfw.R:108 dataset-lfw.R:246 dataset-mnist.R:86 dataset-mnist.R:234 dataset-mnist.R:421 dataset-oxfordiiitpet.R:71 dataset-oxfordiiitpet.R:283 dataset-oxfordiiitpet.R:347 dataset-pascal.R:136 dataset-pascal.R:294 dataset-places365.R:98 dataset-plankton.R:71 +#: dataset-caltech.R:61 dataset-cifar.R:52 dataset-coco.R:85 dataset-coco.R:305 dataset-eurosat.R:57 dataset-fer.R:63 dataset-fgvc.R:91 dataset-flickr.R:69 dataset-flickr.R:231 dataset-flowers.R:92 dataset-lfw.R:108 dataset-lfw.R:246 dataset-mnist.R:86 dataset-mnist.R:234 dataset-mnist.R:421 dataset-oxfordiiitpet.R:71 dataset-oxfordiiitpet.R:283 dataset-oxfordiiitpet.R:347 dataset-pascal.R:136 dataset-pascal.R:294 dataset-places365.R:98 msgid "" "Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be " "downloaded and processed if not already available." msgstr "" -#: dataset-caltech.R:66 dataset-caltech.R:187 dataset-coco.R:90 dataset-coco.R:310 dataset-eurosat.R:64 dataset-fer.R:70 dataset-flowers.R:97 dataset-lfw.R:113 dataset-lfw.R:251 dataset-mnist.R:91 dataset-mnist.R:239 dataset-mnist.R:426 dataset-oxfordiiitpet.R:76 dataset-oxfordiiitpet.R:288 dataset-oxfordiiitpet.R:352 dataset-pascal.R:141 dataset-pascal.R:299 dataset-places365.R:103 dataset-plankton.R:76 +#: dataset-caltech.R:66 dataset-caltech.R:187 dataset-coco.R:90 dataset-coco.R:310 dataset-eurosat.R:64 dataset-fer.R:70 dataset-flowers.R:97 dataset-lfw.R:113 dataset-lfw.R:251 dataset-mnist.R:91 dataset-mnist.R:239 dataset-mnist.R:426 dataset-oxfordiiitpet.R:76 dataset-oxfordiiitpet.R:288 dataset-oxfordiiitpet.R:352 dataset-pascal.R:141 dataset-pascal.R:299 dataset-places365.R:103 dataset-plankton.R:89 msgid "Dataset not found. You can use `download = TRUE` to download it." msgstr "" -#: dataset-caltech.R:84 dataset-caltech.R:208 dataset-lfw.R:135 dataset-lfw.R:286 dataset-oxfordiiitpet.R:95 dataset-oxfordiiitpet.R:302 dataset-oxfordiiitpet.R:366 dataset-pascal.R:149 dataset-pascal.R:317 dataset-plankton.R:83 +#: dataset-caltech.R:84 dataset-caltech.R:208 dataset-lfw.R:135 dataset-lfw.R:286 dataset-oxfordiiitpet.R:95 dataset-oxfordiiitpet.R:302 dataset-oxfordiiitpet.R:366 dataset-pascal.R:149 dataset-pascal.R:317 dataset-plankton.R:93 msgid "" "{.cls {class(self)[[1]]}} dataset loaded with {self$.length()} images across " "{length(self$classes)} classes." msgstr "" -#: dataset-caltech.R:117 dataset-coco.R:187 dataset-eurosat.R:81 dataset-flickr.R:136 dataset-lfw.R:146 dataset-oxfordiiitpet.R:107 dataset-pascal.R:158 dataset-plankton.R:90 +#: dataset-caltech.R:117 dataset-coco.R:187 dataset-eurosat.R:81 dataset-flickr.R:136 dataset-lfw.R:146 dataset-oxfordiiitpet.R:107 dataset-pascal.R:158 dataset-plankton.R:100 msgid "Downloading {.cls {class(self)[[1]]}}..." 
msgstr "" -#: dataset-caltech.R:125 dataset-cifar.R:111 dataset-coco.R:193 dataset-eurosat.R:88 dataset-fer.R:129 dataset-flowers.R:136 dataset-lfw.R:157 dataset-lfw.R:172 dataset-mnist.R:121 dataset-mnist.R:264 dataset-mnist.R:453 dataset-oxfordiiitpet.R:115 dataset-pascal.R:168 dataset-places365.R:182 dataset-plankton.R:95 models-vit.R:49 +#: dataset-caltech.R:125 dataset-cifar.R:111 dataset-coco.R:193 dataset-eurosat.R:88 dataset-fer.R:129 dataset-flowers.R:136 dataset-lfw.R:157 dataset-lfw.R:172 dataset-mnist.R:121 dataset-mnist.R:264 dataset-mnist.R:453 dataset-oxfordiiitpet.R:115 dataset-pascal.R:168 dataset-places365.R:182 dataset-plankton.R:105 models-vit.R:49 msgid "Corrupt file! Delete the file in {archive} and try again." msgstr "" @@ -158,7 +158,7 @@ msgstr "" #: dataset-flowers.R:102 msgid "" "Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} loaded with " -"{length(self$img_path)} samples." +"{self$.length()} samples." msgstr "" #: dataset-flowers.R:126 @@ -198,6 +198,13 @@ msgstr "" msgid "Downloading {.cls {class(self)[[1]]}} split '{self$split}'..." msgstr "" +#: dataset-plankton.R:84 +msgid "" +"Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} (~{.emph " +"{self$archive_size}}) will be downloaded and processed if not already " +"available." +msgstr "" + #: extension.R:2 msgid "" "has_ops() Not implemented yet. https://github.com/pytorch/vision/blob/" diff --git a/tests/testthat/test-dataset-plankton.R b/tests/testthat/test-dataset-plankton.R index be88ab61..f1feba88 100644 --- a/tests/testthat/test-dataset-plankton.R +++ b/tests/testthat/test-dataset-plankton.R @@ -1,4 +1,4 @@ -context("dataset-eurosat") +context("dataset-plankton") test_that("whoi_small_plankton_dataset downloads correctly whatever the split", { @@ -27,7 +27,7 @@ test_that("whoi_small_plankton_dataset downloads correctly whatever the split", first_item <- val_ds[1] expect_tensor_shape(first_item$x, c(1,145, 230)) - # classification of the first item is "48: Leptocylindrus" + # classification of the first item is "47: Leegaardiella_ovalis" expect_equal(first_item$y, 47L) expect_equal(val_ds$classes[first_item$y], "Leegaardiella_ovalis") @@ -39,6 +39,47 @@ test_that("whoi_small_plankton_dataset downloads correctly whatever the split", }) + +test_that("whoi_small_coralnet_dataset downloads correctly whatever the split", { + skip_on_cran() + skip_if_not_installed("torch") + skip_if(Sys.getenv("TEST_LARGE_DATASETS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_DATASETS=1 to enable tests requiring large downloads.") + + expect_error( + whoi_small_coralnet_dataset(split = "test", download = FALSE), + "Dataset not found. 
You can use `download = TRUE`", + label = "Dataset should fail if not previously downloaded" + ) + + expect_no_error( + train_ds <- whoi_small_coralnet_dataset(split = "train", download = TRUE) + ) + + expect_is(train_ds, "dataset", "train should be a dataset") + # Train dataset should have exactly 314 samples + expect_equal(train_ds$.length(), 314) + + expect_no_error( + val_ds <- whoi_small_coralnet_dataset(split = "val", download = TRUE, transform = transform_to_tensor) + ) + # Validation dataset should have exactly 45 samples + expect_equal(val_ds$.length(), 45) + + first_item <- val_ds[1] + expect_tensor_shape(first_item$x, c(3, 3000, 4000)) + # classification of the first item is "1: diploria_labrinthyformis" + expect_equal(first_item$y, 1L) + expect_equal(val_ds$classes[first_item$y], "diploria_labrinthyformis") + + expect_no_error( + test_ds <- whoi_small_coralnet_dataset(split = "test", download = TRUE) + ) + # Test dataset should have exactly 91 samples + expect_equal(test_ds$.length(), 91) + +}) + test_that("whoi_small_plankton_dataset derivatives download and prepare correctly", { skip_on_cran() skip_if_not_installed("torch")
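
---

A quick usage sketch to accompany the diff (not part of the patch): how the new constructor is expected to behave, based on the expectations pinned down in `tests/testthat/test-dataset-plankton.R`. The split size, tensor shape, and class name below come from those test expectations; the object names and the dataloader step are illustrative only.

```r
library(torch)
library(torchvision)

# Download and decode the CoralNet validation split (~192 MB of parquet).
val_ds <- whoi_small_coralnet_dataset(
  split = "val",
  transform = transform_to_tensor,
  download = TRUE
)

val_ds$.length()         # 45 samples, per the test expectations
first <- val_ds[1]
first$x$shape            # (3, 3000, 4000): RGB tensors from very large JPEGs
val_ds$classes[first$y]  # "diploria_labrinthyformis", one of 3 classes

# Batching works as for any {torch} dataset; given the 3000 x 4000 images,
# a small batch size is advisable.
dl <- dataloader(val_ds, batch_size = 1)
```

Since the CoralNet parquet files carry JPEG bytes while the plankton sets carry PNGs, `.getitem()` now dispatches on the stored file extension. A minimal standalone sketch of that branch (the `decode_image()` helper name is hypothetical; the dataset inlines this logic):

```r
# x_raw: raw vector of image bytes; path: the file name stored in the parquet row.
decode_image <- function(x_raw, path) {
  if (tolower(tools::file_ext(path)) == "jpg") {
    jpeg::readJPEG(x_raw)  # RGB array with values in [0, 1]
  } else {
    png::readPNG(x_raw)    # grayscale array for the plankton images
  }
}
```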