From e2d93c7f8902f56400523bdd023d15f8c7cd1497 Mon Sep 17 00:00:00 2001 From: "C. Regouby" Date: Fri, 15 Aug 2025 10:19:36 +0200 Subject: [PATCH 1/3] Improve messages and translations with 'split' information Initial addition of whoi_small_coralnet --- NAMESPACE | 1 + R/dataset-flowers.R | 2 +- R/dataset-plankton.R | 39 ++++++++++++++++++++++++++--- _pkgdown.yml | 1 + man/whoi_plankton_dataset.Rd | 8 +++++- man/whoi_small_coralnet_dataset.Rd | 25 +++++++++++++++++++ po/R-fr.po | 40 +++++++++++++++++++----------- po/R-torchvision.pot | 21 ++++++++++------ 8 files changed, 110 insertions(+), 27 deletions(-) create mode 100644 man/whoi_small_coralnet_dataset.Rd diff --git a/NAMESPACE b/NAMESPACE index 96c2221d..324f716e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -189,6 +189,7 @@ export(transform_to_tensor) export(transform_vflip) export(vision_make_grid) export(whoi_plankton_dataset) +export(whoi_small_coralnet_dataset) export(whoi_small_plankton_dataset) importFrom(grDevices,dev.off) importFrom(graphics,polygon) diff --git a/R/dataset-flowers.R b/R/dataset-flowers.R index ce4b006e..3e11fe58 100644 --- a/R/dataset-flowers.R +++ b/R/dataset-flowers.R @@ -99,7 +99,7 @@ flowers102_dataset <- dataset( meta <- readRDS(file.path(self$processed_folder, glue::glue("{self$split}.rds"))) self$img_path <- meta$img_path self$labels <- meta$labels - cli_inform("Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} loaded with {length(self$img_path)} samples.") + cli_inform("Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} loaded with {self$.length()} samples.") }, .getitem = function(index) { diff --git a/R/dataset-plankton.R b/R/dataset-plankton.R index a7909378..3fd823d0 100644 --- a/R/dataset-plankton.R +++ b/R/dataset-plankton.R @@ -2,7 +2,13 @@ #' WHOI-Plankton Dataset #' #' The WHOI-Plankton and WHOI-Plankton small are **image classification** datasets -#' of submarine plankton small grayscale images of varying size, classified into 100 classes. 
+#' from the Woods Hole Oceanographic Institution (WHOI) of microscopic marine plankton. +#' https://hdl.handle.net/10.1575/1912/7341 +#' Images were collected in situ by automated submersible imaging-in-flow cytometry +#' with an instrument called Imaging FlowCytobot (IFCB). They are small grayscale images +#' of varying size. +#' Images are classified into 100 classes, with an overview available in +#' [project Wiki page](https://whoigit.github.io/whoi-plankton/) #' Dataset size is 957k and 58k respectively, and each provides a train / val / test split. #' #' @inheritParams eurosat_dataset @@ -66,6 +72,7 @@ whoi_small_plankton_dataset <- torch::dataset( install.packages("prettyunits") } + self$split <- match.arg(split, c("train", "val", "test")) self$transform <- transform self$target_transform <- target_transform self$archive_url <- self$resources[self$resources$split == split,]$url @@ -74,7 +81,7 @@ whoi_small_plankton_dataset <- torch::dataset( self$split_file <- sapply(self$archive_url, \(x) file.path(rappdirs::user_cache_dir("torch"), class(self)[1], sub("\\?download=.*", "", basename(x)))) if (download) { - cli_inform("Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.") + cli_inform("Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.") self$download() } @@ -127,7 +134,7 @@ whoi_small_plankton_dataset <- torch::dataset( #' WHOI-Plankton Dataset #' -#' @inheritParams whoi_plankton_dataset#' +#' @inheritParams whoi_plankton_dataset #' @rdname whoi_plankton_dataset #' @export whoi_plankton_dataset <- torch::dataset( @@ -162,3 +169,29 @@ whoi_plankton_dataset <- torch::dataset( size = c(rep(450e6, 4), rep(490e6, 13), rep(450e6, 2)) ) ) + + +#' Coralnet Dataset +#' +#' [CoralNet](https://coralnet.ucsd.edu) is a resource for benthic images classification. 
+#' +#' @inheritParams whoi_plankton_dataset +#' @export +whoi_small_coralnet_dataset <- torch::dataset( + name = "whoi_small_coralnet", + inherit = whoi_small_plankton_dataset, + archive_size = "2.1 GB", + resources = data.frame( + split = c("test", rep("train", 4), "val"), + url = c("https://huggingface.co/datasets/nf-whoi/coralnet-small/resolve/main/data/test-00000-of-00001.parquet?download=true", + paste0("https://huggingface.co/datasets/nf-whoi/coralnet-small/resolve/main/data/train-0000",0:3,"-of-00004.parquet?download=true"), + "https://huggingface.co/datasets/nf-whoi/coralnet-small/resolve/main/data/validation-00000-of-00001.parquet?download=true"), + md5 = c("cd41b344ec4b6af83e39c38e19f09190", + "aa0965c0e59f7b1cddcb3c565d80edf3", + "b2a75513f1a084724e100678d8ee7180", + "a03c4d52758078bfb0799894926d60f6", + "07eaff140f39868a8bcb1d3c02ebe60f", + "87c927b9fbe0c327b7b9ae18388b4fcf"), + size = c(430e6, rep(380e6, 4), 192e6) + ) +) diff --git a/_pkgdown.yml b/_pkgdown.yml index 2c8ee2c6..550a46e6 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -53,6 +53,7 @@ reference: - ends_with("1_dataset") - ends_with("2_dataset") - ends_with("5_dataset") + - ends_with("kton_dataset") - subtitle: for Object Detection descr: > Dataset having items with "y" as a named list of bounding-box and diff --git a/man/whoi_plankton_dataset.Rd b/man/whoi_plankton_dataset.Rd index f9f886ad..4a5ff305 100644 --- a/man/whoi_plankton_dataset.Rd +++ b/man/whoi_plankton_dataset.Rd @@ -45,7 +45,13 @@ WHOI-Plankton Dataset } \details{ The WHOI-Plankton and WHOI-Plankton small are \strong{image classification} datasets -of submarine plankton small grayscale images of varying size, classified into 100 classes. +from the Woods Hole Oceanographic Institution (WHOI) of microscopic marine plankton. +https://hdl.handle.net/10.1575/1912/7341 +Images were collected in situ by automated submersible imaging-in-flow cytometry +with an instrument called Imaging FlowCytobot (IFCB). 
They are small grayscale images +of varying size. +Images are classified into 100 classes, with an overview available in +\href{https://whoigit.github.io/whoi-plankton/}{project Wiki page} Dataset size is 957k and 58k respectively, and each provides a train / val / test split. } \examples{ diff --git a/man/whoi_small_coralnet_dataset.Rd b/man/whoi_small_coralnet_dataset.Rd new file mode 100644 index 00000000..06e31194 --- /dev/null +++ b/man/whoi_small_coralnet_dataset.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataset-plankton.R +\name{whoi_small_coralnet_dataset} +\alias{whoi_small_coralnet_dataset} +\title{Coralnet Dataset} +\usage{ +whoi_small_coralnet_dataset( + split = "val", + transform = NULL, + target_transform = NULL, + download = FALSE +) +} +\arguments{ +\item{split}{One of \code{"train"}, \code{"val"}, or \code{"test"}. Default is \code{"val"}.} + +\item{transform}{Optional. A function that takes an image and returns a transformed version (e.g., normalization, cropping).} + +\item{target_transform}{Optional. A function that transforms the label.} + +\item{download}{Logical. If TRUE, downloads the dataset to \verb{root/}. 
If the dataset is already present, download is skipped.} +} +\description{ +Coralnet Dataset +} diff --git a/po/R-fr.po b/po/R-fr.po index 1dfa33f4..be964bda 100644 --- a/po/R-fr.po +++ b/po/R-fr.po @@ -1,8 +1,8 @@ msgid "" msgstr "" "Project-Id-Version: torchvision 0.7.0.9000\n" -"POT-Creation-Date: 2025-08-10 20:26+0200\n" -"PO-Revision-Date: 2025-08-10 20:35+0200\n" +"POT-Creation-Date: 2025-08-15 10:15+0200\n" +"PO-Revision-Date: 2025-08-15 10:18+0200\n" "Last-Translator: Christophe Regouby \n" "Language-Team: \n" "Language: fr\n" @@ -46,12 +46,12 @@ msgstr "" #: dataset-lfw.R:246 dataset-mnist.R:86 dataset-mnist.R:234 dataset-mnist.R:421 #: dataset-oxfordiiitpet.R:71 dataset-oxfordiiitpet.R:283 #: dataset-oxfordiiitpet.R:347 dataset-pascal.R:136 dataset-pascal.R:294 -#: dataset-places365.R:98 dataset-plankton.R:71 +#: dataset-places365.R:98 msgid "" "Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be " "downloaded and processed if not already available." msgstr "" -"Le jeu de données {.cls {class(self)[[1]]}} de taille (~{.emph " +"Le jeu de données {.cls {class(self)[[1]]}} (de taille ~{.emph " "{self$archive_size}}) sera téléchargé et traité s'il n'est pas déjà " "disponible." @@ -60,7 +60,7 @@ msgstr "" #: dataset-flowers.R:97 dataset-lfw.R:113 dataset-lfw.R:251 dataset-mnist.R:91 #: dataset-mnist.R:239 dataset-mnist.R:426 dataset-oxfordiiitpet.R:76 #: dataset-oxfordiiitpet.R:288 dataset-oxfordiiitpet.R:352 dataset-pascal.R:141 -#: dataset-pascal.R:299 dataset-places365.R:103 dataset-plankton.R:76 +#: dataset-pascal.R:299 dataset-places365.R:103 dataset-plankton.R:89 msgid "Dataset not found. You can use `download = TRUE` to download it." msgstr "" "Jeu de données introuvable. 
Veuillez ajouter `download = TRUE` pour le " @@ -69,7 +69,7 @@ msgstr "" #: dataset-caltech.R:84 dataset-caltech.R:208 dataset-lfw.R:135 #: dataset-lfw.R:286 dataset-oxfordiiitpet.R:95 dataset-oxfordiiitpet.R:302 #: dataset-oxfordiiitpet.R:366 dataset-pascal.R:149 dataset-pascal.R:317 -#: dataset-plankton.R:83 +#: dataset-plankton.R:93 msgid "" "{.cls {class(self)[[1]]}} dataset loaded with {self$.length()} images across " "{length(self$classes)} classes." @@ -79,7 +79,7 @@ msgstr "" #: dataset-caltech.R:117 dataset-coco.R:187 dataset-eurosat.R:81 #: dataset-flickr.R:136 dataset-lfw.R:146 dataset-oxfordiiitpet.R:107 -#: dataset-pascal.R:158 dataset-plankton.R:90 +#: dataset-pascal.R:158 dataset-plankton.R:100 msgid "Downloading {.cls {class(self)[[1]]}}..." msgstr "Téléchargement de {.cls {class(self)[[1]]}}..." @@ -87,7 +87,7 @@ msgstr "Téléchargement de {.cls {class(self)[[1]]}}..." #: dataset-eurosat.R:88 dataset-fer.R:129 dataset-flowers.R:136 #: dataset-lfw.R:157 dataset-lfw.R:172 dataset-mnist.R:121 dataset-mnist.R:264 #: dataset-mnist.R:453 dataset-oxfordiiitpet.R:115 dataset-pascal.R:168 -#: dataset-places365.R:182 dataset-plankton.R:95 models-vit.R:49 +#: dataset-places365.R:182 dataset-plankton.R:105 models-vit.R:49 msgid "Corrupt file! Delete the file in {archive} and try again." msgstr "Fichier corrompu. Supprimez le fichier {archive} et recommencez." @@ -160,7 +160,7 @@ msgstr "Extraction de l'archive {.cls {class(self)[[1]]}} terminée..." #: dataset-eurosat.R:98 msgid "Downloading {.cls {class(self)[[1]]}} split file: {self$split_url}" msgstr "" -"Téléchargement du fichier de split de {.cls {class(self)[[1]]}} depuis " +"Téléchargement du fichier de partition de {.cls {class(self)[[1]]}} depuis " "{self$split_url} ..." #: dataset-eurosat.R:102 @@ -214,18 +214,18 @@ msgstr "" #: dataset-flowers.R:102 msgid "" "Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} loaded with " -"{length(self$img_path)} samples." +"{self$.length()} samples." 
msgstr "" -"Le sous-ensemble {.val {self$split}} du jeu de données {.cls {class(self)" -"[[1]]}} chargé avec {length(self$img_path)} images." +"La partition {.val {self$split}} du jeu de données {.cls {class(self)[[1]]}} " +"est chargée avec {self$.length()} images." #: dataset-flowers.R:126 msgid "" "Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} is already " "processed and cached." msgstr "" -"Le sous-ensemble {.val {self$split}} du jeu de données {.cls {class(self)" -"[[1]]}} a déjà été préparé et mis en cache." +"La partition {.val {self$split}} du jeu de données {.cls {class(self)[[1]]}} " +"a déjà été préparée et mise en cache." #: dataset-flowers.R:140 msgid "{.cls {class(self)[[1]]}} Extracting images and processing dataset..." msgstr "" @@ -262,9 +262,19 @@ msgstr "Partage non valide : {self$split}" #: dataset-places365.R:163 msgid "Downloading {.cls {class(self)[[1]]}} split '{self$split}'..." msgstr "" -"Téléchargement du fichier de partage '{self$split}' de {.cls {class(self)" +"Téléchargement du fichier de partition '{self$split}' de {.cls {class(self)" "[[1]]} ..." +#: dataset-plankton.R:84 +msgid "" +"Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} (~{.emph " +"{self$archive_size}}) will be downloaded and processed if not already " +"available." +msgstr "" +"La partition {.val {self$split}} du jeu de données {.cls {class(self)[[1]]}} " +"(de taille ~{.emph {self$archive_size}}) sera téléchargée et traitée si elle " +"n'est pas déjà disponible." + #: extension.R:2 msgid "" "has_ops() Not implemented yet. 
https://github.com/pytorch/vision/blob/" diff --git a/po/R-torchvision.pot b/po/R-torchvision.pot index 3b0dbb0f..e94e2ef6 100644 --- a/po/R-torchvision.pot +++ b/po/R-torchvision.pot @@ -1,7 +1,7 @@ msgid "" msgstr "" "Project-Id-Version: torchvision 0.7.0.9000\n" -"POT-Creation-Date: 2025-08-10 20:26+0200\n" +"POT-Creation-Date: 2025-08-15 10:15+0200\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -38,27 +38,27 @@ msgstr "" msgid "deprecated" msgstr "" -#: dataset-caltech.R:61 dataset-cifar.R:52 dataset-coco.R:85 dataset-coco.R:305 dataset-eurosat.R:57 dataset-fer.R:63 dataset-fgvc.R:91 dataset-flickr.R:69 dataset-flickr.R:231 dataset-flowers.R:92 dataset-lfw.R:108 dataset-lfw.R:246 dataset-mnist.R:86 dataset-mnist.R:234 dataset-mnist.R:421 dataset-oxfordiiitpet.R:71 dataset-oxfordiiitpet.R:283 dataset-oxfordiiitpet.R:347 dataset-pascal.R:136 dataset-pascal.R:294 dataset-places365.R:98 dataset-plankton.R:71 +#: dataset-caltech.R:61 dataset-cifar.R:52 dataset-coco.R:85 dataset-coco.R:305 dataset-eurosat.R:57 dataset-fer.R:63 dataset-fgvc.R:91 dataset-flickr.R:69 dataset-flickr.R:231 dataset-flowers.R:92 dataset-lfw.R:108 dataset-lfw.R:246 dataset-mnist.R:86 dataset-mnist.R:234 dataset-mnist.R:421 dataset-oxfordiiitpet.R:71 dataset-oxfordiiitpet.R:283 dataset-oxfordiiitpet.R:347 dataset-pascal.R:136 dataset-pascal.R:294 dataset-places365.R:98 msgid "" "Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be " "downloaded and processed if not already available." 
msgstr "" -#: dataset-caltech.R:66 dataset-caltech.R:187 dataset-coco.R:90 dataset-coco.R:310 dataset-eurosat.R:64 dataset-fer.R:70 dataset-flowers.R:97 dataset-lfw.R:113 dataset-lfw.R:251 dataset-mnist.R:91 dataset-mnist.R:239 dataset-mnist.R:426 dataset-oxfordiiitpet.R:76 dataset-oxfordiiitpet.R:288 dataset-oxfordiiitpet.R:352 dataset-pascal.R:141 dataset-pascal.R:299 dataset-places365.R:103 dataset-plankton.R:76 +#: dataset-caltech.R:66 dataset-caltech.R:187 dataset-coco.R:90 dataset-coco.R:310 dataset-eurosat.R:64 dataset-fer.R:70 dataset-flowers.R:97 dataset-lfw.R:113 dataset-lfw.R:251 dataset-mnist.R:91 dataset-mnist.R:239 dataset-mnist.R:426 dataset-oxfordiiitpet.R:76 dataset-oxfordiiitpet.R:288 dataset-oxfordiiitpet.R:352 dataset-pascal.R:141 dataset-pascal.R:299 dataset-places365.R:103 dataset-plankton.R:89 msgid "Dataset not found. You can use `download = TRUE` to download it." msgstr "" -#: dataset-caltech.R:84 dataset-caltech.R:208 dataset-lfw.R:135 dataset-lfw.R:286 dataset-oxfordiiitpet.R:95 dataset-oxfordiiitpet.R:302 dataset-oxfordiiitpet.R:366 dataset-pascal.R:149 dataset-pascal.R:317 dataset-plankton.R:83 +#: dataset-caltech.R:84 dataset-caltech.R:208 dataset-lfw.R:135 dataset-lfw.R:286 dataset-oxfordiiitpet.R:95 dataset-oxfordiiitpet.R:302 dataset-oxfordiiitpet.R:366 dataset-pascal.R:149 dataset-pascal.R:317 dataset-plankton.R:93 msgid "" "{.cls {class(self)[[1]]}} dataset loaded with {self$.length()} images across " "{length(self$classes)} classes." msgstr "" -#: dataset-caltech.R:117 dataset-coco.R:187 dataset-eurosat.R:81 dataset-flickr.R:136 dataset-lfw.R:146 dataset-oxfordiiitpet.R:107 dataset-pascal.R:158 dataset-plankton.R:90 +#: dataset-caltech.R:117 dataset-coco.R:187 dataset-eurosat.R:81 dataset-flickr.R:136 dataset-lfw.R:146 dataset-oxfordiiitpet.R:107 dataset-pascal.R:158 dataset-plankton.R:100 msgid "Downloading {.cls {class(self)[[1]]}}..." 
msgstr "" -#: dataset-caltech.R:125 dataset-cifar.R:111 dataset-coco.R:193 dataset-eurosat.R:88 dataset-fer.R:129 dataset-flowers.R:136 dataset-lfw.R:157 dataset-lfw.R:172 dataset-mnist.R:121 dataset-mnist.R:264 dataset-mnist.R:453 dataset-oxfordiiitpet.R:115 dataset-pascal.R:168 dataset-places365.R:182 dataset-plankton.R:95 models-vit.R:49 +#: dataset-caltech.R:125 dataset-cifar.R:111 dataset-coco.R:193 dataset-eurosat.R:88 dataset-fer.R:129 dataset-flowers.R:136 dataset-lfw.R:157 dataset-lfw.R:172 dataset-mnist.R:121 dataset-mnist.R:264 dataset-mnist.R:453 dataset-oxfordiiitpet.R:115 dataset-pascal.R:168 dataset-places365.R:182 dataset-plankton.R:105 models-vit.R:49 msgid "Corrupt file! Delete the file in {archive} and try again." msgstr "" @@ -158,7 +158,7 @@ msgstr "" #: dataset-flowers.R:102 msgid "" "Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} loaded with " -"{length(self$img_path)} samples." +"{self$.length()} samples." msgstr "" #: dataset-flowers.R:126 @@ -198,6 +198,13 @@ msgstr "" msgid "Downloading {.cls {class(self)[[1]]}} split '{self$split}'..." msgstr "" +#: dataset-plankton.R:84 +msgid "" +"Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} (~{.emph " +"{self$archive_size}}) will be downloaded and processed if not already " +"available." +msgstr "" + #: extension.R:2 msgid "" "has_ops() Not implemented yet. https://github.com/pytorch/vision/blob/" From 2bd4b1320aee2cc18aef2d3ecb2937991f4d1ed1 Mon Sep 17 00:00:00 2001 From: "C. 
Regouby" Date: Fri, 15 Aug 2025 11:06:18 +0200 Subject: [PATCH 2/3] fix md5 values and add tests --- NEWS.md | 2 +- R/dataset-plankton.R | 24 +++++++++----- man/whoi_small_coralnet_dataset.Rd | 5 ++- tests/testthat/test-dataset-plankton.R | 45 ++++++++++++++++++++++++-- 4 files changed, 64 insertions(+), 12 deletions(-) diff --git a/NEWS.md b/NEWS.md index 17bb22d9..54ad9b53 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,7 +5,7 @@ * Added `lfw_people_dataset()` and `lfw_pairs_dataset()` for loading Labelled Faces in the Wild (LFW) datasets (@DerrickUnleashed, #203). * Added `places365_dataset()`for loading the Places365 dataset (@koshtiakanksha, #196). * Added `pascal_segmentation_dataset()`, and `pascal_detection_dataset()` for loading the Pascal Visual Object Classes datasets (@DerrickUnleashed, #209). -* Added `whoi_plankton_dataset()`, and `whoi_small_plankton_dataset()` (@cregouby, #236). +* Added `whoi_plankton_dataset()`, `whoi_small_plankton_dataset()`, and `whoi_small_coralnet_dataset()` (@cregouby, #236). ## New models diff --git a/R/dataset-plankton.R b/R/dataset-plankton.R index 3fd823d0..13676d9a 100644 --- a/R/dataset-plankton.R +++ b/R/dataset-plankton.R @@ -113,7 +113,12 @@ whoi_small_plankton_dataset <- torch::dataset( .getitem = function(index) { df <- self$.data[index,]$to_data_frame() - x <- df$image$bytes %>% unlist() %>% as.raw() %>% png::readPNG() + x_raw <- df$image$bytes %>% unlist() %>% as.raw() + if (tolower(tools::file_ext(df$image$path)) == "jpg") { + x <- jpeg::readJPEG(x_raw) + } else { + x <- png::readPNG(x_raw) + } y <- df$label + 1L if (!is.null(self$transform)) @@ -173,7 +178,10 @@ whoi_plankton_dataset <- torch::dataset( #' Coralnet Dataset #' -#' [CoralNet](https://coralnet.ucsd.edu) is a resource for benthic images classification. 
+#' Small Coralnet dataset is an image **classification dataset** +#' of very large submarine coral reef images annotated into 3 classes +#' and produced by [CoralNet](https://coralnet.ucsd.edu), +#' a resource for benthic images classification. #' #' @inheritParams whoi_plankton_dataset #' @export @@ -186,12 +194,12 @@ whoi_small_coralnet_dataset <- torch::dataset( url = c("https://huggingface.co/datasets/nf-whoi/coralnet-small/resolve/main/data/test-00000-of-00001.parquet?download=true", paste0("https://huggingface.co/datasets/nf-whoi/coralnet-small/resolve/main/data/train-0000",0:3,"-of-00004.parquet?download=true"), "https://huggingface.co/datasets/nf-whoi/coralnet-small/resolve/main/data/validation-00000-of-00001.parquet?download=true"), - md5 = c("cd41b344ec4b6af83e39c38e19f09190", - "aa0965c0e59f7b1cddcb3c565d80edf3", - "b2a75513f1a084724e100678d8ee7180", - "a03c4d52758078bfb0799894926d60f6", - "07eaff140f39868a8bcb1d3c02ebe60f", - "87c927b9fbe0c327b7b9ae18388b4fcf"), + md5 = c("f9a3ce864fdbeb5f1f3d243fe1285186", + "82269e2251db22ef213e438126198afd", + "82d2cafbad7740e476310565a2bcd44e", + "f4dd2d2effc1f9c02918e3ee614b85d3", + "d66ec691a4c5c63878a9cfff164a6aaf", + "7ea146b9b2f7b6cee99092bd44182d06"), size = c(430e6, rep(380e6, 4), 192e6) ) ) diff --git a/man/whoi_small_coralnet_dataset.Rd b/man/whoi_small_coralnet_dataset.Rd index 06e31194..bbdc3e89 100644 --- a/man/whoi_small_coralnet_dataset.Rd +++ b/man/whoi_small_coralnet_dataset.Rd @@ -21,5 +21,8 @@ whoi_small_coralnet_dataset( \item{download}{Logical. If TRUE, downloads the dataset to \verb{root/}. If the dataset is already present, download is skipped.} } \description{ -Coralnet Dataset +Small Coralnet dataset is an image \strong{classification dataset} +of very large submarine coral reef images annotated into 3 classes +and produced by \href{https://coralnet.ucsd.edu}{CoralNet}, +a resource for benthic images classification. 
} diff --git a/tests/testthat/test-dataset-plankton.R b/tests/testthat/test-dataset-plankton.R index be88ab61..f1feba88 100644 --- a/tests/testthat/test-dataset-plankton.R +++ b/tests/testthat/test-dataset-plankton.R @@ -1,4 +1,4 @@ -context("dataset-eurosat") +context("dataset-plankton") test_that("whoi_small_plankton_dataset downloads correctly whatever the split", { @@ -27,7 +27,7 @@ test_that("whoi_small_plankton_dataset downloads correctly whatever the split", first_item <- val_ds[1] expect_tensor_shape(first_item$x, c(1,145, 230)) - # classification of the first item is "48: Leptocylindrus" + # classification of the first item is "47: Leegaardiella_ovalis" expect_equal(first_item$y, 47L) expect_equal(val_ds$classes[first_item$y], "Leegaardiella_ovalis") @@ -39,6 +39,47 @@ test_that("whoi_small_plankton_dataset downloads correctly whatever the split", }) + +test_that("whoi_small_coralnet_dataset downloads correctly whatever the split", { + skip_on_cran() + skip_if_not_installed("torch") + skip_if(Sys.getenv("TEST_LARGE_DATASETS", unset = 0) != 1, + "Skipping test: set TEST_LARGE_DATASETS=1 to enable tests requiring large downloads.") + + expect_error( + whoi_small_coralnet_dataset(split = "test", download = FALSE), + "Dataset not found. 
You can use `download = TRUE`", + label = "Dataset should fail if not previously downloaded" + ) + + expect_no_error( + train_ds <- whoi_small_coralnet_dataset(split = "train", download = TRUE) + ) + + expect_is(train_ds, "dataset", "train should be a dataset") + # Train dataset should have exactly 314 samples + expect_equal(train_ds$.length(), 314) + + expect_no_error( + val_ds <- whoi_small_coralnet_dataset(split = "val", download = TRUE, transform = transform_to_tensor) + ) + # Validation dataset should have exactly 45 samples + expect_equal(val_ds$.length(), 45) + + first_item <- val_ds[1] + expect_tensor_shape(first_item$x, c(3, 3000, 4000)) + # classification of the first item is "1: diploria_labrinthyformis" + expect_equal(first_item$y, 1L) + expect_equal(val_ds$classes[first_item$y], "diploria_labrinthyformis") + + expect_no_error( + test_ds <- whoi_small_coralnet_dataset(split = "test", download = TRUE) + ) + # Test dataset should have exactly 91 samples + expect_equal(test_ds$.length(), 91) + +}) + test_that("whoi_small_plankton_dataset derivatives download and prepare correctly", { skip_on_cran() skip_if_not_installed("torch") From 8c2b804073ce30546d9c7b3f6516b7d0a7655790 Mon Sep 17 00:00:00 2001 From: "C. Regouby" Date: Fri, 15 Aug 2025 11:07:23 +0200 Subject: [PATCH 3/3] update translations --- inst/po/fr/LC_MESSAGES/R-torchvision.mo | Bin 17580 -> 18865 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/inst/po/fr/LC_MESSAGES/R-torchvision.mo b/inst/po/fr/LC_MESSAGES/R-torchvision.mo index fb3b3b54cbee836a94f149c29896c9d4d985827a..af99fdc3da80216644b08915547c22b2671fc837 100644 GIT binary patch delta 3448 zcmZ|Q4{Q_H9l-JD2AYyU0)f!{gS?PH5`sPFA0%NZN%+qK2?-F|49LYkVi(6YK9f{S zjafTY>PjcZjjdZXb*`@1uN zuUq)jq~9x=b4JlDdIkNZ9HoAYy}2A{m*y$ehL>?CevT$? 
z%TuZy&u9GzyP2=ZSL!+jQSKX_uT&R)52x^Ld;oVZ;5(E`t2Y?LII*~ZMe$*b;G0;F zorOv@;}~wnCsEG-89On5p;A4#9qaHUeg%JyE%*V}VA&!zhZ}G)j$%3Ms|g0HIdK{{ z;463pzrYC$7r7hy69$>@Q-ebr=2%%7WYPc0+mshnQc+tyo$@xY0mK%_}J1_ZhZfZK<20 z5nRsv8BA*iml?F<`?wA(%9Qfr9&EvfP|jaK{?y+&NUrnB-35A4zVJbmlst*8_zueb zg%p--WD_pKBuXSsR}lXIgV#90+NzqX@_|kca>Fo6N*=>4_$!nR6tUW4=tar-%Q%Sd zV<)bybVHuRYUa-&m(&~Bg&!m3rB>^-o6C0Xe$YgstRBt!6O>3?z$*Lz`!JUYdsag@ zj7gO1Z*q_geu}Gb0Tm}j+kyl54cvvl!Ea+ln(ZBB@KcnkFQsxM6^~#yUPAezJ60+c z$9k0F{1JA0l)96Po0xB_RcZ%5juZF??7}|6Bc8%-_&e<3zA|2+eazDbD18~cgl90H z(iWdad9rmR>NwiC7mEnTLl{EI;ZWBR<6o>9Yx+K^#oSoIh5g)I*a*KMWo4^qrRmv_XbTxp( zIEhjNZy{+{e??iiWTRUXU0A_<6eTiglwy1t1Nb3Ilt^8~No?Ov*s+>kN#}N_ zNfh?bWs}klu)g!JcM#=CrR}0GqRSIYm~U+oEosH{d+B$|gf@>(7M;SKgR&`U@|z$L zxY71AFzAWwiF6JF>csgGl5=JBg>>E;=Z7VPl8PHG%78a)rsXrLp;NO?E9BTkFQscb zftqQ#j3ob3TM_qcT8+5_C!E=#^M9I1Iw?7nJ-Gm-s+Z8IAC*TB(!+GAdTH`DNI^2-eXmSR2q ze!3JCzhzE~WtC!HmL1pOeEKL|iq~n98~L%I-ki3VBR^;}t$@+W?CdU-*KBuoz6$02 zmNrgrpfAsO=UtoYGZKMNDy8EIGnp{LQ8TC~&G1+#Y3aJ-Q$BM%9@10c=;7FK^MO>V z-u4$Z*o}q$g4!v|j2y19;*oGNRhwxnZ1j|Gk4_koa8NrZJM^ue$QTRLIraauM8Z6J zESxZP^w{{JK*X>t)6$)~snNc&_?kU7|33RWOU^BD7V<^R=vXpTm)TW3m9zHdZV!zb zfg_`_s5zp;W6@Yb_8&5%dgSIu_{Kd0dpD2xGOw3Bm%C(8wybCG-!=+L^XgODGRET( z)5`4DDbMV6?P%3trgg=`o;qK<-|x@s@AD3r6JaYHi+VR1NwY&Y`I}n3{x)x8t8Vmn zG`89AR{wt4es6!w42F~5!Pv1xz}y%z5|)|l(0li6@wR1NsQEM}zb>Crx)eW78XByJ><;Us&z{W(vq zsqkp~!-nd)&PVL-rWKhrYu0`~**P7(Bor_Y+&3Tu<7*ng|5oPFc?>}k&; z(@N?9p&gUkj0rPf|GKI&^Ko<0oX(qNa7(4g=bdVqKC2_Kgmv_o`Q-)kuXSJCWe@7I znqHH17*2#{SI&)f*}1KImM*4=Cn YuoaI*!=t2KcAR}%X3G54o=-gg1u)f8RR910 delta 2621 zcmZ|QYitx%6u|Mb>{415`dH{fTe>Z!v_M(9K!LWjbSbn&p}b0YRUWo12rW{qAOy;4 zF)>0w?G+^^^3W1BAPQqpq8NoljKn75^8+CokQj+t0%C$1{hyhI_{E!?`Q4eFxp(H= zbK4Wfb?j+Z^s>*hPiQT~5MsMqL@!*5<3I}~h!o>kti^R0!S}EP1AQGA<5bQ&@iu;i z`rHMtNEki+M7CiSZo!XnhDcOO`-^Pg!cLrpH?al7i6SHMO&pKk#?HM-B9)v^z!A6w z3-K@(;TM>TamgY%Sbzg?HjctZtiV0k%>42>oy}Y*NpU8A8&`3j>EmJSz=?Pqb>bhf z97iy!PP7`caW{^|W2i578S}BvK#?(6i29<-aRu(hX6Bb)>F5MYQ=L>jhl@DBh&4FM 
zFVcXIpr*VVb)fh`PKt|BQ@;@dXkj*9M7{qePQu_|0wY+22BYb8KBc1rTtOXpc$za% zJ?fV}iWBf8>VrR_-j~V4nOKXXu^F3iKXQxw75m+v;oM(@n)+2(ja?b!pL^sw7j|PD z6{xA}#5#N*^#zicMN?ag!!U~arLSTb&m!d^ZZ7JQ`B3khf+R~CV{S#wz_U0MKMaun zIds0~f>wVh%Sm-D>J4w8zTgGS!)sWD{j(`3tixUS3U0(854GVzEWrU(;8d(def}A2 z#Z%af`B7do;9@OsIu}k-QTO8?xEU*lJ70J@<{f0)$Ye&V7Zx3n) z&LU%rcbsDh&fizHFHta$D@|N+8jn`4{t75Q5-T$3*besK-E3uqX*R?x<)9^dg z2ZoTImADr5i$6nFLvEqopH6zJZ~^MRe+e~XXE|ix9n@O$lO7(C)i{FrBqJ64oByWkA!@0U=&+`~aL7skQ37c=lRd=bwfs~`sH%v))V%tD>G9f#r(9D?VOca*D0c}Rdkn{Xy- z#@@oY_;m&O*Hq?(oge0+uJLBnS~!Y}@HQU7*;AdhyoFl5ZKO$;<~Hh5P2-?n_7t|` zDSQlzri-}nZfl~m);@_k`}6J(ch#@frh_#ST5#HQGM3S<>}J$Gt(nqv>x<_QeTc<` zwg5rB*^N3BR>;nrE=MM@jQQ;3dpeeA_?DZ!rs zyGHclgG35(AE7%*+k9d;5hhXzZ3_t98FnrTpuc_M9O`2InwUP+YSlKINGH+=-NRy? zNx0G598C6^6{)?AH+i(RDcR++I#YJIO?m1s=A3`M`6scTIU4j@;Xzy6R!@3YA9E-- z-uf+Pi7WP#&-jz_O(PH!eFADlHms>hiv^YVvQmO?SZyR(HYk zz09G)fcbJvmZzw(|NxXtE$-NF-_A3TgS_;#2a65x;aoCXA*}cT63!ByUa(U{oEF|{;WCW F`WL}EM3Vpj