Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ export(transform_to_tensor)
export(transform_vflip)
export(vision_make_grid)
export(whoi_plankton_dataset)
export(whoi_small_coralnet_dataset)
export(whoi_small_plankton_dataset)
importFrom(grDevices,dev.off)
importFrom(graphics,polygon)
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* Added `lfw_people_dataset()` and `lfw_pairs_dataset()` for loading Labelled Faces in the Wild (LFW) datasets (@DerrickUnleashed, #203).
* Added `places365_dataset()`for loading the Places365 dataset (@koshtiakanksha, #196).
* Added `pascal_segmentation_dataset()`, and `pascal_detection_dataset()` for loading the Pascal Visual Object Classes datasets (@DerrickUnleashed, #209).
* Added `whoi_plankton_dataset()`, and `whoi_small_plankton_dataset()` (@cregouby, #236).
* Added `whoi_plankton_dataset()`, `whoi_small_plankton_dataset()`, and `whoi_small_coralnet_dataset()` (@cregouby, #236).

## New models

Expand Down
2 changes: 1 addition & 1 deletion R/dataset-flowers.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ flowers102_dataset <- dataset(
meta <- readRDS(file.path(self$processed_folder, glue::glue("{self$split}.rds")))
self$img_path <- meta$img_path
self$labels <- meta$labels
cli_inform("Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} loaded with {length(self$img_path)} samples.")
cli_inform("Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} loaded with {self$.length()} samples.")
},

.getitem = function(index) {
Expand Down
49 changes: 45 additions & 4 deletions R/dataset-plankton.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@
#' WHOI-Plankton Dataset
#'
#' The WHOI-Plankton and WHOI-Plankton small are **image classification** datasets
#' of submarine plankton small grayscale images of varying size, classified into 100 classes.
#' from the Woods Hole Oceanographic Institution (WHOI) of microscopic marine plankton.
#' <https://hdl.handle.net/10.1575/1912/7341>.
#' Images were collected in situ by automated submersible imaging-in-flow cytometry
#' with an instrument called Imaging FlowCytobot (IFCB). They are small grayscale images
#' of varying size.
#' Images are classified into 100 classes, with an overview available in
#' [project Wiki page](https://whoigit.github.io/whoi-plankton/)
#' Dataset size is 957k and 58k respectively, and each provides a train / val / test split.
#'
#' @inheritParams eurosat_dataset
Expand Down Expand Up @@ -66,6 +72,7 @@ whoi_small_plankton_dataset <- torch::dataset(
install.packages("prettyunits")
}

self$split <- match.arg(split, c("train", "val", "test"))
self$transform <- transform
self$target_transform <- target_transform
self$archive_url <- self$resources[self$resources$split == split,]$url
Expand All @@ -74,7 +81,7 @@ whoi_small_plankton_dataset <- torch::dataset(
self$split_file <- sapply(self$archive_url, \(x) file.path(rappdirs::user_cache_dir("torch"), class(self)[1], sub("\\?download=.*", "", basename(x))))

if (download) {
cli_inform("Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.")
cli_inform("Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be downloaded and processed if not already available.")
self$download()
}

Expand Down Expand Up @@ -106,7 +113,12 @@ whoi_small_plankton_dataset <- torch::dataset(

.getitem = function(index) {
df <- self$.data[index,]$to_data_frame()
x <- df$image$bytes %>% unlist() %>% as.raw() %>% png::readPNG()
x_raw <- df$image$bytes %>% unlist() %>% as.raw()
if (tolower(tools::file_ext(df$image$path)) == "jpg") {
x <- jpeg::readJPEG(x_raw)
} else {
x <- png::readPNG(x_raw)
}
y <- df$label + 1L

if (!is.null(self$transform))
Expand All @@ -127,7 +139,7 @@ whoi_small_plankton_dataset <- torch::dataset(

#' WHOI-Plankton Dataset
#'
#' @inheritParams whoi_plankton_dataset#'
#' @inheritParams whoi_plankton_dataset
#' @rdname whoi_plankton_dataset
#' @export
whoi_plankton_dataset <- torch::dataset(
Expand Down Expand Up @@ -162,3 +174,32 @@ whoi_plankton_dataset <- torch::dataset(
size = c(rep(450e6, 4), rep(490e6, 13), rep(450e6, 2))
)
)


#' Coralnet Dataset
#'
#' The small Coralnet dataset is an image **classification dataset** of very
#' large submarine coral reef images, annotated into 3 classes. It is produced
#' by [CoralNet](https://coralnet.ucsd.edu), a resource for benthic image
#' classification.
#'
#' @inheritParams whoi_plankton_dataset
#' @export
whoi_small_coralnet_dataset <- torch::dataset(
  name = "whoi_small_coralnet",
  # Reuses download / parquet-reading machinery from the plankton dataset.
  inherit = whoi_small_plankton_dataset,
  archive_size = "2.1 GB",
  # One row per parquet shard hosted on Hugging Face: 1 test, 4 train, 1 val.
  resources = data.frame(
    split = c("test", rep("train", 4), "val"),
    url = paste0(
      "https://huggingface.co/datasets/nf-whoi/coralnet-small/resolve/main/data/",
      c(
        "test-00000-of-00001",
        sprintf("train-%05d-of-00004", 0:3),
        "validation-00000-of-00001"
      ),
      ".parquet?download=true"
    ),
    md5 = c(
      "f9a3ce864fdbeb5f1f3d243fe1285186",
      "82269e2251db22ef213e438126198afd",
      "82d2cafbad7740e476310565a2bcd44e",
      "f4dd2d2effc1f9c02918e3ee614b85d3",
      "d66ec691a4c5c63878a9cfff164a6aaf",
      "7ea146b9b2f7b6cee99092bd44182d06"
    ),
    size = c(430e6, rep(380e6, 4), 192e6)
  )
)
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ reference:
- ends_with("1_dataset")
- ends_with("2_dataset")
- ends_with("5_dataset")
- ends_with("kton_dataset")
- subtitle: for Object Detection
descr: >
Dataset having items with "y" as a named list of bounding-box and
Expand Down
Binary file modified inst/po/fr/LC_MESSAGES/R-torchvision.mo
Binary file not shown.
8 changes: 7 additions & 1 deletion man/whoi_plankton_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions man/whoi_small_coralnet_dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 25 additions & 15 deletions po/R-fr.po
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
msgid ""
msgstr ""
"Project-Id-Version: torchvision 0.7.0.9000\n"
"POT-Creation-Date: 2025-08-10 20:26+0200\n"
"PO-Revision-Date: 2025-08-10 20:35+0200\n"
"POT-Creation-Date: 2025-08-15 10:15+0200\n"
"PO-Revision-Date: 2025-08-15 10:18+0200\n"
"Last-Translator: Christophe Regouby <christophe.regouby@free.fr>\n"
"Language-Team: \n"
"Language: fr\n"
Expand Down Expand Up @@ -46,12 +46,12 @@ msgstr ""
#: dataset-lfw.R:246 dataset-mnist.R:86 dataset-mnist.R:234 dataset-mnist.R:421
#: dataset-oxfordiiitpet.R:71 dataset-oxfordiiitpet.R:283
#: dataset-oxfordiiitpet.R:347 dataset-pascal.R:136 dataset-pascal.R:294
#: dataset-places365.R:98 dataset-plankton.R:71
#: dataset-places365.R:98
msgid ""
"Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be "
"downloaded and processed if not already available."
msgstr ""
"Le jeu de données {.cls {class(self)[[1]]}} de taille (~{.emph "
"Le jeu de données {.cls {class(self)[[1]]}} (de taille ~{.emph "
"{self$archive_size}}) sera téléchargé et traité s'il n'est pas déjà "
"disponible."

Expand All @@ -60,7 +60,7 @@ msgstr ""
#: dataset-flowers.R:97 dataset-lfw.R:113 dataset-lfw.R:251 dataset-mnist.R:91
#: dataset-mnist.R:239 dataset-mnist.R:426 dataset-oxfordiiitpet.R:76
#: dataset-oxfordiiitpet.R:288 dataset-oxfordiiitpet.R:352 dataset-pascal.R:141
#: dataset-pascal.R:299 dataset-places365.R:103 dataset-plankton.R:76
#: dataset-pascal.R:299 dataset-places365.R:103 dataset-plankton.R:89
msgid "Dataset not found. You can use `download = TRUE` to download it."
msgstr ""
"Jeu de données introuvable. Veuillez ajouter `download = TRUE` pour le "
Expand All @@ -69,7 +69,7 @@ msgstr ""
#: dataset-caltech.R:84 dataset-caltech.R:208 dataset-lfw.R:135
#: dataset-lfw.R:286 dataset-oxfordiiitpet.R:95 dataset-oxfordiiitpet.R:302
#: dataset-oxfordiiitpet.R:366 dataset-pascal.R:149 dataset-pascal.R:317
#: dataset-plankton.R:83
#: dataset-plankton.R:93
msgid ""
"{.cls {class(self)[[1]]}} dataset loaded with {self$.length()} images across "
"{length(self$classes)} classes."
Expand All @@ -79,15 +79,15 @@ msgstr ""

#: dataset-caltech.R:117 dataset-coco.R:187 dataset-eurosat.R:81
#: dataset-flickr.R:136 dataset-lfw.R:146 dataset-oxfordiiitpet.R:107
#: dataset-pascal.R:158 dataset-plankton.R:90
#: dataset-pascal.R:158 dataset-plankton.R:100
msgid "Downloading {.cls {class(self)[[1]]}}..."
msgstr "Téléchargement de {.cls {class(self)[[1]]}}..."

#: dataset-caltech.R:125 dataset-cifar.R:111 dataset-coco.R:193
#: dataset-eurosat.R:88 dataset-fer.R:129 dataset-flowers.R:136
#: dataset-lfw.R:157 dataset-lfw.R:172 dataset-mnist.R:121 dataset-mnist.R:264
#: dataset-mnist.R:453 dataset-oxfordiiitpet.R:115 dataset-pascal.R:168
#: dataset-places365.R:182 dataset-plankton.R:95 models-vit.R:49
#: dataset-places365.R:182 dataset-plankton.R:105 models-vit.R:49
msgid "Corrupt file! Delete the file in {archive} and try again."
msgstr "Fichier corrompu. Supprimez le fichier {archive} et recommencez."

Expand Down Expand Up @@ -160,7 +160,7 @@ msgstr "Extraction de l'archive {.cls {class(self)[[1]]}} terminée..."
#: dataset-eurosat.R:98
msgid "Downloading {.cls {class(self)[[1]]}} split file: {self$split_url}"
msgstr ""
"Téléchargement du fichier de split de {.cls {class(self)[[1]]}} depuis "
"Téléchargement du fichier de partition de {.cls {class(self)[[1]]}} depuis "
"{self$split_url} ..."

#: dataset-eurosat.R:102
Expand Down Expand Up @@ -214,18 +214,18 @@ msgstr ""
#: dataset-flowers.R:102
msgid ""
"Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} loaded with "
"{length(self$img_path)} samples."
"{self$.length()} samples."
msgstr ""
"Le sous-ensemble {.val {self$split}} du jeu de données {.cls {class(self)"
"[[1]]}} chargé avec {length(self$img_path)} images."
"La partition {.val {self$split}} du jeu de données {.cls {class(self)[[1]]}} "
"est chargée avec {self$.length()} images."

#: dataset-flowers.R:126
msgid ""
"Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} is already "
"processed and cached."
msgstr ""
"Le sous-ensemble {.val {self$split}} du jeu de données {.cls {class(self)"
"[[1]]}} a déjà été préparé et mis en cache."
"La partition {.val {self$split}} du jeu de données {.cls {class(self)[[1]]}} "
"a déjà été préparée et mise en cache."

#: dataset-flowers.R:140
msgid "{.cls {class(self)[[1]]}} Extracting images and processing dataset..."
Expand Down Expand Up @@ -262,9 +262,19 @@ msgstr "Partage non valide : {self$split}"
#: dataset-places365.R:163
msgid "Downloading {.cls {class(self)[[1]]}} split '{self$split}'..."
msgstr ""
"Téléchargement du fichier de partage '{self$split}' de {.cls {class(self)"
"Téléchargement du fichier de partition '{self$split}' de {.cls {class(self)"
"[[1]]} ..."

#: dataset-plankton.R:84
msgid ""
"Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} (~{.emph "
"{self$archive_size}}) will be downloaded and processed if not already "
"available."
msgstr ""
"La partition {.val {self$split}} du jeu de données {.cls {class(self)[[1]]}} "
"(de taille ~{.emph {self$archive_size}}) sera téléchargée et traitée si elle "
"n'est pas déjà disponible."

#: extension.R:2
msgid ""
"has_ops() Not implemented yet. https://github.com/pytorch/vision/blob/"
Expand Down
21 changes: 14 additions & 7 deletions po/R-torchvision.pot
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
msgid ""
msgstr ""
"Project-Id-Version: torchvision 0.7.0.9000\n"
"POT-Creation-Date: 2025-08-10 20:26+0200\n"
"POT-Creation-Date: 2025-08-15 10:15+0200\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
Expand Down Expand Up @@ -38,27 +38,27 @@ msgstr ""
msgid "deprecated"
msgstr ""

#: dataset-caltech.R:61 dataset-cifar.R:52 dataset-coco.R:85 dataset-coco.R:305 dataset-eurosat.R:57 dataset-fer.R:63 dataset-fgvc.R:91 dataset-flickr.R:69 dataset-flickr.R:231 dataset-flowers.R:92 dataset-lfw.R:108 dataset-lfw.R:246 dataset-mnist.R:86 dataset-mnist.R:234 dataset-mnist.R:421 dataset-oxfordiiitpet.R:71 dataset-oxfordiiitpet.R:283 dataset-oxfordiiitpet.R:347 dataset-pascal.R:136 dataset-pascal.R:294 dataset-places365.R:98 dataset-plankton.R:71
#: dataset-caltech.R:61 dataset-cifar.R:52 dataset-coco.R:85 dataset-coco.R:305 dataset-eurosat.R:57 dataset-fer.R:63 dataset-fgvc.R:91 dataset-flickr.R:69 dataset-flickr.R:231 dataset-flowers.R:92 dataset-lfw.R:108 dataset-lfw.R:246 dataset-mnist.R:86 dataset-mnist.R:234 dataset-mnist.R:421 dataset-oxfordiiitpet.R:71 dataset-oxfordiiitpet.R:283 dataset-oxfordiiitpet.R:347 dataset-pascal.R:136 dataset-pascal.R:294 dataset-places365.R:98
msgid ""
"Dataset {.cls {class(self)[[1]]}} (~{.emph {self$archive_size}}) will be "
"downloaded and processed if not already available."
msgstr ""

#: dataset-caltech.R:66 dataset-caltech.R:187 dataset-coco.R:90 dataset-coco.R:310 dataset-eurosat.R:64 dataset-fer.R:70 dataset-flowers.R:97 dataset-lfw.R:113 dataset-lfw.R:251 dataset-mnist.R:91 dataset-mnist.R:239 dataset-mnist.R:426 dataset-oxfordiiitpet.R:76 dataset-oxfordiiitpet.R:288 dataset-oxfordiiitpet.R:352 dataset-pascal.R:141 dataset-pascal.R:299 dataset-places365.R:103 dataset-plankton.R:76
#: dataset-caltech.R:66 dataset-caltech.R:187 dataset-coco.R:90 dataset-coco.R:310 dataset-eurosat.R:64 dataset-fer.R:70 dataset-flowers.R:97 dataset-lfw.R:113 dataset-lfw.R:251 dataset-mnist.R:91 dataset-mnist.R:239 dataset-mnist.R:426 dataset-oxfordiiitpet.R:76 dataset-oxfordiiitpet.R:288 dataset-oxfordiiitpet.R:352 dataset-pascal.R:141 dataset-pascal.R:299 dataset-places365.R:103 dataset-plankton.R:89
msgid "Dataset not found. You can use `download = TRUE` to download it."
msgstr ""

#: dataset-caltech.R:84 dataset-caltech.R:208 dataset-lfw.R:135 dataset-lfw.R:286 dataset-oxfordiiitpet.R:95 dataset-oxfordiiitpet.R:302 dataset-oxfordiiitpet.R:366 dataset-pascal.R:149 dataset-pascal.R:317 dataset-plankton.R:83
#: dataset-caltech.R:84 dataset-caltech.R:208 dataset-lfw.R:135 dataset-lfw.R:286 dataset-oxfordiiitpet.R:95 dataset-oxfordiiitpet.R:302 dataset-oxfordiiitpet.R:366 dataset-pascal.R:149 dataset-pascal.R:317 dataset-plankton.R:93
msgid ""
"{.cls {class(self)[[1]]}} dataset loaded with {self$.length()} images across "
"{length(self$classes)} classes."
msgstr ""

#: dataset-caltech.R:117 dataset-coco.R:187 dataset-eurosat.R:81 dataset-flickr.R:136 dataset-lfw.R:146 dataset-oxfordiiitpet.R:107 dataset-pascal.R:158 dataset-plankton.R:90
#: dataset-caltech.R:117 dataset-coco.R:187 dataset-eurosat.R:81 dataset-flickr.R:136 dataset-lfw.R:146 dataset-oxfordiiitpet.R:107 dataset-pascal.R:158 dataset-plankton.R:100
msgid "Downloading {.cls {class(self)[[1]]}}..."
msgstr ""

#: dataset-caltech.R:125 dataset-cifar.R:111 dataset-coco.R:193 dataset-eurosat.R:88 dataset-fer.R:129 dataset-flowers.R:136 dataset-lfw.R:157 dataset-lfw.R:172 dataset-mnist.R:121 dataset-mnist.R:264 dataset-mnist.R:453 dataset-oxfordiiitpet.R:115 dataset-pascal.R:168 dataset-places365.R:182 dataset-plankton.R:95 models-vit.R:49
#: dataset-caltech.R:125 dataset-cifar.R:111 dataset-coco.R:193 dataset-eurosat.R:88 dataset-fer.R:129 dataset-flowers.R:136 dataset-lfw.R:157 dataset-lfw.R:172 dataset-mnist.R:121 dataset-mnist.R:264 dataset-mnist.R:453 dataset-oxfordiiitpet.R:115 dataset-pascal.R:168 dataset-places365.R:182 dataset-plankton.R:105 models-vit.R:49
msgid "Corrupt file! Delete the file in {archive} and try again."
msgstr ""

Expand Down Expand Up @@ -158,7 +158,7 @@ msgstr ""
#: dataset-flowers.R:102
msgid ""
"Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} loaded with "
"{length(self$img_path)} samples."
"{self$.length()} samples."
msgstr ""

#: dataset-flowers.R:126
Expand Down Expand Up @@ -198,6 +198,13 @@ msgstr ""
msgid "Downloading {.cls {class(self)[[1]]}} split '{self$split}'..."
msgstr ""

#: dataset-plankton.R:84
msgid ""
"Split {.val {self$split}} of dataset {.cls {class(self)[[1]]}} (~{.emph "
"{self$archive_size}}) will be downloaded and processed if not already "
"available."
msgstr ""

#: extension.R:2
msgid ""
"has_ops() Not implemented yet. https://github.com/pytorch/vision/blob/"
Expand Down
Loading
Loading