Merge pull request #33 from quanteda/fixing_crossval

Fix wrong list index and object name in crossval().
quanteda · Oct 17, 2023 · 3aabdb7
2 parents c227b21 + 11cfdbd
commit 3aabdb7
Show file tree

Hide file tree

Showing 12 changed files with 27 additions and 12 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -5,3 +5,4 @@
 ^\.Rproj\.user$
 README.Rmd
 tests/misc/
+\.github/
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -32,11 +32,11 @@ jobs:
     steps:
       - uses: actions/checkout@v2
 
-      - uses: r-lib/actions/setup-r@master
+      - uses: r-lib/actions/setup-r@v2
         with:
           r-version: ${{ matrix.config.r }}
 
-      - uses: r-lib/actions/setup-pandoc@master
+      - uses: r-lib/actions/setup-pandoc@v2
 
       - name: Query dependencies
         run: |

diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -14,11 +14,11 @@ jobs:
     steps:
       - uses: actions/checkout@v2
 
-      - uses: r-lib/actions/setup-r@master
+      - uses: r-lib/actions/setup-r@v2
         with:
           r-version: 'release'
 
-      - uses: r-lib/actions/setup-pandoc@master
+      - uses: r-lib/actions/setup-pandoc@v2
 
       - name: Query dependencies
         run: |

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,6 +22,7 @@ Suggests:
 URL: https://github.com/quanteda/quanteda.classifiers
 BugReports: https://github.com/quanteda/quanteda.classifiers/issues
 LazyData: TRUE
-RoxygenNote: 7.1.1
+LazyDataCompression: bzip2
+RoxygenNote: 7.2.3
 Language: en-GB
 Roxygen: list(markdown = TRUE)
diff --git a/R/crossval.R b/R/crossval.R
@@ -9,7 +9,8 @@
 #' @examples
 #' library("quanteda")
 #' library("quanteda.textmodels")
-#' dfmat <- dfm(data_corpus_moviereviews)
+#' dfmat <- tokens(data_corpus_moviereviews) |>
+#'   dfm()
 #' tmod <- textmodel_nb(dfmat, y = data_corpus_moviereviews$sentiment)
 #' crossval(tmod, k = 5, by_class = TRUE)
 #' crossval(tmod, k = 5, by_class = FALSE)
@@ -38,8 +39,8 @@ crossval.textmodel <- function(x, k = 5, by_class = FALSE, verbose = FALSE) {
                              type = "class")
         results <- c(results,
                      structure(list(c(performance(this_pred, x$y[folds == i]),
-                                      list(obs = split(seq_len(ndoc(x)), folds)[[k]]))),
-                               names = paste0("fold_", k)))
+                                      list(obs = split(seq_len(ndoc(x)), folds)[[i]]))),
+                               names = paste0("fold_", i)))
     }
 
     summ <- summarize_results(results)
@@ -66,8 +67,8 @@ summarize_results <- function(x) {
 
     # make into a 3D array
     x_df <- lapply(x, data.frame)
-    x_array <- array(unlist(x), dim <- c(dim(x_df[[1]]), length(x_df)),
-                     dimnames = c(dimnames(x_df[[1]]), list(names(x))))
+    x_array <- array(unlist(x_df), dim <- c(dim(x_df[[1]]), length(x_df)),
+                     dimnames = c(dimnames(x_df[[1]]), list(names(x_df))))
 
     apply(x_array, c(1, 2), mean)
 }
diff --git a/data/data_corpus_LMRD.rda b/data/data_corpus_LMRD.rda
diff --git a/data/data_corpus_manifestosentsUK.rda b/data/data_corpus_manifestosentsUK.rda
diff --git a/man/crossval.Rd b/man/crossval.Rd
diff --git a/tests/testthat/test-textmodel_cnnlstmemb.R b/tests/testthat/test-textmodel_cnnlstmemb.R
@@ -1,13 +1,15 @@
 context("test textmodel_cnnlstmemb")
 
 test_that("the cnnlstmemb model works", {
+    skip()
     skip_on_cran()
 
+    data(data_corpus_EPcoaldebate, package = "quanteda.textmodels")
     corp <- corpus_subset(data_corpus_EPcoaldebate,
                           subset = language == "English") %>%
         corpus_sample(500)
 
-    toks <- tokens(texts(corp))
+    toks <- tokens(corp)
     label <- ifelse(docvars(corp, "crowd_subsidy_label") == "Pro-Subsidy", 1, 0)
     tmod <- textmodel_cnnlstmemb(toks, y = label, epochs = 8)
 
@@ -36,6 +38,7 @@ test_that("the cnnlstmemb model works", {
 })
 
 test_that("multiclass prediction works", {
+    skip()
     skip_on_cran()
 
     data(data_corpus_irishbudget2010, package = "quanteda.textmodels")
@@ -56,6 +59,7 @@ test_that("multiclass prediction works", {
 })
 
 test_that("cnnlstmemb works with tokens2sequences", {
+    skip()
     skip_on_cran()
 
     data(data_corpus_irishbudget2010, package = "quanteda.textmodels")

diff --git a/tests/testthat/test-textmodel_evaluate.R b/tests/testthat/test-textmodel_evaluate.R
@@ -4,6 +4,8 @@ test_that("textmodel_evaluate works", {
     skip("until rewritten")
     skip_on_cran()
 
+    data(data_corpus_EPcoaldebate, package = "quanteda.textmodels")
+
     set.seed(100)
     corp <- corpus_sample(data_corpus_EPcoaldebate, size = 500, by = "crowd_subsidy_label")
     dfmat <- dfm(corp) %>% 

diff --git a/tests/testthat/test-textmodel_mlp.R b/tests/testthat/test-textmodel_mlp.R
@@ -1,8 +1,11 @@
 context("test textmodel_mlp")
 
 test_that("the mlp model works", {
+    skip("because of tensorflow install problems")
     skip_on_cran()
 
+    data(data_corpus_EPcoaldebate, package = "quanteda.textmodels")
+
     set.seed(100)
     corp_train <- corpus_sample(data_corpus_EPcoaldebate, size = 3000, by = "crowd_subsidy_label")
     corp_test <- corpus_sample(data_corpus_EPcoaldebate, size = 10, by = "crowd_subsidy_label")

diff --git a/tests/testthat/test-tokens2sequences.R b/tests/testthat/test-tokens2sequences.R
@@ -1,6 +1,7 @@
 context("test tokens2sequences")
 
 test_that("tokens2sequences works", {
+    skip()
     skip_on_cran()
 
     ## Example from 13.1 of _An Introduction to Information Retrieval_
@@ -38,6 +39,7 @@ test_that("tokens2sequences works", {
 })
 
 test_that("tokens2sequences_conform works", {
+    skip()
     txt1 <- "This is sentence one.  And here is sentence two."
     txt2 <- "This is sentence 3.  Sentence 4!  A fifth and final example."
     toks1 <- corpus(txt1) %>%