Skip to content

Commit

Permalink
Merge pull request #33 from quanteda/fixing_crossval
Browse files (browse the repository at this point in the history)
Fix wrong list index and object name in crossval().
  • Loading branch information
kbenoit committed Oct 17, 2023
2 parents c227b21 + 11cfdbd commit 3aabdb7
Show file tree
Hide file tree
Showing 12 changed files with 27 additions and 12 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
^\.Rproj\.user$
README.Rmd
tests/misc/
\.github/
4 changes: 2 additions & 2 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ jobs:
steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-r@master
- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}

- uses: r-lib/actions/setup-pandoc@master
- uses: r-lib/actions/setup-pandoc@v2

- name: Query dependencies
run: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ jobs:
steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-r@master
- uses: r-lib/actions/setup-r@v2
with:
r-version: 'release'

- uses: r-lib/actions/setup-pandoc@master
- uses: r-lib/actions/setup-pandoc@v2

- name: Query dependencies
run: |
Expand Down
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Suggests:
URL: https://github.com/quanteda/quanteda.classifiers
BugReports: https://github.com/quanteda/quanteda.classifiers/issues
LazyData: TRUE
RoxygenNote: 7.1.1
LazyDataCompression: bzip2
RoxygenNote: 7.2.3
Language: en-GB
Roxygen: list(markdown = TRUE)
11 changes: 6 additions & 5 deletions R/crossval.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
#' @examples
#' library("quanteda")
#' library("quanteda.textmodels")
#' dfmat <- dfm(data_corpus_moviereviews)
#' dfmat <- tokens(data_corpus_moviereviews) |>
#' dfm()
#' tmod <- textmodel_nb(dfmat, y = data_corpus_moviereviews$sentiment)
#' crossval(tmod, k = 5, by_class = TRUE)
#' crossval(tmod, k = 5, by_class = FALSE)
Expand Down Expand Up @@ -38,8 +39,8 @@ crossval.textmodel <- function(x, k = 5, by_class = FALSE, verbose = FALSE) {
type = "class")
results <- c(results,
structure(list(c(performance(this_pred, x$y[folds == i]),
list(obs = split(seq_len(ndoc(x)), folds)[[k]]))),
names = paste0("fold_", k)))
list(obs = split(seq_len(ndoc(x)), folds)[[i]]))),
names = paste0("fold_", i)))
}

summ <- summarize_results(results)
Expand All @@ -66,8 +67,8 @@ summarize_results <- function(x) {

# make into a 3D array
x_df <- lapply(x, data.frame)
x_array <- array(unlist(x), dim <- c(dim(x_df[[1]]), length(x_df)),
dimnames = c(dimnames(x_df[[1]]), list(names(x))))
x_array <- array(unlist(x_df), dim <- c(dim(x_df[[1]]), length(x_df)),
dimnames = c(dimnames(x_df[[1]]), list(names(x_df))))

apply(x_array, c(1, 2), mean)
}
Binary file modified data/data_corpus_LMRD.rda
Binary file not shown.
Binary file modified data/data_corpus_manifestosentsUK.rda
Binary file not shown.
3 changes: 2 additions & 1 deletion man/crossval.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion tests/testthat/test-textmodel_cnnlstmemb.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
context("test textmodel_cnnlstmemb")

test_that("the cnnlstmemb model works", {
skip()
skip_on_cran()

data(data_corpus_EPcoaldebate, package = "quanteda.textmodels")
corp <- corpus_subset(data_corpus_EPcoaldebate,
subset = language == "English") %>%
corpus_sample(500)

toks <- tokens(texts(corp))
toks <- tokens(corp)
label <- ifelse(docvars(corp, "crowd_subsidy_label") == "Pro-Subsidy", 1, 0)
tmod <- textmodel_cnnlstmemb(toks, y = label, epochs = 8)

Expand Down Expand Up @@ -36,6 +38,7 @@ test_that("the cnnlstmemb model works", {
})

test_that("multiclass prediction works", {
skip()
skip_on_cran()

data(data_corpus_irishbudget2010, package = "quanteda.textmodels")
Expand All @@ -56,6 +59,7 @@ test_that("multiclass prediction works", {
})

test_that("cnnlstmemb works with tokens2sequences", {
skip()
skip_on_cran()

data(data_corpus_irishbudget2010, package = "quanteda.textmodels")
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-textmodel_evaluate.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ test_that("textmodel_evaluate works", {
skip("until rewritten")
skip_on_cran()

data(data_corpus_EPcoaldebate, package = "quanteda.textmodels")

set.seed(100)
corp <- corpus_sample(data_corpus_EPcoaldebate, size = 500, by = "crowd_subsidy_label")
dfmat <- dfm(corp) %>%
Expand Down
3 changes: 3 additions & 0 deletions tests/testthat/test-textmodel_mlp.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
context("test textmodel_mlp")

test_that("the mlp model works", {
skip("because of tensorflow install problems")
skip_on_cran()

data(data_corpus_EPcoaldebate, package = "quanteda.textmodels")

set.seed(100)
corp_train <- corpus_sample(data_corpus_EPcoaldebate, size = 3000, by = "crowd_subsidy_label")
corp_test <- corpus_sample(data_corpus_EPcoaldebate, size = 10, by = "crowd_subsidy_label")
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-tokens2sequences.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
context("test tokens2sequences")

test_that("tokens2sequences works", {
skip()
skip_on_cran()

## Example from 13.1 of _An Introduction to Information Retrieval_
Expand Down Expand Up @@ -38,6 +39,7 @@ test_that("tokens2sequences works", {
})

test_that("tokens2sequences_conform works", {
skip()
txt1 <- "This is sentence one. And here is sentence two."
txt2 <- "This is sentence 3. Sentence 4! A fifth and final example."
toks1 <- corpus(txt1) %>%
Expand Down

0 comments on commit 3aabdb7

Please sign in to comment.