fix conversion from newer versions of seededlda

odelmarcelle · Apr 17, 2024 · b478779 · b478779
1 parent 2cc5870
commit b478779
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 3 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -3,6 +3,7 @@
 * Renamed `grow()` to `fit()` as a more intuitive name. `grow()` remains in the package for compatibility with older versions.
 * Aligned the weight argument and the algorithm of `topWords(..., method="FREX")` to the original paper.
 * Fixed the un-exported function `get_ECB_conferences()` to accommodate changes from the ECB website.
+* Fixed the model conversion from newer `seededlda` versions (1.2.0 and later).
 
 # sentopics 0.7.2
 

diff --git a/R/conversions.R b/R/conversions.R
@@ -311,11 +311,15 @@ as.LDA.LDA_VEM <- function(x, docs, ...) {
 #' @export
 as.LDA.textmodel_lda <- function(x, ...) {
 
+  version <- x$version
+
+  if (!(version >= "1.2.0")) stop("Conversion is not allowed from models created using a version of `seededlda` below 1.2.0")
+
   labels <- colnames(x$theta)
 
   beta <- x$phi
   beta[] <- x$beta
-  alpha <- rep(x$alpha, x$k)
+  alpha <- x$alpha
 
   tokens <- as.tokens(x$data)
   vocabulary <- makeVocabulary(tokens, NULL, 1L)
@@ -333,7 +337,7 @@ as.LDA.textmodel_lda <- function(x, ...) {
     tokens = vocabulary$toks,
     vocabulary = vocabulary$vocabulary,
     K = x$k,
-    alpha = as.matrix(rep(x$alpha, x$k)),
+    alpha = as.matrix(x$alpha),
     beta = beta,
     it = x$last_iter,
     theta = x$theta,

diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
@@ -25,7 +25,10 @@ test_that("as.tokens.dfm works", {
   toks <- as.tokens.dfm(dfm)
   expect_identical(nrow(dfm), length(toks))
   expect_identical(colnames(dfm), quanteda::types(toks))
-  expect_equal(quanteda::rowSums(dfm), quanteda::ntoken(toks))
+  expect_equal(
+    unname(quanteda::rowSums(dfm)),
+    unname(quanteda::ntoken(toks))
+  )
 
   toks <- tokens(c(
     "This text will be broken down into pieces with the `tokens` function",