Skip to content

Commit

Permalink
fix typos
Browse files Browse the repository at this point in the history
  • Loading branch information
odelmarcelle committed Apr 17, 2024
1 parent b478779 commit 91de0f7
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 23 deletions.
4 changes: 2 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

* Renamed `grow()` to `fit()` as a more intuitive name. `grow()` remains in the package for compatibility with older versions.
* Aligned the weight argument and the algorithm of `topWords(..., method="FREX")` to the original paper.
* Fixed the un-exported function `get_ECB_conferences()` to accomodates changes from the ECB website.
* Fixed the model conversion from newer `seededlda` versions (1.2.0 and subsequents)
* Fixed the un-exported function `get_ECB_conferences()` to accommodate changes from the ECB website.
* Fixed the model conversion from newer `seededlda` versions (1.2.0 and subsequent)

# sentopics 0.7.2

Expand Down
2 changes: 1 addition & 1 deletion R/conversions.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' @param alpha for \pkg{lda} models, the document-topic mixture hyperparameter.
#' If missing, the hyperparameter will be set to `50/K`.
#' @param eta for \pkg{lda} models, the topic-word mixture hyperparameter. Other
#' packages refer to this hypeparameter as *beta*. If missing, the
#' packages refer to this hyperparameter as *beta*. If missing, the
#' hyperparameter will be set to `0.01`.
#' @param ... arguments passed to other methods.
#'
Expand Down
36 changes: 18 additions & 18 deletions R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @param x a `sentopicmodel` created from the [LDA()], [JST()] or [rJST()]
#' @param nWords the number of top words to extract
#' @param method specify if a re-ranking function should be applied before
#' returning the top words. See Details for a description of each method.
#' returning the top words. See Details for a description of each method.
#' @param output determines the output of the function
#' @param subset allows subsetting using a logical expression, as in [subset()].
#' Particularly useful to limit the number of observations on plot outputs. The
Expand Down Expand Up @@ -48,18 +48,18 @@
#' weight \eqn{w}, where \eqn{\text{ECDF}} is the empirical cumulative
#' distribution function.
#'
#'
#'
#' @references Blei, DM. and Lafferty, JD. (2009). [Topic
#' models.](https://doi.org/10.1201/9781420059458-12). In *Text Mining*,
#' chapter 4, 101--124.
#'
#'
#' Bischof JM. and Airoldi, EM. (2012). [Summarizing Topical Content
#' with Word Frequency and
#' Exclusivity.](https://dl.acm.org/doi/10.5555/3042573.3042578). In
#' *Proceedings of the 29th International Coference on International
#' *Proceedings of the 29th International Conference on International
#' Conference on Machine Learning*, ICML'12, 9--16.
#'
#'
#'
#'
#' @import data.table
#' @export
#' @seealso [melt.sentopicmodel()] for extracting estimated mixtures
Expand Down Expand Up @@ -95,7 +95,7 @@ topWords <- function(x,
env))
top <- subset(top, eval(subset))
}

switch(output,
"matrix" = {
res <- matrix(top$word, nrow = nWords)
Expand Down Expand Up @@ -177,7 +177,7 @@ topWords_dt <- function(x,
epsilon <- 10^-100

method <- match.arg(method)

nClusters <- max(phiStats$L1) * max(phiStats$L2)
switch(method,
"frequency" = {
Expand All @@ -195,9 +195,9 @@ topWords_dt <- function(x,
, list(word, L1, L2, value = value + .Machine$double.eps)][
, list(L1, L2, value = value * log(value / prod(value)^(1/nClusters))), by = word]},
"FREX" = {

if (w < 0 | w > 1) stop("The argument 'w' should be constrained between 0 and 1.")

phiStats[, "exclusivity" := value / sum(value), by = word]
phiStats <-
phiStats[, list(word, value = (
Expand All @@ -224,10 +224,10 @@ topWords_dt <- function(x,

#' @rdname topWords
#' @export
#' @examples
#' @examples
#' plot_topWords(model)
#' plot_topWords(model, subset = topic %in% 1:2)
#'
#'
#' jst <- JST(ECB_press_conferences_tokens)
#' jst <- fit(jst, 10)
#' plot_topWords(jst)
Expand Down Expand Up @@ -336,7 +336,7 @@ coherence.sentopicmodel <- function(x, nWords = 10, method = c("C_NPMI", "C_V"),
#' @param ... further arguments passed to internal distance functions.
#'
#' @details The `method` argument determines how distances are computed.
#'
#'
#' - `euclidean` finds the pairs of topics that minimize the total Euclidean
#' distance and returns that total distance.
#' - `hellinger` does the same but based on the Hellinger distance.
Expand Down Expand Up @@ -373,7 +373,7 @@ chainsDistances <- function(x,
x <- as.sentopicmodel(x)
# avoid copying base to each chain
x <- as.list(x, copy = FALSE)

method <- match.arg(method)
switch(method,
"cosine" = cosineDistances(x),
Expand Down Expand Up @@ -412,7 +412,7 @@ chainsDistances <- function(x,
#' model <- fit(model, 10, nChains = 5)
#' chainsScores(model, window = 5)
#' chainsScores(model, window = "boolean")
#'
#'
#' # -- Parallel computation --
#' require(future.apply)
#' future::plan("multisession", workers = 2) # Set up 2 workers
Expand All @@ -435,10 +435,10 @@ chainsScores <- function(x, window = 110, nWords = 10) {
} else {
NPMIsW <- NPMIs10 <- computeNPMI(x$tokens, window)
}

# avoid copying base to each chain # could be further optimized
x <- as.list(x, copy = FALSE)

FUN <- function(x) {
score <- data.table::data.table(
# name = name,
Expand All @@ -465,6 +465,6 @@ chainsScores <- function(x, window = 110, nWords = 10) {
} else {
chainsScores <- sapply(x, FUN)
}

t(chainsScores)
}
2 changes: 1 addition & 1 deletion man/as.LDA.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/topWords.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 91de0f7

Please sign in to comment.