Closed
Description
#' Build a term matrix for a collection of texts
#'
#' Creates a token filter, tabulates terms with `corpus::term_counts()`, and
#' then calls `corpus::term_matrix()` restricted to the tabulated terms.
#'
#' @param text The texts to process; passed unchanged to the `corpus`
#'   functions.
#' @param bigrams If `TRUE`, request n-gram lengths 1 and 2; otherwise only
#'   length 1.
#' @param min_count Forwarded as the `min` argument of
#'   `corpus::term_counts()` (presumably a minimum count threshold; confirm
#'   against the installed `corpus` version).
#' @return The value returned by `corpus::term_matrix()`.
matrix_corpus <- function(text, bigrams = FALSE, min_count = 5) {
  # n-gram lengths handed to term_counts().
  ngram_lengths <- if (bigrams) 1:2 else 1

  # NOTE(review): `stop_words` is not defined inside this function; it is
  # resolved from the enclosing environment at call time.
  token_rules <- corpus::token_filter(
    stemmer = "english",
    drop_punct = TRUE,
    drop_number = TRUE,
    drop = stop_words
  )

  term_table <- corpus::term_counts(
    text, token_rules,
    ngrams = ngram_lengths, min = min_count
  )

  # Only the terms listed in the table above are selected for the matrix.
  term_mat <- corpus::term_matrix(
    text, token_rules,
    select = term_table$term
  )
  term_mat
}
# Reproduction script: the `movie_review` data set is loaded after attaching
# text2vec, and its `review` column is used as the input text.
library(text2vec)
data("movie_review")
text <- movie_review$review

# Unigram-only term matrix, using the default `min_count`.
corpus <- matrix_corpus(text, bigrams = FALSE)
Running the code above fails with the following error:
select term in position 1676 ('u.') contains a dropped type
Metadata
Metadata
Assignees
Labels
No labels