Separate the textstat functions

- Leaves the `textplot_*()` functions in for now - one thing at a time
- Assumes the existence of another package, called **quanteda.textstats** (under development)

Merge remote-tracking branch 'quanteda.core/modularise' into separate-textstat

# Conflicts:
#   DESCRIPTION
#   R/data-documentation.R
#   R/dictionaries.R
#   R/textstat_collocations.R
#   R/textstat_keyness.R
#   R/textstat_lexdiv.R
#   R/textstat_readability.R
#   R/textstat_summary.R
#   man/data_dictionary_LSD2015.Rd
#   man/keyness.Rd
#   man/textstat_keyness.Rd
#   man/textstat_lexdiv.Rd
#   man/textstat_readability.Rd
#   man/textstat_summary.Rd
#   src/RcppExports.cpp
#   src/collocations_mt_.cpp
#   tests/data_creation/update_dictionary_objects.R
#   tests/testthat.R
#   tests/testthat/test-spacyr-methods.R
#   tests/testthat/test-textstat_keyness.R
#   tests/testthat/test-tokens_recompile.R
#   vignettes/pkgdown/quickstart_cn.Rmd
#   vignettes/pkgdown/quickstart_es.Rmd
#   vignettes/pkgdown/quickstart_hi.Rmd
#   vignettes/pkgdown/quickstart_ja.Rmd
#   vignettes/quickstart.Rmd
quanteda · Nov 20, 2020 · 75562eb · 75562eb
2 parents 96478db + f1c191f
commit 75562eb
Show file tree

Hide file tree

Showing 101 changed files with 452 additions and 9,665 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -13,15 +13,14 @@ __MACOSX
 \.Rproj\.user
 ^images
 ^CONDUCT\.md$
-^benchmarks
 ^codecov\.yml$
 ^appveyor\.yml$
-^docs/
+^docs$
 ^_pkgdown\.yml$
-^tests/benchmarks/
-^tests/misc/
-^css/
-^vignettes/pkgdown/
+^tests/benchmarks$
+^tests/misc$
+^css$
+^vignettes/pkgdown$
 ^revdep$
 ^CNAME$
 ^pkgdown$
@@ -30,5 +29,5 @@ __MACOSX
 LICENSE
 quanteda.pdf
 ^\.lintr$
-^tests/data_creation/
+^tests/data_creation$
 ^\.github$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: quanteda
-Version: 2.1.2.9000
+Version: 2.9.9000
 Title: Quantitative Analysis of Textual Data
 Description: A fast, flexible, and comprehensive framework for 
     quantitative text analysis in R.  Provides functionality for corpus management,
@@ -16,78 +16,72 @@ Authors@R: c(
     person("Adam", "Obeng", email = "quanteda@binaryeagle.com", role = "aut", comment = c(ORCID = "0000-0002-2906-4775")),
     person("Stefan", "Müller", email = "mullers@tcd.ie", role = "aut", comment = c(ORCID = "0000-0002-6315-4125")),
     person("Akitaka", "Matsuo", email = "a.matsuo@lse.ac.uk", role = "aut", comment = c(ORCID = "0000-0002-3323-6330")),
-    person("Jiong Wei", "Lua", email = "J.W.Lua@lse.ac.uk", role = "aut"),
-    person("Jouni", "Kuha", email = "j.kuha@lse.ac.uk", role = "aut", comment = c(ORCID = "0000-0002-1156-8465")),
     person("William", "Lowe", email = "wlowe@princeton.edu", role = "aut", comment = c(ORCID = "0000-0002-1549-6163")),
     person("Christian", "Müller", email = "C.Mueller@lse.ac.uk", role = "ctb"),
-    person("Lori", "Young", role = "dtc", comment = "Lexicoder Sentiment Dictionary 2015"),
-    person("Stuart", "Soroka", role = "dtc", comment = "Lexicoder Sentiment Dictionary 2015"),
-    person("Ian", "Fellows", email = "ian@fellstat.com", role = "cph", 
-           comment = "authored wordcloud C source code (modified)"),
+    person("Ian", "Fellows", email = "ian@fellstat.com", role = "cph", comment = "authored wordcloud C source code (modified)"),
     person("European Research Council", role = "fnd", comment = "ERC-2011-StG 283794-QUANTESS")
     )
 License: GPL-3
 Depends:
-    R (>= 3.1.0),
+    R (>= 3.5.0),
     methods
 Imports:
-    data.table (>= 1.9.6),
-    extrafont,
+    data.table,
+    digest,
     fastmatch,
-    ggplot2 (>= 2.2.0),
-    ggrepel,
-    jsonlite,
     magrittr,
     Matrix (>= 1.2),
-    network,
     Rcpp (>= 0.12.12),
     RcppParallel,
-    sna,
     SnowballC,
     stopwords,
     stringi,
     xml2,
     yaml,
-    proxyC (>= 0.1.4),
-    digest
+    extrafont,
+    ggplot2,
+    ggrepel,
+    sna,
+    network
 LinkingTo: Rcpp, RcppParallel, RcppArmadillo (>= 0.7.600.1.0)
 Suggests:
-    dplyr,     
-    DT,
-    e1071,
-    entropy,
-    ExPosition,     
+    rmarkdown,
+    spelling,
+    testthat,
+    formatR,
+    tm (>= 0.6),
+    tokenizers,
+    knitr,
     lda,
     lsa,
-    proxy,
+    igraph,
+    dplyr,
     purrr,
     quanteda.textmodels,
-    RColorBrewer,
-    rmarkdown,
+    quanteda.textstats,
     slam,
     spacyr,
-    spelling,
-    stm,     
-    svs,     
-    testthat,
+    stm,
     text2vec,
+    topicmodels,
+    jsonlite,
+    quanteda,
     tibble,
-    formatR,
     tidytext,
-    tm (>= 0.6),
-    tokenizers,
-    topicmodels,
-    xtable,
-    knitr,
-    igraph,
-    wordcloud
+    wordcloud,
+    DT,
+    RColorBrewer,
+    xtable
 URL: https://quanteda.io
 Encoding: UTF-8
 BugReports: https://github.com/quanteda/quanteda/issues
 LazyData: TRUE
 VignetteBuilder: knitr
 Language: en-GB
-Collate:
+RoxygenNote: 7.1.1
+SystemRequirements: C++11
+Roxygen: list(markdown = TRUE)
+Collate: 
     'RcppExports.R'
     'View.R'
     'meta.R'
@@ -138,8 +132,6 @@ Collate:
     'kwic.R'
     'metadoc.R'
     'nfunctions.R'
-    'nscrabble.R'
-    'nsyllable.R'
     'object-builder.R'
     'pattern2fixed.R'
     'phrases.R'
@@ -153,15 +145,7 @@ Collate:
     'textplot_network.R'
     'textplot_wordcloud.R'
     'textplot_xray.R'
-    'textstat-methods.R'
-    'textstat_collocations.R'
-    'textstat_entropy.R'
-    'textstat_frequency.R'
-    'textstat_keyness.R'
-    'textstat_lexdiv.R'
-    'textstat_readability.R'
-    'textstat_simil.R'
-    'textstat_summary.R'
+    'textstat.R'
     'tokenizers.R'
     'tokens-methods-base.R'
     'tokens.R'
@@ -179,6 +163,3 @@ Collate:
     'utils.R'
     'wordstem.R'
     'zzz.R'
-RoxygenNote: 7.1.1
-SystemRequirements: C++11
-Roxygen: list(markdown = TRUE)
diff --git a/NAMESPACE b/NAMESPACE
@@ -14,7 +14,6 @@ S3method("+",tokens)
 S3method("[",corpus)
 S3method("[",kwic)
 S3method("[",summary.corpus)
-S3method("[",textstat)
 S3method("[",tokens)
 S3method("[<-",tokens)
 S3method("[[",dfm)
@@ -49,7 +48,6 @@ S3method(as.corpus,corpus)
 S3method(as.corpus,corpuszip)
 S3method(as.corpus,default)
 S3method(as.data.frame,dfm)
-S3method(as.data.frame,textstat_proxy)
 S3method(as.dfm,DocumentTermMatrix)
 S3method(as.dfm,Matrix)
 S3method(as.dfm,TermDocumentMatrix)
@@ -66,7 +64,6 @@ S3method(as.fcm,default)
 S3method(as.fcm,fcm)
 S3method(as.fcm,matrix)
 S3method(as.igraph,fcm)
-S3method(as.list,textstat_proxy)
 S3method(as.list,tokens)
 S3method(as.matrix,dfm)
 S3method(as.network,default)
@@ -197,7 +194,6 @@ S3method(featnames,dfm)
 S3method(head,corpus)
 S3method(head,dfm)
 S3method(head,fcm)
-S3method(head,textstat_proxy)
 S3method(kwic,character)
 S3method(kwic,corpus)
 S3method(kwic,default)
@@ -220,16 +216,11 @@ S3method(ndoc,spacyr_parsed)
 S3method(ndoc,tokens)
 S3method(nfeat,default)
 S3method(nfeat,dfm)
-S3method(nscrabble,character)
-S3method(nscrabble,default)
 S3method(nsentence,character)
 S3method(nsentence,corpus)
 S3method(nsentence,default)
 S3method(nsentence,spacyr_parsed)
 S3method(nsentence,tokens)
-S3method(nsyllable,character)
-S3method(nsyllable,default)
-S3method(nsyllable,tokens)
 S3method(ntoken,character)
 S3method(ntoken,corpus)
 S3method(ntoken,default)
@@ -260,7 +251,6 @@ S3method(summary,corpus)
 S3method(tail,corpus)
 S3method(tail,dfm)
 S3method(tail,fcm)
-S3method(tail,textstat_proxy)
 S3method(textplot_keyness,default)
 S3method(textplot_keyness,keyness)
 S3method(textplot_network,dfm)
@@ -273,32 +263,6 @@ S3method(textplot_xray,kwic)
 S3method(texts,character)
 S3method(texts,corpus)
 S3method(texts,readtext)
-S3method(textstat_collocations,character)
-S3method(textstat_collocations,corpus)
-S3method(textstat_collocations,default)
-S3method(textstat_collocations,tokens)
-S3method(textstat_dist,default)
-S3method(textstat_dist,dfm)
-S3method(textstat_entropy,default)
-S3method(textstat_entropy,dfm)
-S3method(textstat_frequency,default)
-S3method(textstat_frequency,dfm)
-S3method(textstat_keyness,default)
-S3method(textstat_keyness,dfm)
-S3method(textstat_lexdiv,default)
-S3method(textstat_lexdiv,dfm)
-S3method(textstat_lexdiv,tokens)
-S3method(textstat_readability,character)
-S3method(textstat_readability,corpus)
-S3method(textstat_readability,default)
-S3method(textstat_select,default)
-S3method(textstat_select,textstat)
-S3method(textstat_simil,default)
-S3method(textstat_simil,dfm)
-S3method(textstat_summary,corpus)
-S3method(textstat_summary,default)
-S3method(textstat_summary,dfm)
-S3method(textstat_summary,tokens)
 S3method(tokens,character)
 S3method(tokens,corpus)
 S3method(tokens,default)
@@ -413,7 +377,6 @@ export(featfreq)
 export(featnames)
 export(flatten_dictionary)
 export(index_types)
-export(is.collocations)
 export(is.corpus)
 export(is.dfm)
 export(is.dictionary)
@@ -428,9 +391,7 @@ export(metacorpus)
 export(metadoc)
 export(ndoc)
 export(nfeat)
-export(nscrabble)
 export(nsentence)
-export(nsyllable)
 export(ntoken)
 export(ntype)
 export(pattern2fixed)
@@ -444,17 +405,6 @@ export(textplot_network)
 export(textplot_wordcloud)
 export(textplot_xray)
 export(texts)
-export(textstat_collocations)
-export(textstat_dist)
-export(textstat_entropy)
-export(textstat_frequency)
-export(textstat_keyness)
-export(textstat_lexdiv)
-export(textstat_proxy)
-export(textstat_readability)
-export(textstat_select)
-export(textstat_simil)
-export(textstat_summary)
 export(tokenize_character)
 export(tokenize_fasterword)
 export(tokenize_fastestword)
@@ -482,20 +432,17 @@ export(tokens_wordstem)
 export(topfeatures)
 export(types)
 exportClasses(dictionary2)
-exportClasses(textstat_proxy)
 exportMethods("[")
 exportMethods("[[")
 exportMethods("rownames<-")
 exportMethods(Compare)
 exportMethods(as.list)
-exportMethods(as.matrix)
 exportMethods(c)
 exportMethods(colMeans)
 exportMethods(colSums)
 exportMethods(print)
 exportMethods(rowMeans)
 exportMethods(rowSums)
-exportMethods(show)
 exportMethods(t)
 import(Matrix)
 import(ggplot2)
@@ -504,21 +451,11 @@ importFrom(Rcpp,evalCpp)
 importFrom(RcppParallel,RcppParallelLibs)
 importFrom(RcppParallel,setThreadOptions)
 importFrom(data.table,":=")
-importFrom(data.table,.SD)
 importFrom(data.table,data.table)
-importFrom(data.table,frank)
-importFrom(data.table,setDF)
-importFrom(data.table,setcolorder)
-importFrom(data.table,setkey)
-importFrom(data.table,setorder)
 importFrom(fastmatch,fmatch)
 importFrom(graphics,text)
 importFrom(magrittr,"%>%")
-importFrom(stats,chisq.test)
 importFrom(stats,complete.cases)
-importFrom(stats,dchisq)
-importFrom(stats,fisher.test)
-importFrom(stats,na.omit)
 importFrom(stats,quantile)
 importFrom(stopwords,stopwords)
 importFrom(stringi,stri_c)

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -1,50 +1,10 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
-qatd_cpp_collocations <- function(texts_, types_, words_ignore_, count_min, sizes_, method, smoothing) {
-    .Call(`_quanteda_qatd_cpp_collocations`, texts_, types_, words_ignore_, count_min, sizes_, method, smoothing)
-}
-
-qatd_cpp_manhattan <- function(A, margin = 1L) {
-    .Call(`_quanteda_qatd_cpp_manhattan`, A, margin)
-}
-
-qatd_cpp_manhattan2 <- function(A, B, margin = 1L) {
-    .Call(`_quanteda_qatd_cpp_manhattan2`, A, B, margin)
-}
-
-qatd_cpp_maximum <- function(A, margin = 1L) {
-    .Call(`_quanteda_qatd_cpp_maximum`, A, margin)
-}
-
-qatd_cpp_maximum2 <- function(A, B, margin = 1L) {
-    .Call(`_quanteda_qatd_cpp_maximum2`, A, B, margin)
-}
-
-qatd_cpp_canberra <- function(A, margin = 1L) {
-    .Call(`_quanteda_qatd_cpp_canberra`, A, margin)
-}
-
-qatd_cpp_canberra2 <- function(A, B, margin = 1L) {
-    .Call(`_quanteda_qatd_cpp_canberra2`, A, B, margin)
-}
-
-qatd_cpp_minkowski <- function(A, margin = 1L, p = 2) {
-    .Call(`_quanteda_qatd_cpp_minkowski`, A, margin, p)
-}
-
-qatd_cpp_minkowski2 <- function(A, B, margin = 1L, p = 2) {
-    .Call(`_quanteda_qatd_cpp_minkowski2`, A, B, margin, p)
-}
-
 qatd_cpp_fcm <- function(texts_, n_types, weights_, boolean, ordered) {
     .Call(`_quanteda_qatd_cpp_fcm`, texts_, n_types, weights_, boolean, ordered)
 }
 
-qatd_cpp_keyness <- function(mt, measure, correct) {
-    .Call(`_quanteda_qatd_cpp_keyness`, mt, measure, correct)
-}
-
 qatd_cpp_kwic <- function(texts_, types_, words_, pats_, window, delim_) {
     .Call(`_quanteda_qatd_cpp_kwic`, texts_, types_, words_, pats_, window, delim_)
 }