Skip to content

Commit

Permalink
Separate the textstat functions
Browse files Browse the repository at this point in the history
- Leaves the `textplot_*()` functions in for now - one thing at a time
- Assumes the existence of another package, called **quanteda.textstats** (under development)

Merge remote-tracking branch 'quanteda.core/modularise' into separate-textstat

# Conflicts:
#	DESCRIPTION
#	R/data-documentation.R
#	R/dictionaries.R
#	R/textstat_collocations.R
#	R/textstat_keyness.R
#	R/textstat_lexdiv.R
#	R/textstat_readability.R
#	R/textstat_summary.R
#	man/data_dictionary_LSD2015.Rd
#	man/keyness.Rd
#	man/textstat_keyness.Rd
#	man/textstat_lexdiv.Rd
#	man/textstat_readability.Rd
#	man/textstat_summary.Rd
#	src/RcppExports.cpp
#	src/collocations_mt_.cpp
#	tests/data_creation/update_dictionary_objects.R
#	tests/testthat.R
#	tests/testthat/test-spacyr-methods.R
#	tests/testthat/test-textstat_keyness.R
#	tests/testthat/test-tokens_recompile.R
#	vignettes/pkgdown/quickstart_cn.Rmd
#	vignettes/pkgdown/quickstart_es.Rmd
#	vignettes/pkgdown/quickstart_hi.Rmd
#	vignettes/pkgdown/quickstart_ja.Rmd
#	vignettes/quickstart.Rmd
  • Loading branch information
kbenoit committed Nov 20, 2020
2 parents 96478db + f1c191f commit 75562eb
Show file tree
Hide file tree
Showing 101 changed files with 452 additions and 9,665 deletions.
13 changes: 6 additions & 7 deletions .Rbuildignore
Expand Up @@ -13,15 +13,14 @@ __MACOSX
\.Rproj\.user
^images
^CONDUCT\.md$
^benchmarks
^codecov\.yml$
^appveyor\.yml$
^docs/
^docs$
^_pkgdown\.yml$
^tests/benchmarks/
^tests/misc/
^css/
^vignettes/pkgdown/
^tests/benchmarks$
^tests/misc$
^css$
^vignettes/pkgdown$
^revdep$
^CNAME$
^pkgdown$
Expand All @@ -30,5 +29,5 @@ __MACOSX
LICENSE
quanteda.pdf
^\.lintr$
^tests/data_creation/
^tests/data_creation$
^\.github$
85 changes: 33 additions & 52 deletions DESCRIPTION
@@ -1,5 +1,5 @@
Package: quanteda
Version: 2.1.2.9000
Version: 2.9.9000
Title: Quantitative Analysis of Textual Data
Description: A fast, flexible, and comprehensive framework for
quantitative text analysis in R. Provides functionality for corpus management,
Expand All @@ -16,78 +16,72 @@ Authors@R: c(
person("Adam", "Obeng", email = "quanteda@binaryeagle.com", role = "aut", comment = c(ORCID = "0000-0002-2906-4775")),
person("Stefan", "Müller", email = "mullers@tcd.ie", role = "aut", comment = c(ORCID = "0000-0002-6315-4125")),
person("Akitaka", "Matsuo", email = "a.matsuo@lse.ac.uk", role = "aut", comment = c(ORCID = "0000-0002-3323-6330")),
person("Jiong Wei", "Lua", email = "J.W.Lua@lse.ac.uk", role = "aut"),
person("Jouni", "Kuha", email = "j.kuha@lse.ac.uk", role = "aut", comment = c(ORCID = "0000-0002-1156-8465")),
person("William", "Lowe", email = "wlowe@princeton.edu", role = "aut", comment = c(ORCID = "0000-0002-1549-6163")),
person("Christian", "Müller", email = "C.Mueller@lse.ac.uk", role = "ctb"),
person("Lori", "Young", role = "dtc", comment = "Lexicoder Sentiment Dictionary 2015"),
person("Stuart", "Soroka", role = "dtc", comment = "Lexicoder Sentiment Dictionary 2015"),
person("Ian", "Fellows", email = "ian@fellstat.com", role = "cph",
comment = "authored wordcloud C source code (modified)"),
person("Ian", "Fellows", email = "ian@fellstat.com", role = "cph", comment = "authored wordcloud C source code (modified)"),
person("European Research Council", role = "fnd", comment = "ERC-2011-StG 283794-QUANTESS")
)
License: GPL-3
Depends:
R (>= 3.1.0),
R (>= 3.5.0),
methods
Imports:
data.table (>= 1.9.6),
extrafont,
data.table,
digest,
fastmatch,
ggplot2 (>= 2.2.0),
ggrepel,
jsonlite,
magrittr,
Matrix (>= 1.2),
network,
Rcpp (>= 0.12.12),
RcppParallel,
sna,
SnowballC,
stopwords,
stringi,
xml2,
yaml,
proxyC (>= 0.1.4),
digest
extrafont,
ggplot2,
ggrepel,
sna,
network
LinkingTo: Rcpp, RcppParallel, RcppArmadillo (>= 0.7.600.1.0)
Suggests:
dplyr,
DT,
e1071,
entropy,
ExPosition,
rmarkdown,
spelling,
testthat,
formatR,
tm (>= 0.6),
tokenizers,
knitr,
lda,
lsa,
proxy,
igraph,
dplyr,
purrr,
quanteda.textmodels,
RColorBrewer,
rmarkdown,
quanteda.textstats,
slam,
spacyr,
spelling,
stm,
svs,
testthat,
stm,
text2vec,
topicmodels,
jsonlite,
quanteda,
tibble,
formatR,
tidytext,
tm (>= 0.6),
tokenizers,
topicmodels,
xtable,
knitr,
igraph,
wordcloud
wordcloud,
DT,
RColorBrewer,
xtable
URL: https://quanteda.io
Encoding: UTF-8
BugReports: https://github.com/quanteda/quanteda/issues
LazyData: TRUE
VignetteBuilder: knitr
Language: en-GB
Collate:
RoxygenNote: 7.1.1
SystemRequirements: C++11
Roxygen: list(markdown = TRUE)
Collate:
'RcppExports.R'
'View.R'
'meta.R'
Expand Down Expand Up @@ -138,8 +132,6 @@ Collate:
'kwic.R'
'metadoc.R'
'nfunctions.R'
'nscrabble.R'
'nsyllable.R'
'object-builder.R'
'pattern2fixed.R'
'phrases.R'
Expand All @@ -153,15 +145,7 @@ Collate:
'textplot_network.R'
'textplot_wordcloud.R'
'textplot_xray.R'
'textstat-methods.R'
'textstat_collocations.R'
'textstat_entropy.R'
'textstat_frequency.R'
'textstat_keyness.R'
'textstat_lexdiv.R'
'textstat_readability.R'
'textstat_simil.R'
'textstat_summary.R'
'textstat.R'
'tokenizers.R'
'tokens-methods-base.R'
'tokens.R'
Expand All @@ -179,6 +163,3 @@ Collate:
'utils.R'
'wordstem.R'
'zzz.R'
RoxygenNote: 7.1.1
SystemRequirements: C++11
Roxygen: list(markdown = TRUE)
63 changes: 0 additions & 63 deletions NAMESPACE
Expand Up @@ -14,7 +14,6 @@ S3method("+",tokens)
S3method("[",corpus)
S3method("[",kwic)
S3method("[",summary.corpus)
S3method("[",textstat)
S3method("[",tokens)
S3method("[<-",tokens)
S3method("[[",dfm)
Expand Down Expand Up @@ -49,7 +48,6 @@ S3method(as.corpus,corpus)
S3method(as.corpus,corpuszip)
S3method(as.corpus,default)
S3method(as.data.frame,dfm)
S3method(as.data.frame,textstat_proxy)
S3method(as.dfm,DocumentTermMatrix)
S3method(as.dfm,Matrix)
S3method(as.dfm,TermDocumentMatrix)
Expand All @@ -66,7 +64,6 @@ S3method(as.fcm,default)
S3method(as.fcm,fcm)
S3method(as.fcm,matrix)
S3method(as.igraph,fcm)
S3method(as.list,textstat_proxy)
S3method(as.list,tokens)
S3method(as.matrix,dfm)
S3method(as.network,default)
Expand Down Expand Up @@ -197,7 +194,6 @@ S3method(featnames,dfm)
S3method(head,corpus)
S3method(head,dfm)
S3method(head,fcm)
S3method(head,textstat_proxy)
S3method(kwic,character)
S3method(kwic,corpus)
S3method(kwic,default)
Expand All @@ -220,16 +216,11 @@ S3method(ndoc,spacyr_parsed)
S3method(ndoc,tokens)
S3method(nfeat,default)
S3method(nfeat,dfm)
S3method(nscrabble,character)
S3method(nscrabble,default)
S3method(nsentence,character)
S3method(nsentence,corpus)
S3method(nsentence,default)
S3method(nsentence,spacyr_parsed)
S3method(nsentence,tokens)
S3method(nsyllable,character)
S3method(nsyllable,default)
S3method(nsyllable,tokens)
S3method(ntoken,character)
S3method(ntoken,corpus)
S3method(ntoken,default)
Expand Down Expand Up @@ -260,7 +251,6 @@ S3method(summary,corpus)
S3method(tail,corpus)
S3method(tail,dfm)
S3method(tail,fcm)
S3method(tail,textstat_proxy)
S3method(textplot_keyness,default)
S3method(textplot_keyness,keyness)
S3method(textplot_network,dfm)
Expand All @@ -273,32 +263,6 @@ S3method(textplot_xray,kwic)
S3method(texts,character)
S3method(texts,corpus)
S3method(texts,readtext)
S3method(textstat_collocations,character)
S3method(textstat_collocations,corpus)
S3method(textstat_collocations,default)
S3method(textstat_collocations,tokens)
S3method(textstat_dist,default)
S3method(textstat_dist,dfm)
S3method(textstat_entropy,default)
S3method(textstat_entropy,dfm)
S3method(textstat_frequency,default)
S3method(textstat_frequency,dfm)
S3method(textstat_keyness,default)
S3method(textstat_keyness,dfm)
S3method(textstat_lexdiv,default)
S3method(textstat_lexdiv,dfm)
S3method(textstat_lexdiv,tokens)
S3method(textstat_readability,character)
S3method(textstat_readability,corpus)
S3method(textstat_readability,default)
S3method(textstat_select,default)
S3method(textstat_select,textstat)
S3method(textstat_simil,default)
S3method(textstat_simil,dfm)
S3method(textstat_summary,corpus)
S3method(textstat_summary,default)
S3method(textstat_summary,dfm)
S3method(textstat_summary,tokens)
S3method(tokens,character)
S3method(tokens,corpus)
S3method(tokens,default)
Expand Down Expand Up @@ -413,7 +377,6 @@ export(featfreq)
export(featnames)
export(flatten_dictionary)
export(index_types)
export(is.collocations)
export(is.corpus)
export(is.dfm)
export(is.dictionary)
Expand All @@ -428,9 +391,7 @@ export(metacorpus)
export(metadoc)
export(ndoc)
export(nfeat)
export(nscrabble)
export(nsentence)
export(nsyllable)
export(ntoken)
export(ntype)
export(pattern2fixed)
Expand All @@ -444,17 +405,6 @@ export(textplot_network)
export(textplot_wordcloud)
export(textplot_xray)
export(texts)
export(textstat_collocations)
export(textstat_dist)
export(textstat_entropy)
export(textstat_frequency)
export(textstat_keyness)
export(textstat_lexdiv)
export(textstat_proxy)
export(textstat_readability)
export(textstat_select)
export(textstat_simil)
export(textstat_summary)
export(tokenize_character)
export(tokenize_fasterword)
export(tokenize_fastestword)
Expand Down Expand Up @@ -482,20 +432,17 @@ export(tokens_wordstem)
export(topfeatures)
export(types)
exportClasses(dictionary2)
exportClasses(textstat_proxy)
exportMethods("[")
exportMethods("[[")
exportMethods("rownames<-")
exportMethods(Compare)
exportMethods(as.list)
exportMethods(as.matrix)
exportMethods(c)
exportMethods(colMeans)
exportMethods(colSums)
exportMethods(print)
exportMethods(rowMeans)
exportMethods(rowSums)
exportMethods(show)
exportMethods(t)
import(Matrix)
import(ggplot2)
Expand All @@ -504,21 +451,11 @@ importFrom(Rcpp,evalCpp)
importFrom(RcppParallel,RcppParallelLibs)
importFrom(RcppParallel,setThreadOptions)
importFrom(data.table,":=")
importFrom(data.table,.SD)
importFrom(data.table,data.table)
importFrom(data.table,frank)
importFrom(data.table,setDF)
importFrom(data.table,setcolorder)
importFrom(data.table,setkey)
importFrom(data.table,setorder)
importFrom(fastmatch,fmatch)
importFrom(graphics,text)
importFrom(magrittr,"%>%")
importFrom(stats,chisq.test)
importFrom(stats,complete.cases)
importFrom(stats,dchisq)
importFrom(stats,fisher.test)
importFrom(stats,na.omit)
importFrom(stats,quantile)
importFrom(stopwords,stopwords)
importFrom(stringi,stri_c)
Expand Down
40 changes: 0 additions & 40 deletions R/RcppExports.R
@@ -1,50 +1,10 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

# R-side shim for the compiled routine `_quanteda_qatd_cpp_collocations`.
# The seven arguments are forwarded to .Call() unchanged and in order, and
# the native routine's result is returned as-is.
# Generated wrapper (see the file header): regenerate rather than hand-edit.
qatd_cpp_collocations <- function(texts_, types_, words_ignore_, count_min,
                                  sizes_, method, smoothing) {
    .Call(
        `_quanteda_qatd_cpp_collocations`,
        texts_, types_, words_ignore_, count_min, sizes_, method, smoothing
    )
}

# R-side shim for the compiled routine `_quanteda_qatd_cpp_manhattan`.
# Forwards `A` and `margin` (default 1L) to .Call() unchanged and returns
# whatever the native routine returns.
# Generated wrapper (see the file header): regenerate rather than hand-edit.
qatd_cpp_manhattan <- function(A, margin = 1L) {
    .Call(
        `_quanteda_qatd_cpp_manhattan`,
        A, margin
    )
}

# R-side shim for the compiled routine `_quanteda_qatd_cpp_manhattan2`.
# Forwards `A`, `B` and `margin` (default 1L) to .Call() unchanged and
# returns whatever the native routine returns.
# Generated wrapper (see the file header): regenerate rather than hand-edit.
qatd_cpp_manhattan2 <- function(A, B, margin = 1L) {
    .Call(
        `_quanteda_qatd_cpp_manhattan2`,
        A, B, margin
    )
}

# R-side shim for the compiled routine `_quanteda_qatd_cpp_maximum`.
# Forwards `A` and `margin` (default 1L) to .Call() unchanged and returns
# whatever the native routine returns.
# Generated wrapper (see the file header): regenerate rather than hand-edit.
qatd_cpp_maximum <- function(A, margin = 1L) {
    .Call(
        `_quanteda_qatd_cpp_maximum`,
        A, margin
    )
}

# R-side shim for the compiled routine `_quanteda_qatd_cpp_maximum2`.
# Forwards `A`, `B` and `margin` (default 1L) to .Call() unchanged and
# returns whatever the native routine returns.
# Generated wrapper (see the file header): regenerate rather than hand-edit.
qatd_cpp_maximum2 <- function(A, B, margin = 1L) {
    .Call(
        `_quanteda_qatd_cpp_maximum2`,
        A, B, margin
    )
}

# R-side shim for the compiled routine `_quanteda_qatd_cpp_canberra`.
# Forwards `A` and `margin` (default 1L) to .Call() unchanged and returns
# whatever the native routine returns.
# Generated wrapper (see the file header): regenerate rather than hand-edit.
qatd_cpp_canberra <- function(A, margin = 1L) {
    .Call(
        `_quanteda_qatd_cpp_canberra`,
        A, margin
    )
}

# R-side shim for the compiled routine `_quanteda_qatd_cpp_canberra2`.
# Forwards `A`, `B` and `margin` (default 1L) to .Call() unchanged and
# returns whatever the native routine returns.
# Generated wrapper (see the file header): regenerate rather than hand-edit.
qatd_cpp_canberra2 <- function(A, B, margin = 1L) {
    .Call(
        `_quanteda_qatd_cpp_canberra2`,
        A, B, margin
    )
}

# R-side shim for the compiled routine `_quanteda_qatd_cpp_minkowski`.
# Forwards `A`, `margin` (default 1L) and `p` (default 2) to .Call()
# unchanged and returns whatever the native routine returns.
# Generated wrapper (see the file header): regenerate rather than hand-edit.
qatd_cpp_minkowski <- function(A, margin = 1L, p = 2) {
    .Call(
        `_quanteda_qatd_cpp_minkowski`,
        A, margin, p
    )
}

# R-side shim for the compiled routine `_quanteda_qatd_cpp_minkowski2`.
# Forwards `A`, `B`, `margin` (default 1L) and `p` (default 2) to .Call()
# unchanged and returns whatever the native routine returns.
# Generated wrapper (see the file header): regenerate rather than hand-edit.
qatd_cpp_minkowski2 <- function(A, B, margin = 1L, p = 2) {
    .Call(
        `_quanteda_qatd_cpp_minkowski2`,
        A, B, margin, p
    )
}

# R-side shim for the compiled routine `_quanteda_qatd_cpp_fcm`.
# The five arguments are forwarded to .Call() unchanged and in order, and
# the native routine's result is returned as-is.
# Generated wrapper (see the file header): regenerate rather than hand-edit.
qatd_cpp_fcm <- function(texts_, n_types, weights_, boolean, ordered) {
    .Call(
        `_quanteda_qatd_cpp_fcm`,
        texts_, n_types, weights_, boolean, ordered
    )
}

# R-side shim for the compiled routine `_quanteda_qatd_cpp_keyness`.
# Forwards `mt`, `measure` and `correct` to .Call() unchanged and returns
# whatever the native routine returns.
# Generated wrapper (see the file header): regenerate rather than hand-edit.
qatd_cpp_keyness <- function(mt, measure, correct) {
    .Call(
        `_quanteda_qatd_cpp_keyness`,
        mt, measure, correct
    )
}

# R-side shim for the compiled routine `_quanteda_qatd_cpp_kwic`.
# The six arguments are forwarded to .Call() unchanged and in order, and
# the native routine's result is returned as-is.
# Generated wrapper (see the file header): regenerate rather than hand-edit.
qatd_cpp_kwic <- function(texts_, types_, words_, pats_, window, delim_) {
    .Call(
        `_quanteda_qatd_cpp_kwic`,
        texts_, types_, words_, pats_, window, delim_
    )
}
Expand Down

0 comments on commit 75562eb

Please sign in to comment.