Skip to content

Commit

Permalink
replaced combine_tot internal fun w/ data.table version
Browse files Browse the repository at this point in the history
  • Loading branch information
Tyler Rinker committed May 12, 2017
1 parent 149529b commit 153de18
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 39 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Description: A small collection of convenience tools for reading text documents
R.
Depends: R (>= 3.2.2)
Suggests: testthat
Imports: antiword, curl, pdftools, readxl, rvest, textshape, tools, utils, xml2
Imports: antiword, curl, data.table, pdftools, readxl, rvest, textshape, tools, utils, xml2
Date: 2017-05-08
License: GPL-2
LazyData: TRUE
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ export(read_html)
export(read_pdf)
export(read_transcript)
export(unpeek)
importFrom(data.table,":=")
76 changes: 38 additions & 38 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,47 +30,47 @@ function(dataframe) {


#Helper function used in read.transcript
# ## this should work but does not
# combine_tot <- function(x){
# nms <- colnames(x)
# colnames(x) <- c('person', 'z')
# x <- data.table::data.table(x)
#
# exp <- parse(text='list(text = paste(z, collapse = " "))')[[1]]
# out <- x[, eval(exp),
# by = list(person, 'new' = data.table::rleid(person))][,
# 'new' := NULL][]
# data.table::setnames(out, nms)
# out
# }

#' @importFrom data.table :=
combine_tot <- function(x){
nms <- colnames(x)
colnames(x) <- c('person', 'z')
x <- data.table::data.table(x)

combine_tot <-
function(dataframe, combine.var = 1, text.var = 2) {
NAMES <- colnames(dataframe)
lens <- rle(as.character(dataframe[, combine.var]))
z <- lens$lengths > 1
z[lens$lengths > 1] <- 1:sum(lens$lengths > 1)
a <- rep(z, lens$lengths)
dataframe[, "ID"] <- 1:nrow(dataframe)
b <- split(dataframe, a)
w <- b[names(b) != "0"]
v <- lapply(w, function(x) {
x <- data.frame(var1 = x[1, 1],
text = paste(x[, text.var], collapse=" "),
ID = x[1, 3], stringsAsFactors = FALSE)
colnames(x)[1:2] <- NAMES
return(x)
}
)
v$x <- as.data.frame(b["0"], stringsAsFactors = FALSE)
colnames(v$x) <- unlist(strsplit(colnames(v$x), "\\."))[c(F, T)]
h <- do.call(rbind, v)
h <- h[order(h$ID), ][, -3]
rownames(h) <- NULL
return(h)
exp <- parse(text='list(text = paste(z, collapse = " "))')[[1]]
out <- x[, eval(exp),
by = list(person, 'new' = data.table::rleid(person))][,
'new' := NULL][]
data.table::setnames(out, nms)
out
}


# combine_tot <-
# function(dataframe, combine.var = 1, text.var = 2) {
# NAMES <- colnames(dataframe)
# lens <- rle(as.character(dataframe[, combine.var]))
# z <- lens$lengths > 1
# z[lens$lengths > 1] <- 1:sum(lens$lengths > 1)
# a <- rep(z, lens$lengths)
# dataframe[, "ID"] <- 1:nrow(dataframe)
# b <- split(dataframe, a)
# w <- b[names(b) != "0"]
# v <- lapply(w, function(x) {
# x <- data.frame(var1 = x[1, 1],
# text = paste(x[, text.var], collapse=" "),
# ID = x[1, 3], stringsAsFactors = FALSE)
# colnames(x)[1:2] <- NAMES
# return(x)
# }
# )
# v$x <- as.data.frame(b["0"], stringsAsFactors = FALSE)
# colnames(v$x) <- unlist(strsplit(colnames(v$x), "\\."))[c(F, T)]
# h <- do.call(rbind, v)
# h <- h[order(h$ID), ][, -3]
# rownames(h) <- NULL
# return(h)
# }

paste2 <-
function(multi.columns, sep=".", handle.na=TRUE, trim=TRUE){
if (is.matrix(multi.columns)) {
Expand Down

0 comments on commit 153de18

Please sign in to comment.