Skip to content

Commit

Permalink
Refactor to make reading files easier to test.
Browse files Browse the repository at this point in the history
  • Loading branch information
elinw committed Jul 27, 2019
1 parent 2f11ca3 commit 0c17740
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 43 deletions.
61 changes: 27 additions & 34 deletions R/read_in_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,27 +67,17 @@ read_documents_data <- function(project_name,
if (length(dir(paths$data)) != 0){
file_list <- dir(paths$data)
doc_text <- character()
# This is because not all users will be able to install textreadr.
if (!requireNamespace("textreadr", quietly = TRUE)){
for (i in 1:length(file_list)){
doc_text[i] <- readr::read_file(paste0(paths$data, file_list[i]))
}
} else {
for (i in 1:length(dir(paths$data))){
doc_text[i] <- textreadr::read_document(
file.path(paths$data, file_list[i]),
combine = TRUE)
}
}
doc_text <- read_files(file_list, doc_text, paths$data,
requireNamespace("textreadr", quietly = TRUE))

data_set <- data.frame( doc_id = seq_along(1:length(file_list)),
data_set <- data.frame( doc_id = seq_along(1:length(file_list)),
document_text = doc_text,
doc_path = file_list,
stringsAsFactors = FALSE)

# validate column names etc here
# validate column names here
actualNames <- names(data_set)
expectedNames <- c("doc_id", "document_text", "doc_path") #GOOD
expectedNames <- c("doc_id", "document_text", "doc_path")
if (sum(expectedNames %in% actualNames) != length(data_set)){
warning("Required variables for documents_data are not present")
}
Expand All @@ -100,6 +90,22 @@ read_documents_data <- function(project_name,
invisible(TRUE)
}

#' Read the text of each listed file into a character vector
#'
#' Internal helper shared by the document import functions. The choice of
#' reader is passed in as a flag (rather than detected here) so that both
#' branches can be exercised in tests.
#'
#' @param file_list Character vector of file names located in `path_data`.
#' @param doc_text Character vector that receives the text; element `i` is
#'   overwritten with the contents of `file_list[i]`.
#' @param path_data Directory containing the files. It is joined to each file
#'   name with `file.path()`.
#' @param textreadr_available Logical flag. If `TRUE` files are read with
#'   `textreadr::read_document()`, otherwise with `readr::read_file()`.
#' @return `doc_text` with one element filled in per entry of `file_list`;
#'   returned unchanged when `file_list` is empty.
#' @noRd
read_files <- function(file_list, doc_text, path_data, textreadr_available){
  # seq_along() instead of 1:length(): with an empty file_list the latter
  # iterates over c(1, 0) and tries to read a non-existent "<path>/NA" file.
  for (i in seq_along(file_list)){
    doc_path <- file.path(path_data, file_list[i])
    # This is because not all users will be able to install textreadr.
    if (textreadr_available){
      doc_text[i] <- textreadr::read_document(doc_path, combine = TRUE)
    } else {
      doc_text[i] <- readr::read_file(doc_path)
    }
  }
  doc_text
}

#' Create an empty documents data set
#'
#' Used to create a documents data frame with no data but that can
Expand Down Expand Up @@ -130,7 +136,7 @@ create_empty_docs_file <-function(path){
#' @param files file tibble produced by ShinyFiles
#' @param file_path Full path to the data set of documents including
#' trailing slash
#' @param docs_df_path Existing data frame of text documents
#' @param docs_df_path Path to existing data frame of text documents
#' @examples
#' create_qcoder_project(project_name = "my_qcoder_project", sample = TRUE)
#'
Expand All @@ -139,30 +145,19 @@ create_empty_docs_file <-function(path){
#' @export
add_new_documents <- function(files, docs_df_path = "", file_path = ""){
text_df <- readRDS(docs_df_path)
file_list <- files[["name"]]
file_list <- as.character(files[["name"]])
old_docs <- text_df[["doc_path"]]
if (length(intersect(file_list, old_docs)) != 0){
warning("One or more files are already imported")
return()
}
doc_text <- character()
if (!requireNamespace("textreadr", quietly = TRUE)){
for (i in 1:length(file_list)){
doc_text[i] <- readr::read_file(paste0(file_path,
file_list[i]))
}
} else {
for (i in 1:length(file_list)){
if (length(file_list) == 0){
return()
}
doc_text[i] <- textreadr::read_document(
paste0(file_path, file_list[i]))
}
}
doc_text <- read_files(file_list, doc_text, file_path,
requireNamespace("textreadr", quietly = TRUE))

ids <- integer(length(file_list))
new_rows <- data.frame(doc_id = ids, document_text = doc_text,
doc_path = file_list)
doc_path = as.character(file_list))
text_df <- rbind( text_df, new_rows)
row_n <- row.names(text_df)
text_df$doc_id <- ifelse(text_df$doc_id == 0, row_n,
Expand Down Expand Up @@ -269,8 +264,6 @@ read_unit_data <- function(data_path = "units/units.csv",
data_frame_name = data_frame_name)

if (file.exists(paths[["data"]])){
# units_df_path <- file.path(df_path,
# paste0(data_frame_name, "_", project_name, ".rds" ))
units <- readr::read_csv(file = paths[["data"]],
col_types = "ic" )

Expand Down
12 changes: 7 additions & 5 deletions tests/testthat/test-read_in_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@ context("Reading files into data frames for analysis")

test_that("A new file is successfully added to the documents", {
new_file_name <- "newfile.txt"
file_list <- data.frame(name = new_file_name, size = c(NA), type = c(""), datapath = c("NA/newfile6.txt"))
data_path <- "./data/"
save_path <- paste0(tempdir(), "/rqcoder_documents_my_qcoder_project.rds")
file_list <- data.frame(name = new_file_name, size = c(NA), type = c(""),
datapath = c("NA/newfile6.txt"), stringsAsFactors = FALSE)
file_path <- file.path(getwd(), "data")
data_path <- file.path(getwd(),"data/qcoder_documents_my_qcoder_project.rds")
save_path <- paste0(tempdir(), "/qcoder_documents_my_qcoder_project.rds")
file.copy("./data/qcoder_documents_my_qcoder_project.rds", save_path)
add_new_documents(files = file_list, file_path = data_path, docs_df_path = save_path)
add_new_documents(files = file_list, docs_df_path = save_path, file_path = file_path)
new_df <- readRDS(save_path)
expect_equal(nrow(new_df), 6)
expect_equal(new_df[6,"doc_path"], new_file_name)
unlink(save_path)
unlink(save_path, recursive = TRUE)
})

test_that("A new file with the same name as an existing file generates a warning", {
Expand Down
11 changes: 7 additions & 4 deletions tests/testthat/test-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,21 @@ test_that("Project with sample data is created correctly.", {
test_that("Updating documents works", {
# Set up data
qcoder_documents_my_qcoder_project <- readRDS("./data/qcoder_documents_my_qcoder_project.rds")
save_path <- paste0(tempdir(), "/", "qcoder_documents_my_qcoder_project.rds")
test_path <- file.path(tempdir(), "test")
# Should not be necessary
unlink(test_path, recursive = TRUE)
dir.create(test_path)
save_path <- file.path(test_path, "qcoder_documents_my_qcoder_project.rds")
# Do the test on a copy of the data.
saveRDS(qcoder_documents_my_qcoder_project, file = save_path)
updated_data <- as.character("A B C D")
do_update_document(updated_data, docs_df_path = save_path, "CoC_Example1_MU.txt")
input <- readRDS(file = save_path)
expect_equal(updated_data, input[2, 2])
# This should be 2 because we started with an empty directory.
expect_equal(length(list.files(tempdir())), 2)
expect_equal(length(list.files(test_path)), 2)

unlink(save_path)
setwd(basewd)
unlink(test_path, recursive = TRUE)
})

test_that("Converting to HTML works", {
Expand Down

0 comments on commit 0c17740

Please sign in to comment.