Skip to content

Commit

Permalink
Fix character encoding issues with dir_ls() on Windows
Browse files Browse the repository at this point in the history
Fixes #42
  • Loading branch information
jimhester committed Jan 19, 2018
1 parent 20fc792 commit b0b7a88
Show file tree
Hide file tree
Showing 10 changed files with 57 additions and 9 deletions.
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,7 @@ path_ <- function(paths, ext) {
.Call(`_fs_path_`, paths, ext)
}

expand_ <- function(path) {
.Call(`_fs_expand_`, path)
}

2 changes: 1 addition & 1 deletion R/create.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ dir_create <- function(path, mode = "u=rwx,go=rx", recursive = TRUE) {
if (length(p) == 1 || !isTRUE(recursive)) {
mkdir_(p, mode)
} else {
p_paths <- Reduce(file.path, p, accumulate = TRUE)
p_paths <- Reduce(get("path", mode = "function"), p, accumulate = TRUE)
if (is_absolute_path(p[[1]])) {
p_paths <- p_paths[-1]
}
Expand Down
6 changes: 3 additions & 3 deletions R/path.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ NULL
#' @export
#' @seealso [base::file.path()]
path <- function(..., ext = "") {
path_tidy(path_(lapply(list(...), as.character), ext))
path_tidy(path_(lapply(list(...), function(x) enc2utf8(as.character(x))), ext))
}

#' @describeIn path_math returns the canonical path, eliminating any symbolic
Expand All @@ -72,7 +72,7 @@ path_real <- function(path) {
path_expand <- function(path) {
path <- enc2utf8(path)

new_fs_path(path.expand(path))
new_fs_path(expand_(path))
}

#' Tidy paths
Expand Down Expand Up @@ -123,7 +123,7 @@ path_join <- function(parts) {
return(path_tidy(""))
}
if (is.character(parts)) {
return(path_tidy(path_(parts, "")))
return(path_tidy(path_(enc2utf8(parts), "")))
}
path_tidy(vapply(parts, path_join, character(1)))
}
Expand Down
12 changes: 12 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,17 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// expand_
CharacterVector expand_(CharacterVector path);
RcppExport SEXP _fs_expand_(SEXP pathSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< CharacterVector >::type path(pathSEXP);
rcpp_result_gen = Rcpp::wrap(expand_(path));
return rcpp_result_gen;
END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_fs_mkdir_", (DL_FUNC) &_fs_mkdir_, 2},
Expand All @@ -299,6 +310,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_fs_readlink_", (DL_FUNC) &_fs_readlink_, 1},
{"_fs_realize_", (DL_FUNC) &_fs_realize_, 1},
{"_fs_path_", (DL_FUNC) &_fs_path_, 2},
{"_fs_expand_", (DL_FUNC) &_fs_expand_, 1},
{NULL, NULL, 0}
};

Expand Down
3 changes: 2 additions & 1 deletion src/dir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "CollectorList.h"
#include "Rcpp.h"
#include "error.h"
#include "utils.h"

using namespace Rcpp;

Expand Down Expand Up @@ -113,7 +114,7 @@ void dir_map(
}
uv_dirent_type_t entry_type = get_dirent_type(name.c_str(), e.type);
if (file_type == -1 || (((1 << (entry_type)) & file_type) > 0)) {
value->push_back(fun(name));
value->push_back(fun(asCharacterVector(name)));
}

if (recurse && entry_type == UV_DIRENT_DIR) {
Expand Down
2 changes: 1 addition & 1 deletion src/link.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ CharacterVector readlink_(CharacterVector path) {
uv_fs_t req;
const char* p = CHAR(STRING_ELT(path, i));
uv_fs_readlink(uv_default_loop(), &req, p, NULL);
SET_STRING_ELT(out, i, Rf_mkChar((const char*)req.ptr));
SET_STRING_ELT(out, i, Rf_mkCharCE((const char*)req.ptr, CE_UTF8));
stop_for_error(req, "Failed to read link '%s'", p);
uv_fs_req_cleanup(&req);
}
Expand Down
19 changes: 17 additions & 2 deletions src/path.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ CharacterVector path_(List paths, const char* ext) {
break;
}

const char* s = Rf_translateCharUTF8(str);
const char* s = CHAR(str);
strcpy(b, s);
b += strlen(s);

Expand All @@ -66,7 +66,22 @@ CharacterVector path_(List paths, const char* ext) {
b += strlen(ext) + 1;
}
*b = '\0';
out[r] = buf;
out[r] = Rf_mkCharCE(buf, CE_UTF8);
}
}
return out;
}

// [[Rcpp::export]]
CharacterVector expand_(CharacterVector path) {
CharacterVector out = CharacterVector(path.size());

for (R_xlen_t i = 0; i < Rf_xlength(out); ++i) {
if (STRING_ELT(path, i) == R_NaString) {
SET_STRING_ELT(out, i, R_NaString);
} else {
const char* p = CHAR(STRING_ELT(path, i));
SET_STRING_ELT(out, i, Rf_mkCharCE(R_ExpandFileName(p), CE_UTF8));
}
}
return out;
Expand Down
5 changes: 5 additions & 0 deletions src/utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "Rcpp.h"

inline Rcpp::CharacterVector asCharacterVector(std::string x) {
return Rcpp::CharacterVector(Rf_mkCharCE(x.c_str(), CE_UTF8));
}
11 changes: 11 additions & 0 deletions tests/testthat/test-list.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,17 @@ describe("dir_ls", {
expect_equal(dir_ls(type = c("file", "directory", "symlink")), c("dir", "file", "link"))
})
})
it("works with UTF-8 encoded filenames", {
with_dir_tree("\U7684\U6D4B\U8BD5\U6587\U4EF6", {
file_create("fs\U7684\U6D4B\U8BD5\U6587\U4EF6.docx")
link_create(path_abs("\U7684\U6D4B\U8BD5\U6587\U4EF6"), "\U7684\U6D4B")

expect_equal(dir_ls(type = "file"), "fs\U7684\U6D4B\U8BD5\U6587\U4EF6.docx")
expect_equal(dir_ls(type = "directory"), "\U7684\U6D4B\U8BD5\U6587\U4EF6")
expect_equal(dir_ls(type = "symlink"), "\U7684\U6D4B")
expect_equal(path_file(link_path("\U7684\U6D4B")), "\U7684\U6D4B\U8BD5\U6587\U4EF6")
})
})
})

describe("dir_walk", {
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-path.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ describe("path", {
})

it("returns paths UTF-8 encoded", {
skip_on_os(c("windows", "solaris"))
skip_on_os("solaris")
expect_equal(Encoding(path("\U4F60\U597D.R")), "UTF-8")
})

Expand Down

0 comments on commit b0b7a88

Please sign in to comment.