From 2368dbdfa3c438b614a4727bde7cacac2d0a87a9 Mon Sep 17 00:00:00 2001 From: nagachika Date: Fri, 16 Mar 2018 17:34:44 +0000 Subject: [PATCH] merge revision(s) 58745,58780,59040,60743: [Backport #13863] rb_w32_ugetcwd: UTF-8 version getcwd * dir.c (rb_dir_getwd): convert from UTF-8. * win32/win32.c (w32_getcwd): codepage aware getcwd using GetCurrentDirectoryW. potential memory leak * dir.c (rb_dir_getwd): get rid of potential memory leak. * util.c (ruby_getcwd): ditto. file.c: realpath in OS path encoding * dir.c (rb_dir_getwd_ospath): return cwd path in the OS path encoding. * file.c (rb_realpath_internal): work in the OS path encoding load.c: cwd encoding * load.c (rb_get_expanded_load_path): save cwd cache in OS path encoding, to get rid of unnecessary conversion and infinite loading when it needs encoding conversion. [ruby-dev:50221] [Bug #13863] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_4@62784 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- dir.c | 37 +++++++++++++++++++++---- file.c | 34 ++++++++++++++++------- internal.h | 3 ++ load.c | 13 ++------- test/ruby/test_rubyoptions.rb | 12 ++++++++ util.c | 8 ++++++ version.h | 2 +- win32/dir.h | 1 + win32/win32.c | 52 +++++++++++++++++++++++------------ 9 files changed, 118 insertions(+), 44 deletions(-) diff --git a/dir.c b/dir.c index 4a34fc9a73eaa1..b58990d6ade813 100644 --- a/dir.c +++ b/dir.c @@ -74,6 +74,7 @@ char *strchr(char*,char); #define rmdir(p) rb_w32_urmdir(p) #undef opendir #define opendir(p) rb_w32_uopendir(p) +#define ruby_getcwd() rb_w32_ugetcwd(NULL, 0) #define IS_WIN32 1 #else #define IS_WIN32 0 @@ -1048,26 +1049,52 @@ dir_s_chdir(int argc, VALUE *argv, VALUE obj) } VALUE -rb_dir_getwd(void) +rb_dir_getwd_ospath(void) { char *path; VALUE cwd; - int fsenc = rb_enc_to_index(rb_filesystem_encoding()); + VALUE path_guard; - if (fsenc == ENCINDEX_US_ASCII) fsenc = ENCINDEX_ASCII; +#undef RUBY_UNTYPED_DATA_WARNING +#define RUBY_UNTYPED_DATA_WARNING 0 + path_guard = Data_Wrap_Struct((VALUE)0, NULL, RUBY_DEFAULT_FREE, NULL); path = my_getcwd(); -#ifdef __APPLE__ + DATA_PTR(path_guard) = path; +#ifdef _WIN32 + cwd = rb_utf8_str_new_cstr(path); + OBJ_TAINT(cwd); +#elif defined __APPLE__ cwd = rb_str_normalize_ospath(path, strlen(path)); OBJ_TAINT(cwd); #else cwd = rb_tainted_str_new2(path); #endif - rb_enc_associate_index(cwd, fsenc); + DATA_PTR(path_guard) = 0; xfree(path); return cwd; } +VALUE +rb_dir_getwd(void) +{ + rb_encoding *fs = rb_filesystem_encoding(); + int fsenc = rb_enc_to_index(fs); + VALUE cwd = rb_dir_getwd_ospath(); + + switch (fsenc) { + case ENCINDEX_US_ASCII: + fsenc = ENCINDEX_ASCII; + case ENCINDEX_ASCII: + break; +#if defined _WIN32 || defined __APPLE__ + default: + return rb_str_conv_enc(cwd, NULL, fs); +#endif + } + return rb_enc_associate_index(cwd, fsenc); +} + /* * call-seq: * Dir.getwd -> string diff --git a/file.c b/file.c index 0e67654da2e0e6..43040787d00056 100644 --- a/file.c +++ b/file.c @@ -126,6 +126,14 @@ int flock(int, int); #define STAT(p, s) stat((p), (s)) #endif +#if defined _WIN32 || defined __APPLE__ +# define USE_OSPATH 1 +# define TO_OSPATH(str) rb_str_encode_ospath(str) +#else +# define USE_OSPATH 0 +# define TO_OSPATH(str) (str) +#endif + VALUE rb_cFile; VALUE rb_mFileTest; VALUE rb_cStat; @@ -222,7 +230,7 @@ rb_get_path(VALUE obj) VALUE rb_str_encode_ospath(VALUE path) { -#if defined _WIN32 || defined __APPLE__ +#if USE_OSPATH int encidx = ENCODING_GET(path); #ifdef _WIN32 if (encidx == ENCINDEX_ASCII) { @@ -3833,11 +3841,10 @@ realpath_rec(long *prefixlenp, VALUE *resolvedp, const char *unresolved, else { struct stat sbuf; int ret; - VALUE testpath2 = rb_str_encode_ospath(testpath); #ifdef __native_client__ - ret = stat(RSTRING_PTR(testpath2), &sbuf); + ret = stat(RSTRING_PTR(testpath), &sbuf); #else - ret = lstat(RSTRING_PTR(testpath2), &sbuf); + ret = lstat(RSTRING_PTR(testpath), &sbuf); #endif if (ret == -1) { int e = errno; @@ -3909,9 +3916,12 @@ rb_check_realpath_internal(VALUE basedir, VALUE path, enum rb_realpath_mode mode if (!NIL_P(basedir)) { FilePathValue(basedir); - basedir = rb_str_dup_frozen(basedir); + basedir = TO_OSPATH(rb_str_dup_frozen(basedir)); } + enc = rb_enc_get(unresolved_path); + origenc = enc; + unresolved_path = TO_OSPATH(unresolved_path); RSTRING_GETMEM(unresolved_path, ptr, len); path_names = skipprefixroot(ptr, ptr + len, rb_enc_get(unresolved_path)); if (ptr != path_names) { @@ -3928,7 +3938,7 @@ rb_check_realpath_internal(VALUE basedir, VALUE path, enum rb_realpath_mode mode } } - curdir = rb_dir_getwd(); + curdir = rb_dir_getwd_ospath(); RSTRING_GETMEM(curdir, ptr, len); curdir_names = skipprefixroot(ptr, ptr + len, rb_enc_get(curdir)); resolved = rb_str_subseq(curdir, 0, curdir_names - ptr); @@ -3936,7 +3946,6 @@ rb_check_realpath_internal(VALUE basedir, VALUE path, enum rb_realpath_mode mode root_found: RSTRING_GETMEM(resolved, prefixptr, prefixlen); pend = prefixptr + prefixlen; - enc = rb_enc_get(resolved); ptr = chompdirsep(prefixptr, pend, enc); if (ptr < pend) { prefixlen = ++ptr - prefixptr; @@ -3951,7 +3960,6 @@ rb_check_realpath_internal(VALUE basedir, VALUE path, enum rb_realpath_mode mode } #endif - origenc = enc; switch (rb_enc_to_index(enc)) { case ENCINDEX_ASCII: case ENCINDEX_US_ASCII: @@ -3970,8 +3978,14 @@ rb_check_realpath_internal(VALUE basedir, VALUE path, enum rb_realpath_mode mode if (realpath_rec(&prefixlen, &resolved, path_names, loopcheck, mode, 1)) return Qnil; - if (origenc != enc && rb_enc_str_asciionly_p(resolved)) - rb_enc_associate(resolved, origenc); + if (origenc != rb_enc_get(resolved)) { + if (rb_enc_str_asciionly_p(resolved)) { + rb_enc_associate(resolved, origenc); + } + else { + resolved = rb_str_conv_enc(resolved, NULL, origenc); + } + } OBJ_TAINT(resolved); return resolved; diff --git a/internal.h b/internal.h index 95fc9822e12ccf..c0bbe11455365d 100644 --- a/internal.h +++ b/internal.h @@ -982,6 +982,9 @@ void ruby_register_rollback_func_for_ensure(VALUE (*ensure_func)(ANYARGS), VALUE /* debug.c */ PRINTF_ARGS(void ruby_debug_printf(const char*, ...), 1, 2); +/* dir.c */ +VALUE rb_dir_getwd_ospath(void); + /* dmyext.c */ void Init_enc(void); void Init_ext(void); diff --git a/load.c b/load.c index bd0f8b8fdf02b8..b8946ac91b8b37 100644 --- a/load.c +++ b/load.c @@ -95,15 +95,6 @@ rb_construct_expanded_load_path(enum expand_type type, int *has_relative, int *h rb_ary_replace(vm->load_path_snapshot, vm->load_path); } -static VALUE -load_path_getcwd(void) -{ - char *cwd = my_getcwd(); - VALUE cwd_str = rb_filesystem_str_new_cstr(cwd); - xfree(cwd); - return cwd_str; -} - VALUE rb_get_expanded_load_path(void) { @@ -115,7 +106,7 @@ rb_get_expanded_load_path(void) int has_relative = 0, has_non_cache = 0; rb_construct_expanded_load_path(EXPAND_ALL, &has_relative, &has_non_cache); if (has_relative) { - vm->load_path_check_cache = load_path_getcwd(); + vm->load_path_check_cache = rb_dir_getwd_ospath(); } else if (has_non_cache) { /* Non string object. */ @@ -133,7 +124,7 @@ rb_get_expanded_load_path(void) } else if (vm->load_path_check_cache) { int has_relative = 1, has_non_cache = 1; - VALUE cwd = load_path_getcwd(); + VALUE cwd = rb_dir_getwd_ospath(); if (!rb_str_equal(vm->load_path_check_cache, cwd)) { /* Current working directory or filesystem encoding was changed. Expand relative load path and non-cacheable objects again. */ diff --git a/test/ruby/test_rubyoptions.rb b/test/ruby/test_rubyoptions.rb index 1ea1115aa57d13..b0312bfc04b739 100644 --- a/test/ruby/test_rubyoptions.rb +++ b/test/ruby/test_rubyoptions.rb @@ -920,4 +920,16 @@ def test_frozen_string_literal_debug end end end + + def test_cwd_encoding + with_tmpchdir do + testdir = "\u30c6\u30b9\u30c8" + Dir.mkdir(testdir) + Dir.chdir(testdir) do + File.write("a.rb", "require './b'") + File.write("b.rb", "puts 'ok'") + assert_ruby_status([{"RUBYLIB"=>"."}, *%w[-E cp932:utf-8 a.rb]]) + end + end + end end diff --git a/util.c b/util.c index 09349969844a71..3ebb2fea9a8755 100644 --- a/util.c +++ b/util.c @@ -511,7 +511,10 @@ ruby_getcwd(void) char *buf = xmalloc(2); strcpy(buf, "."); #elif defined HAVE_GETCWD +# undef RUBY_UNTYPED_DATA_WARNING +# define RUBY_UNTYPED_DATA_WARNING 0 # if defined NO_GETCWD_MALLOC + VALUE guard = Data_Wrap_Struct((VALUE)0, NULL, RUBY_DEFAULT_FREE, NULL); int size = 200; char *buf = xmalloc(size); @@ -519,17 +522,22 @@ ruby_getcwd(void) int e = errno; if (e != ERANGE) { xfree(buf); + DATA_PTR(guard) = NULL; rb_syserr_fail(e, "getcwd"); } size *= 2; + DATA_PTR(guard) = buf; buf = xrealloc(buf, size); } # else + VALUE guard = Data_Wrap_Struct((VALUE)0, NULL, free, NULL); char *buf, *cwd = getcwd(NULL, 0); + DATA_PTR(guard) = cwd; if (!cwd) rb_sys_fail("getcwd"); buf = ruby_strdup(cwd); /* allocate by xmalloc */ free(cwd); # endif + DATA_PTR(RB_GC_GUARD(guard)) = NULL; #else # ifndef PATH_MAX # define PATH_MAX 8192 diff --git a/version.h b/version.h index 0901b52cc7db09..0c7e0ac0f5cdbb 100644 --- a/version.h +++ b/version.h @@ -1,6 +1,6 @@ #define RUBY_VERSION "2.4.4" #define RUBY_RELEASE_DATE "2018-03-17" -#define RUBY_PATCHLEVEL 259 +#define RUBY_PATCHLEVEL 260 #define RUBY_RELEASE_YEAR 2018 #define RUBY_RELEASE_MONTH 3 diff --git a/win32/dir.h b/win32/dir.h index b1f981f257ee62..29c4c1c6d5b6e1 100644 --- a/win32/dir.h +++ b/win32/dir.h @@ -33,6 +33,7 @@ long rb_w32_telldir(DIR *); void rb_w32_seekdir(DIR *, long); void rb_w32_rewinddir(DIR *); void rb_w32_closedir(DIR *); +char *rb_w32_ugetcwd(char *, int); #define opendir(s) rb_w32_opendir((s)) #define readdir(d) rb_w32_readdir((d), 0) diff --git a/win32/win32.c b/win32/win32.c index 62801dae71ca69..efbe70b48117e9 100644 --- a/win32/win32.c +++ b/win32/win32.c @@ -4654,43 +4654,61 @@ clock_getres(clockid_t clock_id, struct timespec *sp) } /* License: Ruby's */ -char * -rb_w32_getcwd(char *buffer, int size) +static char * +w32_getcwd(char *buffer, int size, UINT cp) { - char *p = buffer; - int len; + WCHAR *p; + int wlen, len; - len = GetCurrentDirectory(0, NULL); + len = GetCurrentDirectoryW(0, NULL); if (!len) { errno = map_errno(GetLastError()); return NULL; } - if (p) { + if (buffer && size < len) { + errno = ERANGE; + return NULL; + } + + p = ALLOCA_N(WCHAR, len); + if (!GetCurrentDirectoryW(len, p)) { + errno = map_errno(GetLastError()); + return NULL; + } + + wlen = translate_wchar(p, L'\\', L'/') - p + 1; + len = WideCharToMultiByte(cp, 0, p, wlen, NULL, 0, NULL, NULL); + if (buffer) { if (size < len) { errno = ERANGE; return NULL; } } else { - p = malloc(len); - size = len; - if (!p) { + buffer = malloc(len); + if (!buffer) { errno = ENOMEM; return NULL; } } + WideCharToMultiByte(cp, 0, p, wlen, buffer, len, NULL, NULL); - if (!GetCurrentDirectory(size, p)) { - errno = map_errno(GetLastError()); - if (!buffer) - free(p); - return NULL; - } + return buffer; +} - translate_char(p, '\\', '/', filecp()); +/* License: Ruby's */ +char * +rb_w32_getcwd(char *buffer, int size) +{ + return w32_getcwd(buffer, size, filecp()); +} - return p; +/* License: Ruby's */ +char * +rb_w32_ugetcwd(char *buffer, int size) +{ + return w32_getcwd(buffer, size, CP_UTF8); } /* License: Artistic or GPL */