New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More robust determination of full path to current executable #795

Merged
merged 3 commits into from Sep 15, 2016
Jump to file or symbol
Failed to load files and symbols.
+78 −38
Diff settings

Always

Just for now

View
@@ -102,8 +102,7 @@ extern void caml_install_invalid_parameter_handler();
void caml_main(char **argv)
{
char * exe_name;
static char proc_self_exe[256];
char * exe_name, * proc_self_exe;
value res;
char tos;
@@ -133,7 +132,8 @@ void caml_main(char **argv)
caml_debugger_init (); /* force debugger.o stub to be linked */
exe_name = argv[0];
if (exe_name == NULL) exe_name = "";
if (caml_executable_name(proc_self_exe, sizeof(proc_self_exe)) == 0)
proc_self_exe = caml_executable_name();
if (proc_self_exe != NULL)
exe_name = proc_self_exe;
else
exe_name = caml_search_exe_in_path(exe_name);
View
@@ -82,8 +82,9 @@ extern char * caml_dlerror(void);
extern int caml_read_directory(char * dirname, struct ext_table * contents);
/* Recover executable name if possible (/proc/sef/exe under Linux,
GetModuleFileName under Windows). */
extern int caml_executable_name(char * name, int name_len);
GetModuleFileName under Windows). Return NULL on error,
string allocated with [caml_stat_alloc] on success. */
extern char * caml_executable_name(void);
#endif /* CAML_INTERNALS */
View
@@ -97,28 +97,31 @@ int caml_attempt_open(char **name, struct exec_trailer *trail,
char buf [2];
truename = caml_search_exe_in_path(*name);
*name = truename;
caml_gc_message(0x100, "Opening bytecode executable %s\n",
(uintnat) truename);
fd = open(truename, O_RDONLY | O_BINARY);
if (fd == -1) {
caml_stat_free(truename);
caml_gc_message(0x100, "Cannot open file\n", 0);
return FILE_NOT_FOUND;
}
if (!do_open_script) {
err = read (fd, buf, 2);
if (err < 2 || (buf [0] == '#' && buf [1] == '!')) {
close(fd);
caml_stat_free(truename);
caml_gc_message(0x100, "Rejected #! script\n", 0);
return BAD_BYTECODE;
}
}
err = read_trailer(fd, trail);
if (err != 0) {
close(fd);
caml_stat_free(truename);
caml_gc_message(0x100, "Not a bytecode executable\n", 0);
return err;
}
*name = truename;
return fd;
}
@@ -279,8 +282,7 @@ CAMLexport void caml_main(char **argv)
struct channel * chan;
value res;
char * shared_lib_path, * shared_libs, * req_prims;
char * exe_name;
static char proc_self_exe[256];
char * exe_name, * proc_self_exe;
ensure_spacetime_dot_o_is_included++;
@@ -308,12 +310,16 @@ CAMLexport void caml_main(char **argv)
exe_name = argv[0];
fd = caml_attempt_open(&exe_name, &trail, 0);
/* Should we really do that at all? The current executable is ocamlrun
itself, it's never a bytecode program. */
if (fd < 0
&& caml_executable_name(proc_self_exe, sizeof(proc_self_exe)) == 0) {
/* Little grasshopper wonders why we do that at all, since
"The current executable is ocamlrun itself, it's never a bytecode
program". Little grasshopper "ocamlc -custom" in mind should keep.
With -custom, we have an executable that is ocamlrun itself
concatenated with the bytecode. So, if the attempt with argv[0]
failed, it is worth trying again with executable_name. */
if (fd < 0 && (proc_self_exe = caml_executable_name()) != NULL) {
exe_name = proc_self_exe;
fd = caml_attempt_open(&exe_name, &trail, 0);
caml_stat_free(proc_self_exe);
}
if (fd < 0) {
@@ -400,7 +406,6 @@ CAMLexport void caml_startup_code(
value res;
char * cds_file;
char * exe_name;
static char proc_self_exe[256];
caml_init_ieee_floats();
#if defined(_MSC_VER) && __STDC_SECURE_LIB__ >= 200411L
@@ -415,9 +420,8 @@ CAMLexport void caml_startup_code(
caml_cds_file = caml_strdup(cds_file);
}
caml_parse_ocamlrunparam();
exe_name = argv[0];
if (caml_executable_name(proc_self_exe, sizeof(proc_self_exe)) == 0)
exe_name = proc_self_exe;
exe_name = caml_executable_name();
if (exe_name == NULL) exe_name = caml_search_exe_in_path(argv[0]);
caml_external_raise = NULL;
/* Initialize the abstract machine */
caml_init_gc (caml_init_minor_heap_wsz, caml_init_heap_wsz,
@@ -456,6 +460,7 @@ CAMLexport void caml_startup_code(
caml_section_table_size = section_table_size;
/* Initialize system libraries */
caml_sys_init(exe_name, argv);
caml_stat_free(exe_name);
/* Execute the program */
caml_debugger(PROGRAM_START);
res = caml_interprete(caml_start_code, caml_code_size);
View
@@ -43,6 +43,9 @@
#else
#include <sys/dir.h>
#endif
#ifdef __APPLE__
#include <mach-o/dyld.h>
#endif
#include "caml/fail.h"
#include "caml/memory.h"
#include "caml/misc.h"
@@ -347,28 +350,50 @@ CAMLexport int caml_read_directory(char * dirname, struct ext_table * contents)
/* Recover executable name from /proc/self/exe if possible */
#ifdef __linux__
int caml_executable_name(char * name, int name_len)
char * caml_executable_name(void)
{
int retcode;
#if defined(__linux__)
int namelen, retcode;
char * name;
struct stat st;
retcode = readlink("/proc/self/exe", name, name_len);
if (retcode == -1 || retcode >= name_len) return -1;
/* lstat("/proc/self/exe") returns st_size == 0 so we cannot use it
to determine the size of the buffer. Instead, we guess and adjust. */
namelen = 256;
while (1) {
name = caml_stat_alloc(namelen + 1);
retcode = readlink("/proc/self/exe", name, namelen);
if (retcode == -1) { caml_stat_free(name); return NULL; }
if (retcode <= namelen) break;

This comment has been minimized.

@ianthehenry

ianthehenry Aug 25, 2017

Is this check right? If readlink has to truncate its response, won't retcode == namelen?

@ianthehenry

ianthehenry Aug 25, 2017

Is this check right? If readlink has to truncate its response, won't retcode == namelen?

This comment has been minimized.

@nojb

nojb Aug 25, 2017

Contributor

Yes, but it can very well happen that more bytes than necessary were allocated, so no truncation is necessary, in which case you can have retcode < namelen.

@nojb

nojb Aug 25, 2017

Contributor

Yes, but it can very well happen that more bytes than necessary were allocated, so no truncation is necessary, in which case you can have retcode < namelen.

This comment has been minimized.

@ianthehenry

ianthehenry Aug 25, 2017

Right, but it seems like the only time that we want to break out of the loop is when retcode < namelen -- if retcode == namelen that's a sign that we want to try again with a longer name buffer.

Rephrase: if I'm understanding correctly, I think that this branch is always taken, and we never actually resize the buffer (readlink should never return retcode > namelen).

(The win32 implementation seems to have the right comparison)

@ianthehenry

ianthehenry Aug 25, 2017

Right, but it seems like the only time that we want to break out of the loop is when retcode < namelen -- if retcode == namelen that's a sign that we want to try again with a longer name buffer.

Rephrase: if I'm understanding correctly, I think that this branch is always taken, and we never actually resize the buffer (readlink should never return retcode > namelen).

(The win32 implementation seems to have the right comparison)

This comment has been minimized.

@nojb

nojb Aug 25, 2017

Contributor

Yes, sounds right. In fact, wouldn't it be even better to pass namelen + 1 as third parameter of readlink and leave the comparison as it is ?

@nojb

nojb Aug 25, 2017

Contributor

Yes, sounds right. In fact, wouldn't it be even better to pass namelen + 1 as third parameter of readlink and leave the comparison as it is ?

This comment has been minimized.

@ianthehenry

ianthehenry Aug 25, 2017

Yeah, that is even better.

@ianthehenry

ianthehenry Aug 25, 2017

Yeah, that is even better.

This comment has been minimized.

@xavierleroy

xavierleroy Aug 25, 2017

Contributor

Yes, the exit test should be retcode < namelen.

No, the length parameter of readlink must not be namelen + 1 because we need room to add the terminating 0 that readlink doesn't add.

@xavierleroy

xavierleroy Aug 25, 2017

Contributor

Yes, the exit test should be retcode < namelen.

No, the length parameter of readlink must not be namelen + 1 because we need room to add the terminating 0 that readlink doesn't add.

This comment has been minimized.

@ianthehenry

ianthehenry Aug 25, 2017

@xavierleroy given the check retcode <= namelen, we would still have room for the null byte, as we'd still resize if readlink filled the entire buffer. And we could avoid reallocating when the length of the executable is exactly namelen.

(Though perhaps the additional mental cycles spent convincing oneself that this is safe is not worth saving one allocation approximately 0% of the time.)

@ianthehenry

ianthehenry Aug 25, 2017

@xavierleroy given the check retcode <= namelen, we would still have room for the null byte, as we'd still resize if readlink filled the entire buffer. And we could avoid reallocating when the length of the executable is exactly namelen.

(Though perhaps the additional mental cycles spent convincing oneself that this is safe is not worth saving one allocation approximately 0% of the time.)

This comment has been minimized.

@xavierleroy

xavierleroy Aug 25, 2017

Contributor

Fixed on trunk, commit e49e6ce, and cherry-picked to 4.05, commit 44c67eb.

@xavierleroy

xavierleroy Aug 25, 2017

Contributor

Fixed on trunk, commit e49e6ce, and cherry-picked to 4.05, commit 44c67eb.

This comment has been minimized.

@nojb

nojb Aug 25, 2017

Contributor

But if we leave the comparison as it is and we break out of the loop it means that retcode <= namelen so that there is space for the terminating 0, right? Using retcode < namelen as exit test seems slightly less efficient because there could be some cases where retcode == namelen but not due to truncation.

@nojb

nojb Aug 25, 2017

Contributor

But if we leave the comparison as it is and we break out of the loop it means that retcode <= namelen so that there is space for the terminating 0, right? Using retcode < namelen as exit test seems slightly less efficient because there could be some cases where retcode == namelen but not due to truncation.

This comment has been minimized.

@xavierleroy

xavierleroy Aug 25, 2017

Contributor

OK, you're both right. I think the clearer code is to have namelen as the size of the buffer and as the 3rd parameter to readlink, and to break when retcode < namelen, which indeed leaves room for the final zero. Pushing this improved fix in a minute...

@xavierleroy

xavierleroy Aug 25, 2017

Contributor

OK, you're both right. I think the clearer code is to have namelen as the size of the buffer and as the 3rd parameter to readlink, and to break when retcode < namelen, which indeed leaves room for the final zero. Pushing this improved fix in a minute...

This comment has been minimized.

@xavierleroy

xavierleroy Aug 25, 2017

Contributor

Commits 7a315bd on trunk and b5610e3.

@xavierleroy

xavierleroy Aug 25, 2017

Contributor

Commits 7a315bd on trunk and b5610e3.

This comment has been minimized.

@ianthehenry

ianthehenry Aug 25, 2017

Oh yeah. That's a lot more clear.

@ianthehenry

ianthehenry Aug 25, 2017

Oh yeah. That's a lot more clear.

caml_stat_free(name);
if (namelen >= 1024*1024) return NULL; /* avoid runaway and overflow */
namelen *= 2;
}
/* readlink() does not zero-terminate its result */
name[retcode] = 0;
/* Make sure that the contents of /proc/self/exe is a regular file.
(Old Linux kernels return an inode number instead.) */
if (stat(name, &st) != 0) return -1;
if (! S_ISREG(st.st_mode)) return -1;
return 0;
}
if (stat(name, &st) == -1 || ! S_ISREG(st.st_mode)) {
caml_stat_free(name); return NULL;
}
return name;
#elif defined(__APPLE__)
unsigned int namelen;
char * name;
namelen = 256;
name = caml_stat_alloc(namelen);
if (_NSGetExecutablePath(name, &namelen) == 0) return name;
caml_stat_free(name);
/* Buffer is too small, but namelen now contains the size needed */
name = caml_stat_alloc(namelen);
if (_NSGetExecutablePath(name, &namelen) == 0) return name;
caml_stat_free(name);
return NULL;
#else
int caml_executable_name(char * name, int name_len)
{
return -1;
}
return NULL;
#endif
}
View
@@ -609,13 +609,22 @@ void caml_install_invalid_parameter_handler()
/* Recover executable name */
int caml_executable_name(char * name, int name_len)
char * caml_executable_name(void)
{
int retcode;
int ret = GetModuleFileName(NULL, name, name_len);
if (0 == ret || ret >= name_len) return -1;
return 0;
char * name;
DWORD namelen, ret;
namelen = 256;
while (1) {
name = caml_stat_alloc(namelen);
ret = GetModuleFileName(NULL, name, namelen);
if (ret == 0) { caml_stat_free(name); return NULL; }
if (ret < namelen) break;
caml_stat_free(name);
if (namelen >= 1024*1024) return NULL; /* avoid runaway and overflow */
namelen *= 2;
}
return name;
}
/* snprintf emulation */
ProTip! Use n and p to navigate between commits in a pull request.