diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index d0976082564..c702361dcf6 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -601,6 +601,13 @@ static int rte_init(void) orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SHOW_HELP, ORTE_RML_PERSISTENT, orte_show_help_recv, NULL); + /* setup the data server */ + if (ORTE_SUCCESS != (ret = orte_data_server_init())) { + ORTE_ERROR_LOG(ret); + error = "orte_data_server_init"; + goto error; + } + if (orte_create_session_dirs) { /* set the opal_output hnp file location to be in the * proc-specific session directory. */ @@ -814,6 +821,8 @@ static int rte_finalize(void) /* shutdown the pmix server */ pmix_server_finalize(); (void) mca_base_framework_close(&opal_pmix_base_framework); + /* cleanup our data server */ + orte_data_server_finalize(); (void) mca_base_framework_close(&orte_schizo_base_framework); (void) mca_base_framework_close(&orte_dfs_base_framework); diff --git a/orte/runtime/orte_data_server.c b/orte/runtime/orte_data_server.c index eb5694db74b..b2a97f73748 100644 --- a/orte/runtime/orte_data_server.c +++ b/orte/runtime/orte_data_server.c @@ -104,11 +104,17 @@ OBJ_CLASS_INSTANCE(orte_data_req_t, /* local globals */ static opal_pointer_array_t orte_data_server_store; static opal_list_t pending; +static bool initialized = false; int orte_data_server_init(void) { int rc; + if (initialized) { + return ORTE_SUCCESS; + } + initialized = true; + OBJ_CONSTRUCT(&orte_data_server_store, opal_pointer_array_t); if (ORTE_SUCCESS != (rc = opal_pointer_array_init(&orte_data_server_store, 1, @@ -134,6 +140,11 @@ void orte_data_server_finalize(void) orte_std_cntr_t i; orte_data_object_t *data; + if (!initialized) { + return; + } + initialized = false; + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_SERVER); for (i=0; i < orte_data_server_store.size; i++) { diff --git a/orte/tools/orte-dvm/orte-dvm.c 
b/orte/tools/orte-dvm/orte-dvm.c index 109cf09aec7..3cdf585d966 100644 --- a/orte/tools/orte-dvm/orte-dvm.c +++ b/orte/tools/orte-dvm/orte-dvm.c @@ -96,7 +96,6 @@ static struct { bool help; bool version; char *report_uri; - char *basename; char *prefix; bool run_as_root; } myglobals; @@ -150,7 +149,7 @@ int main(int argc, char *argv[]) memset(&myglobals, 0, sizeof(myglobals)); /* find our basename (the name of the executable) so that we can use it in pretty-print error messages */ - myglobals.basename = opal_basename(argv[0]); + orte_basename = opal_basename(argv[0]); opal_cmd_line_create(&cmd_line, cmd_line_init); mca_base_cmd_line_setup(&cmd_line); @@ -174,7 +173,7 @@ int main(int argc, char *argv[]) OPAL_REPO_REV); if (NULL != str) { fprintf(stdout, "%s %s\n\nReport bugs to %s\n", - myglobals.basename, str, PACKAGE_BUGREPORT); + orte_basename, str, PACKAGE_BUGREPORT); free(str); } exit(0); @@ -187,10 +186,10 @@ int main(int argc, char *argv[]) if (0 == geteuid() && !myglobals.run_as_root) { fprintf(stderr, "--------------------------------------------------------------------------\n"); if (myglobals.help) { - fprintf(stderr, "%s cannot provide the help message when run as root\n", myglobals.basename); + fprintf(stderr, "%s cannot provide the help message when run as root\n", orte_basename); } else { /* show_help is not yet available, so print an error manually */ - fprintf(stderr, "%s has detected an attempt to run as root.\n", myglobals.basename); + fprintf(stderr, "%s has detected an attempt to run as root.\n", orte_basename); } fprintf(stderr, " This is *strongly* discouraged as any mistake (e.g., in defining TMPDIR) or bug can\n"); fprintf(stderr, "result in catastrophic damage to the OS file system, leaving\n"); @@ -222,15 +221,15 @@ int main(int argc, char *argv[]) if (myglobals.help) { char *str, *args = NULL; char *project_name = NULL; - if (0 == strcmp(myglobals.basename, "mpirun")) { + if (0 == strcmp(orte_basename, "mpirun")) { project_name = "Open 
MPI"; } else { project_name = "OpenRTE"; } args = opal_cmd_line_get_usage_msg(&cmd_line); str = opal_show_help_string("help-orterun.txt", "orterun:usage", false, - myglobals.basename, project_name, OPAL_VERSION, - myglobals.basename, args, + orte_basename, project_name, OPAL_VERSION, + orte_basename, args, PACKAGE_BUGREPORT); if (NULL != str) { printf("%s", str); @@ -245,6 +244,14 @@ int main(int argc, char *argv[]) /* Setup MCA params */ orte_register_params(); + /* save the environment for launch purposes. This MUST be + * done so that we can pass it to any local procs we + * spawn - otherwise, those local procs won't see any + * non-MCA envars that were set in the environment prior + * to calling orterun + */ + orte_launch_environ = opal_argv_copy(environ); + /* Intialize our Open RTE environment */ if (ORTE_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_MASTER))) { /* cannot call ORTE_ERROR_LOG as it could be the errmgr @@ -273,7 +280,7 @@ int main(int argc, char *argv[]) fp = fopen(ptr, "w"); if (NULL == fp) { orte_show_help("help-orterun.txt", "orterun:write_file", false, - myglobals.basename, "pid", ptr); + orte_basename, "pid", ptr); exit(0); } fprintf(fp, "%s\n", uri); @@ -282,7 +289,7 @@ int main(int argc, char *argv[]) fp = fopen(myglobals.report_uri, "w"); if (NULL == fp) { orte_show_help("help-orterun.txt", "orterun:write_file", false, - myglobals.basename, "pid", myglobals.report_uri); + orte_basename, "pid", myglobals.report_uri); exit(0); } fprintf(fp, "%s\n", uri); @@ -296,13 +303,13 @@ int main(int argc, char *argv[]) /* get the daemon job object - was created by ess/hnp component */ if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) { orte_show_help("help-orterun.txt", "bad-job-object", true, - myglobals.basename); + orte_basename); exit(0); } /* also should have created a daemon "app" */ if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) { orte_show_help("help-orterun.txt", "bad-app-object", 
true, - myglobals.basename); + orte_basename); exit(0); } @@ -326,7 +333,7 @@ int main(int argc, char *argv[]) } if (0 != strcmp(param, value)) { orte_show_help("help-orterun.txt", "orterun:app-prefix-conflict", - true, myglobals.basename, value, param); + true, orte_basename, value, param); /* let the global-level prefix take precedence since we * know that one is being used */ @@ -352,7 +359,7 @@ int main(int argc, char *argv[]) param_len--; if (0 == param_len) { orte_show_help("help-orterun.txt", "orterun:empty-prefix", - true, myglobals.basename, myglobals.basename); + true, orte_basename, orte_basename); return ORTE_ERR_FATAL; } } @@ -368,7 +375,7 @@ int main(int argc, char *argv[]) if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "hostfile"))) { if(1 < j) { orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, myglobals.basename, NULL); + true, orte_basename, NULL); return ORTE_ERR_FATAL; } else { value = opal_cmd_line_get_param(&cmd_line, "hostfile", 0, 0); @@ -378,7 +385,7 @@ int main(int argc, char *argv[]) if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "machinefile"))) { if(1 < j || orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING)) { orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, myglobals.basename, NULL); + true, orte_basename, NULL); return ORTE_ERR_FATAL; } else { value = opal_cmd_line_get_param(&cmd_line, "machinefile", 0, 0); diff --git a/orte/tools/orte-submit/orte-submit.c b/orte/tools/orte-submit/orte-submit.c index fe30ef08ce6..23656b08d58 100644 --- a/orte/tools/orte-submit/orte-submit.c +++ b/orte/tools/orte-submit/orte-submit.c @@ -127,7 +127,6 @@ static struct { char *path; bool enable_recovery; char *personality; - char *basename; char *prefix; bool terminate; bool nolocal; @@ -333,7 +332,7 @@ static void spawn_recv(int status, orte_process_name_t* sender, int main(int argc, char *argv[]) { - int rc, i; + int rc; opal_cmd_line_t cmd_line; char *param; orte_job_t 
*jdata=NULL; @@ -344,7 +343,7 @@ int main(int argc, char *argv[]) memset(&myglobals, 0, sizeof(myglobals)); /* find our basename (the name of the executable) so that we can use it in pretty-print error messages */ - myglobals.basename = opal_basename(argv[0]); + orte_basename = opal_basename(argv[0]); opal_cmd_line_create(&cmd_line, cmd_line_init); @@ -369,7 +368,7 @@ int main(int argc, char *argv[]) OPAL_REPO_REV); if (NULL != str) { fprintf(stdout, "%s %s\n\nReport bugs to %s\n", - myglobals.basename, str, PACKAGE_BUGREPORT); + orte_basename, str, PACKAGE_BUGREPORT); free(str); } exit(0); @@ -382,10 +381,10 @@ int main(int argc, char *argv[]) if (0 == geteuid() && !myglobals.run_as_root) { fprintf(stderr, "--------------------------------------------------------------------------\n"); if (myglobals.help) { - fprintf(stderr, "%s cannot provide the help message when run as root\n", myglobals.basename); + fprintf(stderr, "%s cannot provide the help message when run as root\n", orte_basename); } else { /* show_help is not yet available, so print an error manually */ - fprintf(stderr, "%s has detected an attempt to run as root.\n", myglobals.basename); + fprintf(stderr, "%s has detected an attempt to run as root.\n", orte_basename); } fprintf(stderr, " This is *strongly* discouraged as any mistake (e.g., in defining TMPDIR) or bug can\n"); fprintf(stderr, "result in catastrophic damage to the OS file system, leaving\n"); @@ -427,7 +426,7 @@ int main(int argc, char *argv[]) char *str, *args = NULL; char *project_name = NULL; opal_output(0, "GETTING HELP"); - if (0 == strcmp(myglobals.basename, "mpirun")) { + if (0 == strcmp(orte_basename, "mpirun")) { project_name = "Open MPI"; } else { project_name = "OpenRTE"; @@ -435,8 +434,8 @@ int main(int argc, char *argv[]) args = opal_cmd_line_get_usage_msg(&cmd_line); opal_output(0, "CMD LINE %s", args); str = opal_show_help_string("help-orterun.txt", "orterun:usage", false, - myglobals.basename, project_name, OPAL_VERSION, - 
myglobals.basename, args, + orte_basename, project_name, OPAL_VERSION, + orte_basename, args, PACKAGE_BUGREPORT); if (NULL != str) { printf("%s", str); @@ -529,11 +528,9 @@ int main(int argc, char *argv[]) */ opal_finalize(); - for (i=0; NULL != environ[i]; i++) { - if (0 == strncmp(environ[i], "OMPI", 4)) { - fprintf(stderr, "%s\n", environ[i]); - } - } + /* clear the ess param from the environment so our children + * don't pick it up */ + opal_unsetenv("OMPI_MCA_ess", &environ); /* set the info in our contact table */ orte_rml.set_contact_info(orte_process_info.my_hnp_uri); @@ -659,7 +656,7 @@ int main(int argc, char *argv[]) /* This should never happen -- this case should be caught in create_app(), but let's just double check... */ orte_show_help("help-orterun.txt", "orterun:nothing-to-do", - true, myglobals.basename); + true, orte_basename); exit(ORTE_ERROR_DEFAULT_EXIT_CODE); } @@ -765,7 +762,7 @@ static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line) fp = fopen(myglobals.report_pid, "w"); if (NULL == fp) { orte_show_help("help-orterun.txt", "orterun:write_file", false, - myglobals.basename, "pid", myglobals.report_pid); + orte_basename, "pid", myglobals.report_pid); exit(0); } fprintf(fp, "%d\n", (int)getpid()); @@ -1009,7 +1006,7 @@ static int create_app(int argc, char* argv[], if (0 == count) { orte_show_help("help-orterun.txt", "orterun:executable-not-specified", - true, myglobals.basename, myglobals.basename); + true, orte_basename, orte_basename); rc = ORTE_ERR_NOT_FOUND; goto cleanup; } @@ -1099,7 +1096,7 @@ static int create_app(int argc, char* argv[], } if (0 != strcmp(param, value)) { orte_show_help("help-orterun.txt", "orterun:app-prefix-conflict", - true, myglobals.basename, value, param); + true, orte_basename, value, param); /* let the global-level prefix take precedence since we * know that one is being used */ @@ -1125,7 +1122,7 @@ static int create_app(int argc, char* argv[], param_len--; if (0 == param_len) { 
orte_show_help("help-orterun.txt", "orterun:empty-prefix", - true, myglobals.basename, myglobals.basename); + true, orte_basename, orte_basename); free(param); return ORTE_ERR_FATAL; } @@ -1143,7 +1140,7 @@ static int create_app(int argc, char* argv[], if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "hostfile"))) { if(1 < j) { orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, myglobals.basename, NULL); + true, orte_basename, NULL); return ORTE_ERR_FATAL; } else { value = opal_cmd_line_get_param(&cmd_line, "hostfile", 0, 0); @@ -1153,7 +1150,7 @@ static int create_app(int argc, char* argv[], if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "machinefile"))) { if(1 < j || orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING)) { orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles", - true, myglobals.basename, NULL); + true, orte_basename, NULL); return ORTE_ERR_FATAL; } else { value = opal_cmd_line_get_param(&cmd_line, "machinefile", 0, 0); @@ -1177,7 +1174,7 @@ static int create_app(int argc, char* argv[], /* check for bozo error */ if (0 > myglobals.num_procs) { orte_show_help("help-orterun.txt", "orterun:negative-nprocs", - true, myglobals.basename, app->argv[0], + true, orte_basename, app->argv[0], myglobals.num_procs, NULL); return ORTE_ERR_FATAL; } @@ -1216,7 +1213,7 @@ static int create_app(int argc, char* argv[], app->app = strdup(app->argv[0]); if (NULL == app->app) { orte_show_help("help-orterun.txt", "orterun:call-failed", - true, myglobals.basename, "library", "strdup returned NULL", errno); + true, orte_basename, "library", "strdup returned NULL", errno); rc = ORTE_ERR_NOT_FOUND; goto cleanup; } diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 65ac8021934..023cdea8aba 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -105,7 +105,6 @@ #include "orte/runtime/runtime.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" 
-#include "orte/runtime/orte_data_server.h" #include "orte/runtime/orte_locks.h" #include "orte/runtime/orte_quit.h" @@ -1011,13 +1010,6 @@ int orterun(int argc, char *argv[]) orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON, ORTE_RML_PERSISTENT, orte_daemon_recv, NULL); - /* setup the data server */ - if (ORTE_SUCCESS != (rc = orte_data_server_init())) { - ORTE_ERROR_LOG(rc); - ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); - goto DONE; - } - /* setup for debugging */ orte_debugger_init_before_spawn(jdata); orte_state.add_job_state(ORTE_JOB_STATE_READY_FOR_DEBUGGERS,