From 91bbedc67de25f46a1740ca572a3d615b1b7fbf3 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 11 Apr 2017 07:55:26 -0700 Subject: [PATCH] Fix a segfault when starting under a debugger by setting the personality field in the orte_job_t. Also, harden the schizo stubs by checking for NULL in that field and returning an error as this should never happen. Signed-off-by: Ralph Castain --- orte/mca/schizo/base/schizo_base_stubs.c | 20 +++++++++++++-- orte/tools/orterun/orterun.c | 32 +++++++++++++----------- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/orte/mca/schizo/base/schizo_base_stubs.c b/orte/mca/schizo/base/schizo_base_stubs.c index a2e5fe1bf2c..dd70f0a4fa3 100644 --- a/orte/mca/schizo/base/schizo_base_stubs.c +++ b/orte/mca/schizo/base/schizo_base_stubs.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -13,6 +13,7 @@ #include "orte/constants.h" #include "opal/class/opal_list.h" +#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/schizo/base/base.h" int orte_schizo_base_parse_cli(char *personality, @@ -22,7 +23,7 @@ int orte_schizo_base_parse_cli(char *personality, orte_schizo_base_active_module_t *mod; if (NULL == personality) { - opal_output(0, "NULL PERSONALITY"); + ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); return ORTE_ERR_NOT_SUPPORTED; } @@ -46,6 +47,11 @@ int orte_schizo_base_parse_env(char *personality, int rc; orte_schizo_base_active_module_t *mod; + if (NULL == personality) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); + return ORTE_ERR_NOT_SUPPORTED; + } + OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) { if (0 == strcmp(personality, mod->component->mca_component_name)) { if (NULL != mod->module->parse_env) { @@ -63,6 +69,11 @@ int orte_schizo_base_setup_fork(orte_job_t *jdata, int rc; orte_schizo_base_active_module_t *mod; + if (NULL == jdata->personality) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); + return ORTE_ERR_NOT_SUPPORTED; + } + OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) { if (0 == strcmp(jdata->personality, mod->component->mca_component_name)) { if (NULL != mod->module->setup_fork) { @@ -81,6 +92,11 @@ int orte_schizo_base_setup_child(orte_job_t *jdata, int rc; orte_schizo_base_active_module_t *mod; + if (NULL == jdata->personality) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); + return ORTE_ERR_NOT_SUPPORTED; + } + OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) { if (0 == strcmp(jdata->personality, mod->component->mca_component_name)) { if (NULL != mod->module->setup_child) { diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 1db7538bdcc..2f1989fb934 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -14,7 +14,7 @@ * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -2236,7 +2236,7 @@ static void run_debugger(char *basename, opal_cmd_line_t *cmd_line, * - fills in the table MPIR_proctable, and sets MPIR_proctable_size * - sets MPIR_debug_state to MPIR_DEBUG_SPAWNED ( = 1) * - calls MPIR_Breakpoint() which the debugger will have a - * breakpoint on. + * breakpoint on. * * b) Applications start and then spin until MPIR_debug_gate is set * non-zero by the debugger. @@ -2413,6 +2413,8 @@ static void setup_debugger_job(void) * to avoid confusing the rest of the system's bookkeeping */ orte_plm_base_create_jobid(debugger); + /* set the personality to ORTE */ + debugger->personality = strdup("orte"); /* flag the job as being debugger daemons */ ORTE_FLAG_SET(debugger, ORTE_JOB_FLAG_DEBUGGER_DAEMON); /* unless directed, we do not forward output */ @@ -2699,14 +2701,14 @@ static void orte_debugger_detached(int fd, short event, void *cbdata) static void open_fifo (void) { if (attach_fd > 0) { - close(attach_fd); + close(attach_fd); } attach_fd = open(MPIR_attach_fifo, O_RDONLY | O_NONBLOCK, 0); if (attach_fd < 0) { - opal_output(0, "%s unable to open debugger attach fifo", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - return; + opal_output(0, "%s unable to open debugger attach fifo", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + return; } /* Set this fd to be close-on-exec so that children don't see it */ @@ -2719,9 +2721,9 @@ static void open_fifo (void) } opal_output_verbose(2, orte_debug_output, - "%s Monitoring debugger attach fifo %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - MPIR_attach_fifo); + "%s Monitoring debugger attach fifo %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + MPIR_attach_fifo); attach = (opal_event_t*)malloc(sizeof(opal_event_t)); opal_event_set(orte_event_base, attach, attach_fd, OPAL_EV_READ, attach_debugger, attach); @@ -2738,16 +2740,16 @@ static void attach_debugger(int fd, short event, void *arg) if (fifo_active) { attach = (opal_event_t*)arg; - fifo_active = false; + fifo_active = false; rc = read(attach_fd, &fifo_cmd, sizeof(fifo_cmd)); - if (!rc) { + if (!rc) { /* release the current event */ opal_event_free(attach); - /* reopen device to clear hangup */ - open_fifo(); - return; - } + /* reopen device to clear hangup */ + open_fifo(); + return; + } if (1 != fifo_cmd) { /* ignore the cmd */ fifo_active = true;