From f5389cbb03adfba0a1466dff216f566dec37250b Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 27 May 2015 09:06:26 -0600 Subject: [PATCH 1/9] opal/keyval: fix coverity issues CID 1292738 Dereference after null check (FORWARD_NULL) It is an error if NULL is passed for val in add_to_env_str. Removed the NULL-check @ keyval_parse.c:253 and added a NULL check and an error return. CID 1292737 Logically dead code (DEADCODE) Coverity is correct, the error code at the end of parse_line_new is never reached. This means we fail to report parsing errors when parsing -x and -mca lines in keyval files. I moved the error code into the loop and removed the checks @ keyval_parse.c:314. I also named the parse state enum type and updated parse_line_new to use this type. Signed-off-by: Nathan Hjelm --- opal/util/keyval/keyval_lex.h | 3 +- opal/util/keyval_parse.c | 66 ++++++++++++++++++++++++++--------- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/opal/util/keyval/keyval_lex.h b/opal/util/keyval/keyval_lex.h index 2ba41961d25..a4b6fb9e149 100644 --- a/opal/util/keyval/keyval_lex.h +++ b/opal/util/keyval/keyval_lex.h @@ -54,7 +54,7 @@ extern int opal_util_keyval_yylineno; #define YY_NO_UNPUT 1 #define YY_SKIP_YYWRAP 1 -enum { +enum opal_keyval_parse_state_t { OPAL_UTIL_KEYVAL_PARSE_DONE, OPAL_UTIL_KEYVAL_PARSE_ERROR, @@ -68,5 +68,6 @@ enum { OPAL_UTIL_KEYVAL_PARSE_MAX }; +typedef enum opal_keyval_parse_state_t opal_keyval_parse_state_t; #endif diff --git a/opal/util/keyval_parse.c b/opal/util/keyval_parse.c index 876a2e142ea..8561ecb7c6d 100644 --- a/opal/util/keyval_parse.c +++ b/opal/util/keyval_parse.c @@ -1,3 +1,23 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ #include "opal_config.h" @@ -19,7 +39,7 @@ static size_t key_buffer_len = 0; static opal_mutex_t keyval_mutex; static int parse_line(void); -static int parse_line_new(int first_val); +static int parse_line_new(opal_keyval_parse_state_t first_val); static void parse_error(int num); static char *env_str = NULL; @@ -249,31 +269,45 @@ static int save_param_name(const char* prefix, const char* suffix) static int add_to_env_str(char *var, char *val) { int sz, varsz, valsz; + void *tmp; + + if (NULL == var) { + return OPAL_ERR_BAD_PARAM; + } + if (NULL != env_str) { - varsz = (NULL != var) ? strlen(var) : 0; + varsz = strlen(var); valsz = (NULL != val) ? strlen(val) : 0; sz = strlen(env_str)+varsz+valsz+2; if (envsize <= sz) { envsize *=2; - env_str = realloc(env_str, envsize); - memset(env_str + strlen(env_str), 0, envsize/2); + + tmp = realloc(env_str, envsize); + if (NULL == tmp) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + env_str = tmp; } strcat(env_str, ";"); } else { - env_str = malloc(envsize); - memset(env_str, 0, envsize); + env_str = calloc(1, envsize); + if (NULL == env_str) { + return OPAL_ERR_OUT_OF_RESOURCE; + } } + strcat(env_str, var); if (NULL != val) { strcat(env_str, "="); strcat(env_str, val); } - return 0; + + return OPAL_SUCCESS; } -static int parse_line_new(int first_val) +static int parse_line_new(opal_keyval_parse_state_t first_val) { - int val; + opal_keyval_parse_state_t val; char *tmp; val = first_val; @@ -307,16 +341,14 @@ static int parse_line_new(int first_val) } else if (OPAL_UTIL_KEYVAL_PARSE_ENVVAR == val) { save_param_name("-x", "="); add_to_env_str(key_buffer, NULL); + } else { + /* we got something unexpected. Bonk! */ + parse_error(6); + return OPAL_ERROR; } - val = opal_util_keyval_yylex(); - } - if (OPAL_UTIL_KEYVAL_PARSE_DONE == val || - OPAL_UTIL_KEYVAL_PARSE_NEWLINE == val) { - return OPAL_SUCCESS; + val = opal_util_keyval_yylex(); } - /* Nope -- we got something unexpected. Bonk! */ - parse_error(6); - return OPAL_ERROR; + return OPAL_SUCCESS; } From 13e0a9da3a1a3e6968b793bb41bf8ae4b904528d Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 27 May 2015 09:19:56 -0600 Subject: [PATCH 2/9] sec/base: fix coverity issues CID 1292483 Uninitialized pointer read (UNINIT) Initialize the method and credential members of the opal_sec_cred_t to avoid possible invalid read when calling cleanup_cred. CID 1292484 Double free (USE_AFTER_FREE) Set method and credential members to NULL after freeing in cleanup_cred. Signed-off-by: Nathan Hjelm --- opal/mca/sec/base/sec_base_stubs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/opal/mca/sec/base/sec_base_stubs.c b/opal/mca/sec/base/sec_base_stubs.c index d6897866379..acf127ef615 100644 --- a/opal/mca/sec/base/sec_base_stubs.c +++ b/opal/mca/sec/base/sec_base_stubs.c @@ -1,5 +1,8 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,9 +29,11 @@ static void cleanup_cred(opal_sec_cred_t *cred) } if (NULL != cred->method) { free(cred->method); + cred->method = NULL; } if (NULL != cred->credential) { free(cred->credential); + cred->credential = NULL; } } @@ -99,7 +104,7 @@ int opal_sec_base_validate(char *payload, size_t size, char **method) opal_sec_handle_t *hdl; opal_buffer_t buf; int cnt, rc; - opal_sec_cred_t cred; + opal_sec_cred_t cred = {.method = NULL, .credential = NULL}; opal_output_verbose(5, opal_sec_base_framework.framework_output, "opal_sec: Received credential of size %lu", From 6b86e742186a137ba35e703626d148d512d9d2f6 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 27 May 2015 09:43:42 -0600 Subject: [PATCH 3/9] btl/openib: fix coverity issues CID 1269933 Uninitialized scalar variable (UNINIT) This CID isn't really an error but it is best for both valgrind and coverity cleanness to not write uninitialized data. Added an initializer for async_command in btl_openib_component_close. CID 1269930 Uninitialized scalar variable (UNINIT) Same as above. Best not to write uninitialized data. Added an initializer for async_command. CID 1269701 Logically dead code (DEADCODE) Coverity is correct. The smallest_pp_qp will always be 0. Changed the initial value so that the smallest_pp_qp is set as intended. If no per-per queue pair exists then use the last shared queue pair. This queue pair should have the smallest message size. This will reduce buffer waste. CID 1269713 Logically dead code (DEADCODE) False positive but easy to silence. The two check are meaningless if HAVE_XRC is 0 so protect them with #if HAVE_XRC. CID 1269726 Division or modulo by zero (DIVIDE_BY_ZERO) Indeed an issue. If we get an invalid value for rd_win then this will cause a divide-by-zero exception. Added a check to ensure rd_win is > 0. Also updated the help message to reflect this requirement. CID 1269672 Ignoring number of bytes read (CHECKED_RETURN) This error was somewhat intentional. Linux parameter files are probably not empty but it is safer to check the return code of read to make sure we got something. If 0 bytes are read this code could SEGV whe running strtoull. CID 1269836 Unintentional integer overflow (OVERFLOW_BEFORE_WIDEN) Add a range check to read_module_param to ensure we do not overflow. In the future it might be worthwhile to report an error because these parameters should never cause overflow in this calculation. CID 1269692 Calling risky function (DC.WEAK_CRYPTO) ??? This call was added in 2006 but I see no calls to the rest of the rand48 family of functions. Anyway, we SHOULD NEVER be calling seed48, srand, etc because it messes with user code. Removed the call to seed48. CID 1269823 Dereference null return value (NULL_RETURNS) This is likely a false positive. The endpoint lock is being held so no other thread should be able to remove fragments from the list. Also, mca_btl_openib_endpoint_post_send should not be removing items from the list. If a NULL fragment is ever returned it will likely be a coding error on the part of an Open MPI developer. Added an assert() to catch this and quiet the coverity error. CID 1269671 Unchecked return value (CHECKED_RETURN) Added a check for the return code of mca_btl_openib_endpoint_post_send to quiet the coverity error. It is unlikely this error path will be traversed. CID 1270229 Missing break in switch (MISSING_BREAK) Add a comment to indicate that the fall-through is intentional. CID 1269735 Dereference after null check (FORWARD_NULL) There should always be an endpoint when handling a work completion. The endpoint is either stored on the fragment or can be looked up using the immediate data. Move the immediate data code up and add an assert for a NULL endpoint. CID 1269740 Dereference after null check (FORWARD_NULL) CID 1269741 Explicit null dereferenced (FORWARD_NULL) Similar to CID 1269735 fix. Signed-off-by: Nathan Hjelm --- opal/mca/btl/openib/btl_openib_component.c | 99 +++++++++++++-------- opal/mca/btl/openib/help-mpi-btl-openib.txt | 3 +- 2 files changed, 66 insertions(+), 36 deletions(-) diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index f0b6814556c..50ea399a306 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -240,8 +240,8 @@ static int btl_openib_component_close(void) /* Tell the async thread to shutdown */ if (mca_btl_openib_component.use_async_event_thread && 0 != mca_btl_openib_component.async_thread) { - mca_btl_openib_async_cmd_t async_command; - async_command.a_cmd = OPENIB_ASYNC_THREAD_EXIT; + mca_btl_openib_async_cmd_t async_command = {.a_cmd = OPENIB_ASYNC_THREAD_EXIT, + .fd = -1, .qp = NULL}; if (write(mca_btl_openib_component.async_pipe[1], &async_command, sizeof(mca_btl_openib_async_cmd_t)) < 0) { BTL_ERROR(("Failed to communicate with async event thread")); @@ -948,9 +948,9 @@ static void device_destruct(mca_btl_openib_device_t *device) /* signaling to async_tread to stop poll for this device */ if (mca_btl_openib_component.use_async_event_thread && -1 != mca_btl_openib_component.async_pipe[1]) { - mca_btl_openib_async_cmd_t async_command; - async_command.a_cmd = OPENIB_ASYNC_CMD_FD_REMOVE; - async_command.fd = device->ib_dev_context->async_fd; + mca_btl_openib_async_cmd_t async_command = {.a_cmd = OPENIB_ASYNC_CMD_FD_REMOVE, + .fd = device->ib_dev_context->async_fd, + .qp = NULL}; if (write(mca_btl_openib_component.async_pipe[1], &async_command, sizeof(mca_btl_openib_async_cmd_t)) < 0){ BTL_ERROR(("Failed to write to pipe")); @@ -1223,7 +1223,7 @@ static int setup_qps(void) int num_xrc_qps = 0, num_pp_qps = 0, num_srq_qps = 0, qp = 0; uint32_t max_qp_size, max_size_needed; int32_t min_freelist_size = 0; - int smallest_pp_qp = 0, ret = OPAL_ERROR; + int smallest_pp_qp = INT_MAX, ret = OPAL_ERROR; queues = opal_argv_split(mca_btl_openib_component.receive_queues, ':'); if (0 == opal_argv_count(queues)) { @@ -1264,6 +1264,8 @@ static int setup_qps(void) } qp++; } + +#if HAVE_XRC /* Current XRC implementation can't used with other QP types - PP and SRQ */ if (num_xrc_qps > 0 && (num_pp_qps > 0 || num_srq_qps > 0)) { @@ -1282,6 +1284,8 @@ static int setup_qps(void) ret = OPAL_ERR_BAD_PARAM; goto error; } +#endif + mca_btl_openib_component.num_pp_qps = num_pp_qps; mca_btl_openib_component.num_srq_qps = num_srq_qps; mca_btl_openib_component.num_xrc_qps = num_xrc_qps; @@ -1318,6 +1322,15 @@ static int setup_qps(void) /* by default set rd_low to be 3/4 of rd_num */ rd_low = atoi_param(P(3), rd_num - (rd_num / 4)); rd_win = atoi_param(P(4), (rd_num - rd_low) * 2); + + if (0 >= rd_win) { + opal_show_help("help-mpi-btl-openib.txt", + "invalid pp qp specification", true, + opal_process_info.nodename, queues[qp]); + ret = OPAL_ERR_BAD_PARAM; + goto error; + } + rd_rsv = atoi_param(P(5), (rd_num * 2) / rd_win); BTL_VERBOSE(("pp: rd_num is %d rd_low is %d rd_win %d rd_rsv %d", @@ -1437,7 +1450,11 @@ static int setup_qps(void) } mca_btl_openib_component.rdma_qp = mca_btl_openib_component.num_qps - 1; - mca_btl_openib_component.credits_qp = smallest_pp_qp; + if (mca_btl_openib_component.num_qps > smallest_pp_qp) { + mca_btl_openib_component.credits_qp = smallest_pp_qp; + } else { + mca_btl_openib_component.credits_qp = mca_btl_openib_component.num_qps - 1; + } ret = OPAL_SUCCESS; error: @@ -1453,23 +1470,33 @@ static int setup_qps(void) } /* read a single integer from a linux module parameters file */ -static uint64_t read_module_param(char *file, uint64_t value) +static uint64_t read_module_param(char *file, uint64_t value, uint64_t max) { int fd = open(file, O_RDONLY); char buffer[64]; uint64_t ret; + int rc; if (0 > fd) { return value; } - read (fd, buffer, 64); + rc = read (fd, buffer, 64); close (fd); + if (0 == rc) { + return value; + } + errno = 0; ret = strtoull(buffer, NULL, 10); + if (ret > max) { + /* NTH: probably should report a bogus value */ + ret = max; + } + return (0 == errno) ? ret : value; } @@ -1517,8 +1544,8 @@ static uint64_t calculate_max_reg (const char *device_name) } else if (!strncmp(device_name, "mlx4", 4)) { if (0 == stat("/sys/module/mlx4_core/parameters/log_num_mtt", &statinfo)) { - mtts_per_seg = 1 << read_module_param("/sys/module/mlx4_core/parameters/log_mtts_per_seg", 1); - num_mtt = 1 << read_module_param("/sys/module/mlx4_core/parameters/log_num_mtt", 20); + mtts_per_seg = 1 << read_module_param("/sys/module/mlx4_core/parameters/log_mtts_per_seg", 1, 63); + num_mtt = 1 << read_module_param("/sys/module/mlx4_core/parameters/log_mtts_per_seg", 1, 63); if (1 == num_mtt) { /* NTH: is 19 a minimum? when log_num_mtt is set to 0 use 19 */ num_mtt = 1 << 19; @@ -1530,9 +1557,9 @@ static uint64_t calculate_max_reg (const char *device_name) } else if (!strncmp(device_name, "mthca", 5)) { if (0 == stat("/sys/module/ib_mthca/parameters/num_mtt", &statinfo)) { - mtts_per_seg = 1 << read_module_param("/sys/module/ib_mthca/parameters/log_mtts_per_seg", 1); - num_mtt = read_module_param("/sys/module/ib_mthca/parameters/num_mtt", 1 << 20); - reserved_mtt = read_module_param("/sys/module/ib_mthca/parameters/fmr_reserved_mtts", 0); + mtts_per_seg = 1 << read_module_param("/sys/module/ib_mthca/parameters/log_mtts_per_seg", 1, 63); + num_mtt = read_module_param("/sys/module/ib_mthca/parameters/num_mtt", 1 << 20, (uint64_t) -1); + reserved_mtt = read_module_param("/sys/module/ib_mthca/parameters/fmr_reserved_mtts", 0, (uint64_t) -1); max_reg = (num_mtt - reserved_mtt) * opal_getpagesize () * mtts_per_seg; } else { @@ -2477,7 +2504,6 @@ btl_openib_component_init(int *num_btl_modules, mca_btl_openib_module_t * openib_btl; mca_btl_base_selected_module_t* ib_selected; opal_list_item_t* item; - unsigned short seedv[3]; mca_btl_openib_frag_init_data_t *init_data; struct dev_distance *dev_sorted; float distance; @@ -2517,11 +2543,6 @@ btl_openib_component_init(int *num_btl_modules, goto no_btls; } - seedv[0] = OPAL_PROC_MY_NAME.vpid; - seedv[1] = opal_timer_base_get_cycles(); - seedv[2] = opal_timer_base_get_cycles(); - seed48(seedv); - /* Read in INI files with device-specific parameters */ if (OPAL_SUCCESS != (ret = opal_btl_openib_ini_init())) { goto no_btls; @@ -2970,6 +2991,7 @@ static int progress_no_credits_pending_frags(mca_btl_base_endpoint_t *ep) ep->qps[qp].u.pp_qp.sd_credits > 0 || !BTL_OPENIB_QP_TYPE_PP(qp)); --len) { frag = opal_list_remove_first(&ep->qps[qp].no_credits_pending_frags[pri]); + assert (NULL != frag); /* If _endpoint_post_send() fails because of RESOURCE_BUSY, then the frag was re-added to the @@ -3326,19 +3348,25 @@ static char* btl_openib_component_status_to_string(enum ibv_wc_status status) static void progress_pending_frags_wqe(mca_btl_base_endpoint_t *ep, const int qpn) { - int i; + int ret; opal_list_item_t *frag; mca_btl_openib_qp_t *qp = ep->qps[qpn].qp; OPAL_THREAD_LOCK(&ep->endpoint_lock); - for(i = 0; i < 2; i++) { + for(int i = 0; i < 2; i++) { while(qp->sd_wqe > 0) { mca_btl_base_endpoint_t *tmp_ep; frag = opal_list_remove_first(&ep->qps[qpn].no_wqe_pending_frags[i]); if(NULL == frag) break; tmp_ep = to_com_frag(frag)->endpoint; - mca_btl_openib_endpoint_post_send(tmp_ep, to_send_frag(frag)); + ret = mca_btl_openib_endpoint_post_send(tmp_ep, to_send_frag(frag)); + if (OPAL_SUCCESS != ret) { + /* NTH: this handles retrying if we are out of credits but other errors are not + * handled (maybe abort?). */ + opal_list_prepend (&ep->qps[qpn].no_wqe_pending_frags[i], (opal_list_item_t *) frag); + break; + } } } OPAL_THREAD_UNLOCK(&ep->endpoint_lock); @@ -3388,10 +3416,20 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq, * to. For send fragments "order" contains QP idx the fragment was send * through */ qp = des->order; + + if (IBV_WC_RECV == wc->opcode && (wc->wc_flags & IBV_WC_WITH_IMM)) { +#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT + wc->imm_data = ntohl(wc->imm_data); +#endif + frag->endpoint = (mca_btl_openib_endpoint_t*) + opal_pointer_array_get_item(device->endpoints, wc->imm_data); + } + endpoint = frag->endpoint; - if(endpoint) - openib_btl = endpoint->endpoint_btl; + assert (NULL != endpoint); + + openib_btl = endpoint->endpoint_btl; if(wc->status != IBV_WC_SUCCESS) { OPAL_OUTPUT((-1, "Got WC: ERROR")); @@ -3412,6 +3450,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq, get_frag->cb.func (&openib_btl->super, endpoint, (void *)(intptr_t) frag->sg_entry.addr, get_frag->cb.local_handle, get_frag->cb.context, get_frag->cb.data, OPAL_SUCCESS); + /* fall through */ case IBV_WC_RDMA_WRITE: if (MCA_BTL_OPENIB_FRAG_SEND_USER == openib_frag_type(des)) { mca_btl_openib_put_frag_t *put_frag = to_put_frag(des); @@ -3476,16 +3515,6 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq, OPAL_OUTPUT((-1, "Got WC: RDMA_RECV, qp %d, src qp %d, WR ID %" PRIx64, wc->qp_num, wc->src_qp, wc->wr_id)); -#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT - wc->imm_data = ntohl(wc->imm_data); -#endif - if(wc->wc_flags & IBV_WC_WITH_IMM) { - endpoint = (mca_btl_openib_endpoint_t*) - opal_pointer_array_get_item(device->endpoints, wc->imm_data); - frag->endpoint = endpoint; - openib_btl = endpoint->endpoint_btl; - } - /* Process a RECV */ if(btl_openib_handle_incoming(openib_btl, endpoint, to_recv_frag(frag), wc->byte_len) != OPAL_SUCCESS) { diff --git a/opal/mca/btl/openib/help-mpi-btl-openib.txt b/opal/mca/btl/openib/help-mpi-btl-openib.txt index 2dbbbe86384..031d10142b0 100644 --- a/opal/mca/btl/openib/help-mpi-btl-openib.txt +++ b/opal/mca/btl/openib/help-mpi-btl-openib.txt @@ -365,7 +365,8 @@ Per-peer receive queues require between 2 and 5 parameters: 1. Buffer size in bytes (mandatory) 2. Number of buffers (mandatory) 3. Low buffer count watermark (optional; defaults to (num_buffers / 2)) - 4. Credit window size (optional; defaults to (low_watermark / 2)) + 4. Credit window size (optional; defaults to (low_watermark / 2), + must be > 0) 5. Number of buffers reserved for credit messages (optional; defaults to (num_buffers*2-1)/credit_window) From 32d4d7b6eaa3d6ee995e54aa1c255259cd7cb0b4 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 27 May 2015 12:04:26 -0600 Subject: [PATCH 4/9] opal/dss: silence coverity issues CID 1269988 Use after free (USE_AFTER_FREE) CID 1269987 Use after free (USE_AFTER_FREE) Both are false positives as convert is always overwritten by the call to opal_dss_unpack_string(). Set convert to prevent this issue from re-appearing. Signed-off-by: Nathan Hjelm --- opal/dss/dss_unpack.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/opal/dss/dss_unpack.c b/opal/dss/dss_unpack.c index 2ab586eed90..7bd0a0bae4d 100644 --- a/opal/dss/dss_unpack.c +++ b/opal/dss/dss_unpack.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -9,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -430,6 +431,7 @@ int opal_dss_unpack_float(opal_buffer_t *buffer, void *dest, tmp = strtof(convert, NULL); memcpy(&desttmp[i], &tmp, sizeof(tmp)); free(convert); + convert = NULL; } return OPAL_SUCCESS; } @@ -460,6 +462,7 @@ int opal_dss_unpack_double(opal_buffer_t *buffer, void *dest, tmp = strtod(convert, NULL); memcpy(&desttmp[i], &tmp, sizeof(tmp)); free(convert); + convert = NULL; } return OPAL_SUCCESS; } From 43d678e7ca042d43faea216c43d7792c88915138 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 27 May 2015 12:17:59 -0600 Subject: [PATCH 5/9] btl/openib: fix more coverity issues CID 1269931 Uninitialized scalar variable (UNINIT) Initialize complete async message. This was not a bug but the fix contributes to valgrind cleanness (uninitialed write). CID 1269915 Unintended sign extension (SIGN_EXTENSION) Should never happen. Quieting this by explicitly casting to uint64_t. CID 1269824 Dereference null return value (NULL_RETURNS) It is impossible for opal_list_remove_first to return NULL if opal_list_is_empty returns false. I refactored the code in question to not use opal_list_is_empty but loop until NULL is returned by opal_list_remove_first. That will quiet the issue. CID 1269913 Dereference before null check (REVERSE_INULL) The storage parameter should never be NULL. The check intended to check if *storage was NULL not storage. Signed-off-by: Nathan Hjelm --- opal/mca/btl/openib/btl_openib.c | 8 ++++---- opal/mca/btl/openib/btl_openib_frag.h | 17 +++++++++-------- opal/mca/btl/openib/btl_openib_mca.c | 6 ++++-- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index f473f3f373c..6286a8ca814 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -641,7 +641,9 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) } if(mca_btl_openib_component.use_async_event_thread) { - mca_btl_openib_async_cmd_t async_command; + mca_btl_openib_async_cmd_t async_command = {.a_cmd = OPENIB_ASYNC_CMD_FD_ADD, + .fd = device->ib_dev_context->async_fd, + .qp = NULL}; /* start the async even thread if it is not already started */ if (start_async_event_thread() != OPAL_SUCCESS) @@ -649,8 +651,6 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) device->got_fatal_event = false; device->got_port_event = false; - async_command.a_cmd = OPENIB_ASYNC_CMD_FD_ADD; - async_command.fd = device->ib_dev_context->async_fd; if (write(mca_btl_openib_component.async_pipe[1], &async_command, sizeof(mca_btl_openib_async_cmd_t))<0){ BTL_ERROR(("Failed to write to pipe [%d]",errno)); @@ -699,7 +699,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device) if (mca_btl_openib_component.max_eager_rdma > 0 && device->use_eager_rdma) { device->eager_rdma_buffers = - (mca_btl_base_endpoint_t **) calloc(mca_btl_openib_component.max_eager_rdma * device->btls, + (mca_btl_base_endpoint_t **) calloc((size_t) mca_btl_openib_component.max_eager_rdma * device->btls, sizeof(mca_btl_openib_endpoint_t*)); if(NULL == device->eager_rdma_buffers) { BTL_ERROR(("Memory allocation fails")); diff --git a/opal/mca/btl/openib/btl_openib_frag.h b/opal/mca/btl/openib/btl_openib_frag.h index 392728f9d33..82d727ee442 100644 --- a/opal/mca/btl/openib/btl_openib_frag.h +++ b/opal/mca/btl/openib/btl_openib_frag.h @@ -427,14 +427,15 @@ static inline mca_btl_openib_coalesced_frag_t *alloc_coalesced_frag(void) do { \ opal_free_list_return (to_base_frag(frag)->list, \ (opal_free_list_item_t*)(frag)); \ - } while(0); - -#define MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(list) \ - while(!opal_list_is_empty(list)){ \ - opal_list_item_t *frag_item; \ - frag_item = opal_list_remove_first(list); \ - MCA_BTL_IB_FRAG_RETURN(frag_item); \ - } \ + } while(0) + +#define MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(list) \ + do { \ + opal_list_item_t *_frag_item; \ + while (NULL != (_frag_item = opal_list_remove_first(list))) { \ + MCA_BTL_IB_FRAG_RETURN(_frag_item); \ + } \ + } while (0) struct mca_btl_openib_module_t; diff --git a/opal/mca/btl/openib/btl_openib_mca.c b/opal/mca/btl/openib/btl_openib_mca.c index c9b1866fcac..8918d1eeee3 100644 --- a/opal/mca/btl/openib/btl_openib_mca.c +++ b/opal/mca/btl/openib/btl_openib_mca.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved. - * Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. @@ -105,6 +105,8 @@ static int reg_string(const char* param_name, { int index; + assert (NULL != storage); + /* The MCA variable system will not change this pointer */ *storage = (char *) default_value; index = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version, @@ -117,7 +119,7 @@ static int reg_string(const char* param_name, MCA_BASE_VAR_SYN_FLAG_DEPRECATED); } - if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == storage || 0 == strlen(*storage))) { + if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == *storage || 0 == strlen(*storage))) { opal_output(0, "Bad parameter value for parameter \"%s\"", param_name); return OPAL_ERR_BAD_PARAM; From 3edb421adce8144a1b5a90aa197ae7b50f7fe62b Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 27 May 2015 12:39:14 -0600 Subject: [PATCH 6/9] common/verbs: fix coverity issues CID 1269864 Resource leak (RESOURCE_LEAK) CID 1269865 Resource leak (RESOURCE_LEAK) Slightly refactored the code to remove extra goto statements and ensure the if_include_list and if_exclude_list are actually released on success. Signed-off-by: Nathan Hjelm --- .../common/verbs/common_verbs_find_ports.c | 60 +++++++++---------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/opal/mca/common/verbs/common_verbs_find_ports.c b/opal/mca/common/verbs/common_verbs_find_ports.c index 9fe38891425..fc2575c54f7 100644 --- a/opal/mca/common/verbs/common_verbs_find_ports.c +++ b/opal/mca/common/verbs/common_verbs_find_ports.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. @@ -226,21 +227,34 @@ opal_list_t *opal_common_verbs_find_ports(const char *if_include, opal_common_verbs_device_item_t *di; opal_common_verbs_port_item_t *pi; int rc; - uint32_t i, j; + uint32_t j; opal_list_t *port_list = NULL; - opal_list_item_t *item; bool want; + /* Sanity check the include/exclude params */ + if (NULL != if_include && NULL != if_exclude) { + return NULL; + } + + /* Query all the IBV devices on the machine. Use an ompi + compatibility function, because how to get this list changed + over the history of the IBV API. */ + devices = opal_ibv_get_device_list(&num_devs); + if (0 == num_devs) { + opal_output_verbose(5, stream, "no verbs interfaces found"); + return NULL; + } + + opal_output_verbose(5, stream, "found %d verbs interface%s", + num_devs, (num_devs != 1) ? "s" : ""); + /* Allocate a list to fill */ port_list = OBJ_NEW(opal_list_t); if (NULL == port_list) { - goto err_free_argv; + return NULL; } - /* Sanity check the include/exclude params */ - if (NULL != if_include && NULL != if_exclude) { - return port_list; - } else if (NULL != if_include) { + if (NULL != if_include) { opal_output_verbose(5, stream, "finding verbs interfaces, including %s", if_include); if_include_list = opal_argv_split(if_include, ','); @@ -252,22 +266,10 @@ opal_list_t *opal_common_verbs_find_ports(const char *if_include, if_sanity_list = opal_argv_copy(if_exclude_list); } - /* Query all the IBV devices on the machine. Use an ompi - compatibility function, because how to get this list changed - over the history of the IBV API. */ - devices = opal_ibv_get_device_list(&num_devs); - if (0 == num_devs) { - opal_output_verbose(5, stream, "no verbs interfaces found"); - goto err_free_argv; - } else { - opal_output_verbose(5, stream, "found %d verbs interface%s", - num_devs, (num_devs != 1) ? "s" : ""); - } - /* Now loop through all the devices. Get the attributes for each port on each device to see if they match our selection criteria. */ - for (i = 0; (int32_t) i < num_devs; ++i) { + for (int32_t i = 0; (int32_t) i < num_devs; ++i) { /* See if this device is on the include/exclude sanity check list. If it is, remove it from the sanity check list (i.e., we should end up with an empty list at the end if @@ -481,27 +483,23 @@ opal_list_t *opal_common_verbs_find_ports(const char *if_include, opal_argv_free(if_sanity_list); } + opal_argv_free(if_include_list); + opal_argv_free(if_exclude_list); + /* All done! */ opal_ibv_free_device_list(devices); return port_list; err_free_port_list: - for (item = opal_list_remove_first(port_list); - item != NULL; - item = opal_list_remove_first(port_list)) { - OBJ_RELEASE(item); - } + OPAL_LIST_RELEASE(port_list); opal_ibv_free_device_list(devices); - err_free_argv: if (NULL != if_sanity_list) { opal_argv_free(if_sanity_list); - if_sanity_list = NULL; } + opal_argv_free(if_include_list); - if_include_list = NULL; opal_argv_free(if_exclude_list); - if_exclude_list = NULL; - return port_list; + return NULL; } From 0e3c32a98a843c1f4f58b04bc08315fb3349bb0c Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 27 May 2015 13:39:42 -0600 Subject: [PATCH 7/9] opal/sys_limits: fix coverity issue CID 996175 Dereference before null check (REVERSE_NULL) If lims is NULL then we ran out of memory. Return an error and remove the NULL check at cleanup. Signed-off-by: Nathan Hjelm --- opal/util/sys_limits.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/opal/util/sys_limits.c b/opal/util/sys_limits.c index 943d3354a71..1a6ea4d0e91 100644 --- a/opal/util/sys_limits.c +++ b/opal/util/sys_limits.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -10,8 +11,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. - * All rights reserved. + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -119,6 +120,9 @@ int opal_util_init_sys_limits(char **errmsg) /* parse the requested limits to set */ lims = opal_argv_split(opal_set_max_sys_limits, ','); + if (NULL == lims) { + return OPAL_ERR_OUT_OF_RESOURCE; + } /* each limit is expressed as a "param:value" pair */ for (i=0; NULL != lims[i]; i++) { @@ -223,9 +227,7 @@ int opal_util_init_sys_limits(char **errmsg) rc = OPAL_SUCCESS; out: - if (NULL != lims) { - opal_argv_free(lims); - } + opal_argv_free(lims); if (NULL != lim) { opal_argv_free(lim); } From 9353fcea9590f9528e200ea17d57c3e6d67a44c5 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 27 May 2015 13:46:38 -0600 Subject: [PATCH 8/9] crs/base: fix coverity issues CID 1196720 Resource leak (RESOURCE_LEAK) CID 1196721 Resource leak (RESOURCE_LEAK) The code in question does leak loc_token and loc_value. Cleaned up the code a bit and plugged the leak. Signed-off-by: Nathan Hjelm --- opal/mca/crs/base/crs_base_fns.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/opal/mca/crs/base/crs_base_fns.c b/opal/mca/crs/base/crs_base_fns.c index 1363c6cf4d0..067e895cebd 100644 --- a/opal/mca/crs/base/crs_base_fns.c +++ b/opal/mca/crs/base/crs_base_fns.c @@ -115,19 +115,11 @@ OBJ_CLASS_INSTANCE(opal_crs_base_ckpt_options_t, * Utility functions */ int opal_crs_base_metadata_read_token(FILE *metadata, char * token, char ***value) { - int exit_status = OPAL_SUCCESS; - char * loc_token = NULL; - char * loc_value = NULL; int argc = 0; /* Dummy check */ - if( NULL == token ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - if( NULL == metadata ) { - exit_status = OPAL_ERROR; - goto cleanup; + if (NULL == token || NULL == metadata) { + return OPAL_ERROR; } /* @@ -135,6 +127,8 @@ int opal_crs_base_metadata_read_token(FILE *metadata, char * token, char ***valu */ rewind(metadata); do { + char *loc_token = NULL, *loc_value = NULL; + /* Get next token */ if( OPAL_SUCCESS != metadata_extract_next_token(metadata, &loc_token, &loc_value) ) { break; @@ -144,13 +138,12 @@ int opal_crs_base_metadata_read_token(FILE *metadata, char * token, char ***valu if(0 == strncmp(token, loc_token, strlen(loc_token)) ) { opal_argv_append(&argc, value, loc_value); } - } while(0 == feof(metadata) ); - - cleanup: - if (NULL != metadata) { - rewind(metadata); - } - return exit_status; + + free (loc_token); + free (loc_value); + } while (0 == feof(metadata)); + + return OPAL_SUCCESS; } int opal_crs_base_extract_expected_component(FILE *metadata, char ** component_name, int *prev_pid) From ceb319170a3ea4356257aa4c131afc5ac3574ea9 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 27 May 2015 14:03:57 -0600 Subject: [PATCH 9/9] btl/openib: fix more coverity issues CID 1269674 Ignoring number of bytes read (CHECKED_RETURN) Check that we read enough bytes to get a complete async command. CID 1269793 Missing break in switch (MISSING_BREAK) Added comment to indicate fall through was intentional. CID 1269702: Constant variable guards dead code (DEADCODE) Remove an unused argument to opal_show_help. This will quiet the coverity issue. CID 1269675 Ignoring number of bytes read (CHECKED_RETURN) Check that at least sizeof(int) bytes are read. If this is not the case then it is an error. Signed-off-by: Nathan Hjelm --- opal/mca/btl/openib/btl_openib_async.c | 28 +++++++++++------------ opal/mca/btl/openib/btl_openib_endpoint.c | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/opal/mca/btl/openib/btl_openib_async.c b/opal/mca/btl/openib/btl_openib_async.c index 64dad4f3aaa..7f6ec499d9b 100644 --- a/opal/mca/btl/openib/btl_openib_async.c +++ b/opal/mca/btl/openib/btl_openib_async.c @@ -4,7 +4,7 @@ * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. @@ -170,9 +170,10 @@ static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *devices_p { struct pollfd *async_pollfd_tmp; mca_btl_openib_async_cmd_t cmd; - int fd,flags,j; + int fd,flags,j,ret; /* Got command from main thread */ - if (read(devices_poll->async_pollfd[0].fd, &cmd, sizeof(mca_btl_openib_async_cmd_t)) < 0) { + ret = read(devices_poll->async_pollfd[0].fd, &cmd, sizeof(mca_btl_openib_async_cmd_t)); + if (sizeof(mca_btl_openib_async_cmd_t) != ret) { BTL_ERROR(("Read failed [%d]",errno)); return OPAL_ERROR; } @@ -328,7 +329,6 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po int j; mca_btl_openib_device_t *device = NULL; struct ibv_async_event event; - bool xrc_event = false; int event_type; /* We need to find correct device and process this event */ @@ -356,6 +356,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po /* is it XRC event ?*/ #if OPAL_HAVE_CONNECTX_XRC_DOMAINS #else + bool xrc_event = false; if (IBV_XRC_QP_EVENT_FLAG & event.event_type) { xrc_event = true; /* Clean the bitnd handel as usual */ @@ -368,14 +369,14 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po BTL_ERROR(("Alternative path migration event reported")); if (APM_ENABLED) { BTL_ERROR(("Trying to find additional path...")); - if (!xrc_event) - mca_btl_openib_load_apm(event.element.qp, - qp2endpoint(event.element.qp, device)); #if HAVE_XRC && !OPAL_HAVE_CONNECTX_XRC_DOMAINS - else + if (xrc_event) mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num, xrc_qp2endpoint(event.element.xrc_qp_num, device)); + else #endif + mca_btl_openib_load_apm(event.element.qp, + qp2endpoint(event.element.qp, device)); } break; case IBV_EVENT_DEVICE_FATAL: @@ -383,6 +384,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po device->got_fatal_event = true; /* It is not critical to protect the counter */ OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1); + /* fall through */ case IBV_EVENT_CQ_ERR: case IBV_EVENT_QP_FATAL: if (event_type == IBV_EVENT_QP_FATAL) { @@ -415,15 +417,13 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po opal_show_help("help-mpi-btl-openib.txt", "of error event", true,opal_process_info.nodename, (int)getpid(), event_type, - openib_event_to_str((enum ibv_event_type)event_type), - xrc_event ? "true" : "false"); + openib_event_to_str((enum ibv_event_type)event_type)); break; case IBV_EVENT_PORT_ERR: opal_show_help("help-mpi-btl-openib.txt", "of error event", true,opal_process_info.nodename, (int)getpid(), event_type, - openib_event_to_str((enum ibv_event_type)event_type), - xrc_event ? "true" : "false"); + openib_event_to_str((enum ibv_event_type)event_type)); /* Set the flag to indicate port error */ device->got_port_event = true; OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1); @@ -451,7 +451,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po default: opal_show_help("help-mpi-btl-openib.txt", "of unknown event", true,opal_process_info.nodename, (int)getpid(), - event_type, xrc_event ? "true" : "false"); + event_type); } ibv_ack_async_event(&event); } else { @@ -545,7 +545,7 @@ int btl_openib_async_command_done(int exp) { int comp; if (read(mca_btl_openib_component.async_comp_pipe[0], &comp, - sizeof(int)) < 0){ + sizeof(int)) < (int) sizeof (int)){ BTL_ERROR(("Failed to read from pipe")); return OPAL_ERROR; } diff --git a/opal/mca/btl/openib/btl_openib_endpoint.c b/opal/mca/btl/openib/btl_openib_endpoint.c index c117acd1c2b..540c3902c06 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.c +++ b/opal/mca/btl/openib/btl_openib_endpoint.c @@ -689,7 +689,7 @@ static void mca_btl_openib_endpoint_credits( /* we don't acquire a WQE for credit message - so decrement. * Note: doing it for QP used for credit management */ - qp_get_wqe(ep, des->order); + (void) qp_get_wqe(ep, des->order); if(check_send_credits(ep, qp) || check_eager_rdma_credits(ep)) mca_btl_openib_endpoint_send_credits(ep, qp);