Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions opal/mca/base/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -68,6 +69,7 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_base_component_priority_list_item_t);
*/
OPAL_DECLSPEC extern char *mca_base_component_path;
OPAL_DECLSPEC extern bool mca_base_component_show_load_errors;
OPAL_DECLSPEC extern bool mca_base_component_track_load_errors;
OPAL_DECLSPEC extern bool mca_base_component_disable_dlopen;
OPAL_DECLSPEC extern char *mca_base_system_default_path;
OPAL_DECLSPEC extern char *mca_base_user_default_path;
Expand Down
32 changes: 32 additions & 0 deletions opal/mca/base/mca_base_component_repository.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -55,6 +56,29 @@ OBJ_CLASS_INSTANCE(mca_base_component_repository_item_t, opal_list_item_t,

#endif /* OPAL_HAVE_DL_SUPPORT */

/*
 * Class definition for mca_base_failed_component_t.  The class derives
 * from opal_list_item_t and uses clf_constructor / clf_destructor as its
 * constructor and destructor.
 */
static void clf_constructor(opal_object_t *obj);
static void clf_destructor(opal_object_t *obj);

OBJ_CLASS_INSTANCE(mca_base_failed_component_t, opal_list_item_t,
clf_constructor, clf_destructor);


/*
 * Constructor for mca_base_failed_component_t: a new record starts with
 * no associated repository item and no error text.
 */
static void clf_constructor(opal_object_t *obj)
{
    mca_base_failed_component_t *failed = (mca_base_failed_component_t *) obj;

    failed->error_msg = NULL;
    failed->comp = NULL;
}

/*
 * Destructor for mca_base_failed_component_t: frees the error text held
 * by the record.  The repository item pointer is only cleared here; it is
 * not released by this function.
 */
static void clf_destructor(opal_object_t *obj)
{
    mca_base_failed_component_t *failed = (mca_base_failed_component_t *) obj;

    /* free(NULL) is a no-op, so no explicit NULL check is required */
    free(failed->error_msg);
    failed->error_msg = NULL;
    failed->comp = NULL;
}

/*
* Private variables
Expand Down Expand Up @@ -408,6 +432,14 @@ int mca_base_component_repository_open (mca_base_framework_t *framework,
}
opal_output_verbose(vl, 0, "mca_base_component_repository_open: unable to open %s: %s (ignored)",
ri->ri_base, err_msg);

if( mca_base_component_track_load_errors ) {
    /* Remember which component failed and why, so the failure can be
     * reported later from the framework's failed-component list. */
    mca_base_failed_component_t *f_comp = OBJ_NEW(mca_base_failed_component_t);
    if( NULL != f_comp ) {
        f_comp->comp = ri;
        if( 0 > asprintf(&(f_comp->error_msg), "%s", err_msg) ) {
            /* On failure asprintf may leave the pointer undefined; reset it
             * so the destructor does not free a garbage value, and do not
             * record an entry that has no message. */
            f_comp->error_msg = NULL;
            OBJ_RELEASE(f_comp);
        } else {
            opal_list_append(&framework->framework_failed_components, &f_comp->super);
        }
    }
}

return OPAL_ERR_BAD_PARAM;
}

Expand Down
12 changes: 12 additions & 0 deletions opal/mca/base/mca_base_component_repository.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -60,6 +61,17 @@ typedef struct mca_base_component_repository_item_t mca_base_component_repositor

OBJ_CLASS_DECLARATION(mca_base_component_repository_item_t);

/**
 * Structure to track information about why a component failed to load.
 *
 * Instances are list items, so they can be stored on an opal_list_t.
 */
typedef struct mca_base_failed_component_t mca_base_failed_component_t;
struct mca_base_failed_component_t {
    /** Parent class (list item); kept as the first member */
    opal_list_item_t super;
    /** Repository item of the component that failed to load; the class
     *  destructor clears this pointer but does not release the item */
    mca_base_component_repository_item_t *comp;
    /** Heap-allocated text describing the failure (may be NULL); freed
     *  by the class destructor */
    char *error_msg;
};
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_base_failed_component_t);

/**
* @brief initialize the component repository
*
Expand Down
6 changes: 6 additions & 0 deletions opal/mca/base/mca_base_framework.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -66,6 +67,7 @@ int mca_base_framework_register (struct mca_base_framework_t *framework,
}

OBJ_CONSTRUCT(&framework->framework_components, opal_list_t);
OBJ_CONSTRUCT(&framework->framework_failed_components, opal_list_t);

if (framework->framework_flags & MCA_BASE_FRAMEWORK_FLAG_NO_DSO) {
flags |= MCA_BASE_REGISTER_STATIC_ONLY;
Expand Down Expand Up @@ -228,12 +230,16 @@ int mca_base_framework_close (struct mca_base_framework_t *framework) {
framework->framework_output);
OBJ_RELEASE(item);
}
while (NULL != (item = opal_list_remove_first (&framework->framework_failed_components))) {
OBJ_RELEASE(item);
}
ret = OPAL_SUCCESS;
}

framework->framework_flags &= ~(MCA_BASE_FRAMEWORK_FLAG_REGISTERED | MCA_BASE_FRAMEWORK_FLAG_OPEN);

OBJ_DESTRUCT(&framework->framework_components);
OBJ_DESTRUCT(&framework->framework_failed_components);

framework_close_output (framework);

Expand Down
3 changes: 3 additions & 0 deletions opal/mca/base/mca_base_framework.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
/*
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -154,6 +155,8 @@ typedef struct mca_base_framework_t {
/** List of selected components (filled in by mca_base_framework_register()
or mca_base_framework_open() */
opal_list_t framework_components;
/** List of components that failed to load */
opal_list_t framework_failed_components;
} mca_base_framework_t;


Expand Down
9 changes: 9 additions & 0 deletions opal/mca/base/mca_base_open.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ int mca_base_opened = 0;
char *mca_base_system_default_path = NULL;
char *mca_base_user_default_path = NULL;
bool mca_base_component_show_load_errors = true;
bool mca_base_component_track_load_errors = false;
bool mca_base_component_disable_dlopen = false;

static char *mca_base_verbose = NULL;
Expand Down Expand Up @@ -111,6 +112,14 @@ int mca_base_open(void)
(void) mca_base_var_register_synonym(var_id, "opal", "mca", NULL, "component_show_load_errors",
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);

mca_base_component_track_load_errors = false;
var_id = mca_base_var_register("opal", "mca", "base", "component_track_load_errors",
"Whether to track errors for components that failed to load or not",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_base_component_track_load_errors);

mca_base_component_disable_dlopen = false;
var_id = mca_base_var_register("opal", "mca", "base", "component_disable_dlopen",
"Whether to attempt to disable opening dynamic components or not",
Expand Down
46 changes: 46 additions & 0 deletions opal/runtime/opal_info_support.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* reserved.
* Copyright (c) 2011-2012 University of Houston. All rights reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -50,6 +51,7 @@
#include "opal/mca/installdirs/installdirs.h"

#include "opal/runtime/opal_info_support.h"
#include "opal/mca/base/mca_base_component_repository.h"

const char *opal_info_path_prefix = "prefix";
const char *opal_info_path_bindir = "bindir";
Expand Down Expand Up @@ -109,6 +111,9 @@ OBJ_CLASS_INSTANCE(opal_info_component_map_t,
component_map_construct,
component_map_destruct);

static void opal_info_show_failed_component(const mca_base_component_repository_item_t* ri,
const char *error_msg);

int opal_info_init(int argc, char **argv,
opal_cmd_line_t *opal_info_cmd_line)
{
Expand Down Expand Up @@ -157,6 +162,8 @@ int opal_info_init(int argc, char **argv,
"Show only variables with at most this level (1-9)");
opal_cmd_line_make_opt3(opal_info_cmd_line, 's', NULL, "selected-only", 0,
"Show only variables from selected components");
opal_cmd_line_make_opt3(opal_info_cmd_line, '\0', NULL, "show-failed", 0,
"Show the components that failed to load along with the reason why they failed.");

/* set our threading level */
opal_set_using_threads(false);
Expand Down Expand Up @@ -223,6 +230,10 @@ int opal_info_init(int argc, char **argv,
opal_info_register_flags = MCA_BASE_REGISTER_DEFAULT;
}

if( opal_cmd_line_is_taken(opal_info_cmd_line, "show-failed") ) {
mca_base_component_track_load_errors = true;
}

return OPAL_SUCCESS;
}

Expand All @@ -245,6 +256,7 @@ static int info_register_framework (mca_base_framework_t *framework, opal_pointe
map = OBJ_NEW(opal_info_component_map_t);
map->type = strdup(framework->framework_name);
map->components = &framework->framework_components;
map->failed_components = &framework->framework_failed_components;
opal_pointer_array_add(component_map, map);
}

Expand Down Expand Up @@ -1012,6 +1024,7 @@ void opal_info_show_component_version(opal_pointer_array_t *mca_types,
bool want_all_types = false;
bool found;
mca_base_component_list_item_t *cli;
mca_base_failed_component_t *cli_failed;
int j;
char *pos;
opal_info_component_map_t *map;
Expand Down Expand Up @@ -1057,6 +1070,15 @@ void opal_info_show_component_version(opal_pointer_array_t *mca_types,
}
}

/* Also report the components of this type that failed to load */
OPAL_LIST_FOREACH(cli_failed, map->failed_components, mca_base_failed_component_t) {
mca_base_component_repository_item_t *ri = cli_failed->comp;
if (want_all_components ||
0 == strcmp(component_name, ri->ri_name) ) {
opal_info_show_failed_component(ri, cli_failed->error_msg);
}
}

if (!want_all_types) {
break;
}
Expand All @@ -1065,6 +1087,30 @@ void opal_info_show_component_version(opal_pointer_array_t *mca_types,
}


/*
 * Print one entry for a component that failed to load.
 *
 * In pretty mode the entry is labelled "MCA <type>" and its value is
 * "<name> (failed to load) <reason>".  Otherwise the key is
 * "mca:<type>:<name>:failed" and the value is the reason alone.
 *
 * @param ri         repository item of the failed component (its ri_type
 *                   and ri_name fields are read)
 * @param error_msg  reason the load failed; a NULL value is reported as
 *                   "unknown error"
 *
 * If a string cannot be allocated the entry is silently skipped.
 */
static void opal_info_show_failed_component(const mca_base_component_repository_item_t* ri,
                                            const char *error_msg)
{
    char *message = NULL;
    char *content = NULL;
    /* Never hand a NULL pointer to a "%s" conversion */
    const char *reason = (NULL != error_msg) ? error_msg : "unknown error";

    if (opal_info_pretty) {
        if (0 > asprintf(&message, "MCA %s", ri->ri_type)) {
            /* pointer contents are undefined after a failed asprintf */
            message = NULL;
            goto cleanup;
        }
        if (0 > asprintf(&content, "%s (failed to load) %s", ri->ri_name, reason)) {
            content = NULL;
            goto cleanup;
        }

        opal_info_out(message, NULL, content);
    } else {
        if (0 > asprintf(&message, "mca:%s:%s:failed", ri->ri_type, ri->ri_name)) {
            message = NULL;
            goto cleanup;
        }
        if (0 > asprintf(&content, "%s", reason)) {
            content = NULL;
            goto cleanup;
        }

        opal_info_out(NULL, message, content);
    }

cleanup:
    /* free(NULL) is a no-op, so both pointers can be released unconditionally */
    free(message);
    free(content);
}

/*
* Given a component, display its relevant version(s)
*/
Expand Down
2 changes: 2 additions & 0 deletions opal/runtime/opal_info_support.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -50,6 +51,7 @@ typedef struct {
opal_list_item_t super;
char *type;
opal_list_t *components;
opal_list_t *failed_components;
} opal_info_component_map_t;
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_info_component_map_t);

Expand Down