From 7b7a06728c6d5e6c9d38794f81f803c471d0f125 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Tue, 11 Aug 2015 12:35:41 -0700 Subject: [PATCH] lama: remove for the v2.x series --- orte/mca/rmaps/lama/.opal_ignore | 0 orte/mca/rmaps/lama/Makefile.am | 40 - orte/mca/rmaps/lama/configure.m4 | 19 - orte/mca/rmaps/lama/help-orte-rmaps-lama.txt | 173 -- orte/mca/rmaps/lama/owner.txt | 7 - orte/mca/rmaps/lama/rmaps_lama.h | 177 -- orte/mca/rmaps/lama/rmaps_lama_component.c | 136 -- orte/mca/rmaps/lama/rmaps_lama_max_tree.c | 1182 ----------- orte/mca/rmaps/lama/rmaps_lama_module.c | 1916 ------------------ orte/mca/rmaps/lama/rmaps_lama_params.c | 878 -------- 10 files changed, 4528 deletions(-) delete mode 100644 orte/mca/rmaps/lama/.opal_ignore delete mode 100644 orte/mca/rmaps/lama/Makefile.am delete mode 100644 orte/mca/rmaps/lama/configure.m4 delete mode 100644 orte/mca/rmaps/lama/help-orte-rmaps-lama.txt delete mode 100644 orte/mca/rmaps/lama/owner.txt delete mode 100644 orte/mca/rmaps/lama/rmaps_lama.h delete mode 100644 orte/mca/rmaps/lama/rmaps_lama_component.c delete mode 100644 orte/mca/rmaps/lama/rmaps_lama_max_tree.c delete mode 100644 orte/mca/rmaps/lama/rmaps_lama_module.c delete mode 100644 orte/mca/rmaps/lama/rmaps_lama_params.c diff --git a/orte/mca/rmaps/lama/.opal_ignore b/orte/mca/rmaps/lama/.opal_ignore deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/orte/mca/rmaps/lama/Makefile.am b/orte/mca/rmaps/lama/Makefile.am deleted file mode 100644 index fabcb55e3f..0000000000 --- a/orte/mca/rmaps/lama/Makefile.am +++ /dev/null @@ -1,40 +0,0 @@ -# -# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. -# -# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_ortedata_DATA = help-orte-rmaps-lama.txt - -sources = \ - rmaps_lama_module.c \ - rmaps_lama_max_tree.c \ - rmaps_lama_params.c \ - rmaps_lama.h \ - rmaps_lama_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_rmaps_lama_DSO -component_noinst = -component_install = mca_rmaps_lama.la -else -component_noinst = libmca_rmaps_lama.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_rmaps_lama_la_SOURCES = $(sources) -mca_rmaps_lama_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_rmaps_lama_la_SOURCES =$(sources) -libmca_rmaps_lama_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/rmaps/lama/configure.m4 b/orte/mca/rmaps/lama/configure.m4 deleted file mode 100644 index 2424644aa7..0000000000 --- a/orte/mca/rmaps/lama/configure.m4 +++ /dev/null @@ -1,19 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2012 Los Alamos National Security, LLC. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# MCA_rmaps_lama_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_rmaps_lama_CONFIG], [ - AC_CONFIG_FILES([orte/mca/rmaps/lama/Makefile]) - - AS_IF([test "$OPAL_HAVE_HWLOC" = 1], - [$1], - [$2]) -]) diff --git a/orte/mca/rmaps/lama/help-orte-rmaps-lama.txt b/orte/mca/rmaps/lama/help-orte-rmaps-lama.txt deleted file mode 100644 index fa3a987b4d..0000000000 --- a/orte/mca/rmaps/lama/help-orte-rmaps-lama.txt +++ /dev/null @@ -1,173 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. -# Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for LAMA Mapper. -# -[orte-rmaps-lama:multi-apps-and-zero-np] -RMAPS found multiple applications to be launched, with at least one that failed -to specify the number of processes to execute. When specifying multiple -applications, you must specify how many processes of each to launch via the --np argument. -# -[orte-rmaps-lama:oversubscribe] -RMaps LAMA detected oversubscription after mapping %d of %d processes. -Since you have asked not to oversubscribe the resources the job will not -be launched. If you would instead like to oversubscribe the resources -try using the --oversubscribe option to mpirun. -# -[orte-rmaps-lama:no-resources-available] -RMaps LAMA detected that there are not enough resources to map the -remainder of the job. Check the command line options, and the number of -nodes allocated to this job. - Application Context : %d - # of Processes Successfully Mapped: %d - # of Processes Requested : %d - Mapping : %s - Binding : %s - MPPR : %s - Ordering : %s -# -[orte-rmaps-lama:merge-conflict-bad-prune-src] -RMaps LAMA detected that it needed to prune a level of the hierarchy that -was necessary for one of the command line parameters. Check your allocation -and the options below to make sure they are correct. - Conflicting Level Description: %s - Mapping : %s - Binding : %s - MPPR : %s - Ordering : %s -# -[invalid mapping option] -The specified mapping option is not supported with the LAMA rmaps -mapper: - - Specified mapping option: %s - Reason it is invalid: %s - -LAMA supports the following options to the mpirun --map-by option: - - node, numa, socket, l1cache, l2cache, l3cache, core, hwthread, slot - -Alternatively, LAMA supports specifying a sequence of letters in the -rmaps_lama_map MCA parameter; each letter indicates a "direction" for -mapping. The rmaps_lama_map MCA parameter is richer/more flexible -than the --may-by CLI option. If rmaps_lama_map is specified, the -following letters must be specified: - - h: hardware thread - c: processor core - s: processor socket - n: node (server) - -The following may also optionally be included in the mapping string: - - N: NUMA node - L1: L1 cache - L2: L2 cache - L3: L3 cache - -For example, the two commands below are equivalent: - - mpirun --mca rmaps lama --mca rmaps_lama_map csNh ... - mpirun --mca rmaps lama --map-by core ... -# -[invalid binding option] -The specified binding option is not supported with the LAMA rmaps -mapper: - - Specified binding option: %s - Reason it is invalid: %s - -LAMA binding options can be specified via the mpirun --bind-to command -line option or rmaps_lama_bind MCA param: - - --bind-to rmaps_lama_binding - Locality option option - ---------------- --------- ------------------ - Hardware thread hwthread h - Processor core core c - Processor socket socket s - NUMA node numa N - L1 cache l1cache L1 - L2 cache l2cache L2 - L3 cache l3cache L3 - Node (server) node n - -The --bind-to option assumes a single locality (e.g., bind each MPI -process to a single core, socket, etc.). The rmaps_lama_bind MCA -param requires an integer specifying how many localities to which to -bind. For example, the following two command lines are equivalent, -and bind each MPI process to a single core: - - mpirun --btl rmaps lama --mca rmaps_lama_bind 1c ... - mpirun --btl rmaps lama --bind-to core ... - -The rmaps_lama_bind MCA parameter is more flexible than the --bind-to -CLI option, because it allows binding to multiple resources. For -example, specifing an rmaps_lama_bind value of "2c" binds each MPI -process to two cores. -# -[invalid ordering option] -The specified ordering option is not supported. - - Specified ordering option: %s - -The LAMA ordering can be specified via the rmaps_lama_ordering MCA -parameter. - -Two options are supported for ordering ranks in MPI_COMM_WORLD (MCW): - - s: Sequential. MCW rank ordering is sequential by hardware thread - across all nodes. E.g., MCW rank 0 is the first process on node - 0; MCW rank 1 is the second process on node 0, and so on. - n: Natural. MCW rank ordering follows the "natural" mapping layout. - For example, in a by-socket layout, MCW rank 0 is the first - process on the 1st socket on node 0. MCW rank 1 is then the - first process on the 2nd socket on node 0. And so on. -# -[invalid mppr option] -The specified Max Processes Per Resource (MPPR) value is invalid (in -the rmaps_lama_mppr MCA paramter): - - Specified MPPR: %s - Reason is is invalid: %s - -The MPPR is a comma-delimited list of specifications indicating how -many processes are allowed on a given type of resource before an MPI -job is considered to have oversubscribed that resource. Each -specification is a token in the format of "NUMBER:RESOURCE". For -example, the default MPPR of "1:c" means that Open MPI will map one -process per processor core before considering cores to be -oversubscribed. - -Multiple specifications may be useful; for example "1:c,2:s" maintains -the default one-process-per-core limitation, but places an additional -limitation of only two processes per processor socket (assuming that -there are more than two cores per socket). - -The LAMA MPPR specifications are set via the rmaps_lama_mppr MCA -parameter. The following resources can be specified: - - Hardware thread h - Processor core c - Processor socket s - NUMA node N - L1 cache L1 - L2 cache L2 - L3 cache L3 - Node (server) n -# -[internal error] -An unexpected internal error occurred in the LAMA mapper; your job -will now fail. Sorry. - - File: %s - Message: %s diff --git a/orte/mca/rmaps/lama/owner.txt b/orte/mca/rmaps/lama/owner.txt deleted file mode 100644 index 0cc0384f0e..0000000000 --- a/orte/mca/rmaps/lama/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: CISCO -status: maintenance diff --git a/orte/mca/rmaps/lama/rmaps_lama.h b/orte/mca/rmaps/lama/rmaps_lama.h deleted file mode 100644 index cea27e56a6..0000000000 --- a/orte/mca/rmaps/lama/rmaps_lama.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Resource Mapping - */ -#ifndef ORTE_RMAPS_LAMA_H -#define ORTE_RMAPS_LAMA_H - -#include "orte_config.h" - -#include "opal/class/opal_tree.h" - -#include "orte/mca/rmaps/rmaps.h" - -BEGIN_C_DECLS - -ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_lama_component; - -extern orte_rmaps_base_module_t orte_rmaps_lama_module; - - -/********************************* - * Structures & Defines - *********************************/ -/* - * JJH: Can we reuse the opal_hwloc_level_t data structure in - * opal/mca/hwloc/hwloc.h - */ -typedef enum { - LAMA_LEVEL_MACHINE = 0, - LAMA_LEVEL_BOARD = 1, - LAMA_LEVEL_NUMA = 2, - LAMA_LEVEL_SOCKET = 3, - LAMA_LEVEL_CACHE_L3 = 4, - LAMA_LEVEL_CACHE_L2 = 5, - LAMA_LEVEL_CACHE_L1 = 6, - LAMA_LEVEL_CORE = 7, - LAMA_LEVEL_PU = 8, - LAMA_LEVEL_UNKNOWN = 9 -} rmaps_lama_level_type_t; - -typedef enum { - LAMA_ORDER_NATURAL = 0, - LAMA_ORDER_SEQ = 1 -} rmaps_lama_order_type_t; - -struct rmaps_lama_level_info_t { - rmaps_lama_level_type_t type; - int max_resources; -}; -typedef struct rmaps_lama_level_info_t rmaps_lama_level_info_t; - -/* - * Structure to attach to the hwloc tree - * Accounting for mppr - */ -struct rmaps_lama_hwloc_user_t { - opal_object_t super; - - opal_pointer_array_t *node_mppr; -}; -typedef struct rmaps_lama_hwloc_user_t rmaps_lama_hwloc_user_t; -OBJ_CLASS_DECLARATION(rmaps_lama_hwloc_user_t); - -struct rmaps_lama_node_mppr_t { - int max; - int cur; -}; -typedef struct rmaps_lama_node_mppr_t rmaps_lama_node_mppr_t; - -rmaps_lama_level_type_t lama_type_str_to_enum(char *param); -char * lama_type_enum_to_str(rmaps_lama_level_type_t param); - - -/********************************* - * Command Line Interface Parsing - *********************************/ -/* - * User defined command line interface (CLI) arguments - */ -extern char * rmaps_lama_cmd_map; -extern char * rmaps_lama_cmd_bind; -extern char * rmaps_lama_cmd_mppr; -extern char * rmaps_lama_cmd_ordering; -extern bool rmaps_lama_timing_enabled; -extern bool rmaps_lama_can_oversubscribe; -extern bool rmaps_lama_am_oversubscribing; - -/* - * Internal representations of command line arguments - */ -extern int lama_mapping_num_layouts; -extern rmaps_lama_level_type_t *lama_mapping_layout; - -extern rmaps_lama_level_type_t lama_binding_level; - -extern rmaps_lama_level_info_t *lama_mppr_levels; -extern int lama_mppr_num_levels; - -/* - * Homogeneous system optimization - */ -extern bool lama_mppr_max_tree_homogeneous_system; - -/* - * Maximum length of digits in CLI - */ -#define MAX_BIND_DIGIT_LEN 4 - -int rmaps_lama_process_alias_params(orte_job_t *jdata); - -int rmaps_lama_parse_mapping(char *layout, - rmaps_lama_level_type_t **layout_types, - rmaps_lama_level_type_t **layout_types_sorted, - int *num_types); -int rmaps_lama_parse_binding(char *layout, - rmaps_lama_level_type_t *binding_level, - int *num_types); -int rmaps_lama_parse_mppr(char *layout, - rmaps_lama_level_info_t **mppr_levels, - int *num_types); -int rmaps_lama_parse_ordering(char *layout, - rmaps_lama_order_type_t *order); - -bool rmaps_lama_ok_to_prune_level(rmaps_lama_level_type_t level); - -/********************************* - * Max Tree Structure - *********************************/ -struct rmaps_lama_max_tree_item_t { - opal_tree_item_t tree_element; - - rmaps_lama_level_type_t type; -}; -typedef struct rmaps_lama_max_tree_item_t rmaps_lama_max_tree_item_t; - - -/* - * Union all topologies into the max tree - */ -int rmaps_lama_build_max_tree(orte_job_t *jdata, opal_list_t *node_list, - opal_tree_t * max_tree, bool *is_homogeneous); - -/* - * Find a matching subtree - */ -hwloc_obj_t * rmaps_lama_find_nth_subtree_match(hwloc_topology_t hwloc_topo, - hwloc_obj_t parent_obj, - int nth, - rmaps_lama_level_type_t lama_key); -hwloc_obj_t * rmaps_lama_find_parent(hwloc_topology_t hwloc_topo, - hwloc_obj_t *child_obj, - rmaps_lama_level_type_t lama_key); - -/* - * Create Empty Tree - */ -opal_tree_t * rmaps_lama_create_empty_max_tree(void); - -/* - * Pretty Print - */ -void rmaps_lama_max_tree_pretty_print_tree(opal_tree_t *tree); - -END_C_DECLS - -#endif /* ORTE_RMAPS_LAMA_H */ diff --git a/orte/mca/rmaps/lama/rmaps_lama_component.c b/orte/mca/rmaps/lama/rmaps_lama_component.c deleted file mode 100644 index a2ae980c72..0000000000 --- a/orte/mca/rmaps/lama/rmaps_lama_component.c +++ /dev/null @@ -1,136 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" - -#include "orte/mca/rmaps/base/rmaps_private.h" -#include "orte/mca/rmaps/base/base.h" - -#include "rmaps_lama.h" - -/* - * Local functions - */ - -static int orte_rmaps_lama_register(void); -static int orte_rmaps_lama_query(mca_base_module_t **module, int *priority); - -static int module_priority; - -char * rmaps_lama_cmd_map = NULL; -char * rmaps_lama_cmd_bind = NULL; -char * rmaps_lama_cmd_mppr = NULL; -char * rmaps_lama_cmd_ordering = NULL; -bool rmaps_lama_timing_enabled = false; -bool rmaps_lama_can_oversubscribe = false; -bool rmaps_lama_am_oversubscribing = false; - -orte_rmaps_base_component_t mca_rmaps_lama_component = { - .base_version = { - ORTE_RMAPS_BASE_VERSION_2_0_0, - - .mca_component_name = "lama", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_query_component = orte_rmaps_lama_query, - .mca_register_component_params = orte_rmaps_lama_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - - -static int orte_rmaps_lama_register(void) -{ - mca_base_component_t *c = &mca_rmaps_lama_component.base_version; - - /* JMS Artifically low for now */ - module_priority = 0; - (void) mca_base_component_var_register (c, "priority", "Priority of the LAMA rmaps component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &module_priority); - - rmaps_lama_timing_enabled = false; - (void) mca_base_component_var_register (c, "timing", - "Enable timing information. [Default = disabled]", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &rmaps_lama_timing_enabled); - - rmaps_lama_cmd_map = NULL; - (void) mca_base_component_var_register (c, "map", "LAMA Map: Process layout iteration ordering (See documentation)", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &rmaps_lama_cmd_map); - - rmaps_lama_cmd_bind = NULL; - (void) mca_base_component_var_register (c, "bind", "LAMA Bind: Bind to the specified number of resources (See documentation)", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &rmaps_lama_cmd_bind); - - rmaps_lama_cmd_mppr = NULL; - (void) mca_base_component_var_register (c, "mppr", "LAMA MPPR: Maximum number of the specified resources available (See documentation)", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &rmaps_lama_cmd_mppr); - - rmaps_lama_cmd_ordering = NULL; - (void) mca_base_component_var_register (c, "ordering", "LAMA Ordering: Ordering (s) sequential, (n) natural - Default: n (See documentation)", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &rmaps_lama_cmd_ordering); - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Priority %3d", - module_priority); - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Map : %s", - (NULL == rmaps_lama_cmd_map) ? "NULL" : rmaps_lama_cmd_map); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Bind : %s", - (NULL == rmaps_lama_cmd_bind) ? "NULL" : rmaps_lama_cmd_bind); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: MPPR : %s", - (NULL == rmaps_lama_cmd_mppr) ? "NULL" : rmaps_lama_cmd_mppr); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Order : %s", - (NULL == rmaps_lama_cmd_ordering) ? "NULL" : rmaps_lama_cmd_ordering); - - return ORTE_SUCCESS; -} - - -static int orte_rmaps_lama_query(mca_base_module_t **module, int *priority) -{ - /* Only run on the HNP */ - - *priority = module_priority; - *module = (mca_base_module_t *)&orte_rmaps_lama_module; - - return ORTE_SUCCESS; -} diff --git a/orte/mca/rmaps/lama/rmaps_lama_max_tree.c b/orte/mca/rmaps/lama/rmaps_lama_max_tree.c deleted file mode 100644 index d14a8207a7..0000000000 --- a/orte/mca/rmaps/lama/rmaps_lama_max_tree.c +++ /dev/null @@ -1,1182 +0,0 @@ -/* - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * Max Tree Support Functions - * - */ -#include "rmaps_lama.h" - -#include "orte/util/show_help.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/rmaps/base/rmaps_private.h" -#include "orte/mca/rmaps/base/base.h" - -/********************************* - * Max Tree Construction - *********************************/ -/* - * Convert an hwloc tree to an opal_tree - */ -static int rmaps_lama_convert_hwloc_tree_to_opal_tree(opal_tree_t *opal_tree, - hwloc_topology_t *hwloc_topo); - -/* - * Convert an hwloc subtree to an opal subtree - */ -static int rmaps_lama_convert_hwloc_subtree(hwloc_obj_t obj, - opal_tree_item_t *parent_item); - -/* - * Convert LAMA key to HWLOC key/depth - */ -static int rmaps_lama_convert_lama_key_to_hwloc_key(rmaps_lama_level_type_t lama_key, - hwloc_obj_type_t *hwloc_key, int *depth); - -/* - * Convert HWLOC key/depth to LAMA key - */ -static int rmaps_lama_convert_hwloc_key_to_lama_key(hwloc_obj_type_t hwloc_key, int depth, - rmaps_lama_level_type_t *lama_key); - -/* - * Compare two HWLOC topologies for similar structure - */ -static int rmaps_lama_hwloc_compare_topos(hwloc_topology_t *left, hwloc_topology_t *right); -static int rmaps_lama_hwloc_compare_subtrees(hwloc_obj_t left, hwloc_obj_t right); - -/* - * Merge two opal_trees - */ -static int rmaps_lama_merge_trees(opal_tree_t *src_tree, opal_tree_t *into_tree, - opal_tree_item_t *src_parent, opal_tree_item_t *into_parent); - -/* - * Prune the max tree to just those levels specified - */ -static int rmaps_lama_prune_max_tree(opal_tree_t *max_tree, opal_tree_item_t *parent_item); - -/* - * Annotate the hwloc tree for MPPR accounting - */ -static int rmaps_lama_annotate_node_for_mppr(orte_node_t *node, hwloc_obj_t obj); - -/* - * Access the MPPR for the specified key - */ -static int rmaps_lama_get_mppr_for_key(orte_node_t *node, rmaps_lama_level_type_t lama_key); - -/* - * Recursive core of nth_subtree_match - */ -static int rmaps_lama_find_nth_subtree_match_core(hwloc_topology_t hwloc_topo, - hwloc_obj_t parent_obj, - int nth, - int *num_found, - hwloc_obj_type_t hwloc_key, - int depth, - hwloc_obj_t *cur_child); - -static void rmaps_lama_max_tree_item_construct(rmaps_lama_max_tree_item_t *item) -{ - item->type = LAMA_LEVEL_UNKNOWN; -} - - -/********************************* - * Max Tree Accessors/Functions - *********************************/ -OBJ_CLASS_INSTANCE(rmaps_lama_max_tree_item_t, - opal_tree_item_t, - rmaps_lama_max_tree_item_construct, NULL); - -static int lama_max_tree_comp(opal_tree_item_t *item, void *key); -static int lama_max_tree_serialize(opal_tree_item_t *item, opal_buffer_t *buffer); -static int lama_max_tree_deserialize(opal_buffer_t *buffer, opal_tree_item_t **item); -static void * lama_max_tree_get_key(opal_tree_item_t *item); - - -/********************************* - * Max Tree Pretty Print - *********************************/ -static char * rmaps_lama_max_tree_pretty_print_subtree_element_get(opal_tree_t *tree, - opal_tree_item_t *parent, - int level); -static void pretty_print_subtree(opal_tree_t *tree, opal_tree_item_t *parent, int level); -static void pretty_print_subtree_element(opal_tree_t *tree, opal_tree_item_t *parent, int level); - - -/********************************* - * Function Defintions - *********************************/ -int rmaps_lama_build_max_tree(orte_job_t *jdata, opal_list_t *node_list, - opal_tree_t * max_tree, bool *is_homogeneous) -{ - int ret; - opal_tree_t *tmp_tree = NULL; - hwloc_topology_t topo, *last_topo = NULL; - orte_node_t *cur_node = NULL; - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Building the Max Tree..."); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - - /* - * Assume homogeneous system, unless otherwise noted - */ - *is_homogeneous = true; - - /* - * Process all other unique trees from remote daemons who are in - * this allocation - */ - for(cur_node = (orte_node_t*)opal_list_get_first(node_list); - cur_node != (orte_node_t*)opal_list_get_end(node_list); - cur_node = (orte_node_t*)opal_list_get_next(cur_node) ) { - if (NULL == (topo = cur_node->topology)) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- No Tree Available: %s (skipping)", cur_node->name); - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Converting Remote Tree: %s", cur_node->name); - - /* - * Convert to opal_tree - */ - tmp_tree = rmaps_lama_create_empty_max_tree(); - rmaps_lama_convert_hwloc_tree_to_opal_tree(tmp_tree, &topo); - if( 11 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - rmaps_lama_max_tree_pretty_print_tree(tmp_tree); - } - - /* - * Compare the current and last topologies if we are still considering - * this max tree to represent a homogeneous system. - */ - if( *is_homogeneous ) { - if( NULL == last_topo ) { - last_topo = &topo; - } else { - if( 0 != rmaps_lama_hwloc_compare_topos(last_topo, &topo) ) { - *is_homogeneous = false; - } - } - } - - /* - * Prune the input tree so that is only contains levels that the user - * asked for. - */ - if( 11 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Pruning input Tree..."); - } - if( ORTE_SUCCESS != (ret = rmaps_lama_prune_max_tree(tmp_tree, opal_tree_get_root(tmp_tree))) ) { - return ret; - } - if( 11 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Input Tree... - Post Prune"); - rmaps_lama_max_tree_pretty_print_tree(tmp_tree); - } - - /* - * Merge into max_tree - */ - if( opal_tree_is_empty(max_tree) ) { - opal_tree_dup(tmp_tree, max_tree); - } else { - if( ORTE_SUCCESS != (ret = rmaps_lama_merge_trees(tmp_tree, - max_tree, - opal_tree_get_root(tmp_tree), - opal_tree_get_root(max_tree) ))) { - return ret; - } - } - - /* - * Release and move on... - */ - OBJ_RELEASE(tmp_tree); - tmp_tree = NULL; - } - - - /* - * Fill out the MPPR accounting information for each node - */ - for(cur_node = (orte_node_t*)opal_list_get_first(node_list); - cur_node != (orte_node_t*)opal_list_get_end(node_list); - cur_node = (orte_node_t*)opal_list_get_next(cur_node) ) { - if( ORTE_SUCCESS != (ret = rmaps_lama_annotate_node_for_mppr(cur_node, - hwloc_get_obj_by_depth(cur_node->topology, 0, 0))) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* - * JJH: NEEDS TESTING - * Note: This check is in place, but not used at the moment due to lack of - * system availability. Pending system availability and further testing, - * just assume heterogeneous. - */ - *is_homogeneous = false; - - /* - * Display the final Max Tree - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Final Max Tree... - %s system", - (*is_homogeneous ? "Homogeneous" : "Heterogeneous") ); - if( 11 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - rmaps_lama_max_tree_pretty_print_tree(max_tree); - } - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - - return ORTE_SUCCESS; -} - -static int rmaps_lama_convert_hwloc_tree_to_opal_tree(opal_tree_t *opal_tree, hwloc_topology_t *hwloc_topo) -{ - hwloc_obj_t topo_root; - - if( 15 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - opal_output_verbose(15, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Converting Topology:"); - /* opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); */ - opal_dss.dump(0, *hwloc_topo, OPAL_HWLOC_TOPO); - } - - topo_root = hwloc_get_root_obj(*hwloc_topo); - - rmaps_lama_convert_hwloc_subtree(topo_root, - opal_tree_get_root(opal_tree)); - - return ORTE_SUCCESS; -} - -static int rmaps_lama_convert_hwloc_subtree(hwloc_obj_t obj, - opal_tree_item_t *parent_item) -{ - rmaps_lama_max_tree_item_t *max_tree_item = NULL; - char * key_child_str = NULL; - char * key_parent_str = NULL; - - while (obj) { - /* - * Create new tree item - */ - max_tree_item = OBJ_NEW(rmaps_lama_max_tree_item_t); - - /* - * Convert the HWLOC object to the LAMA key - */ - rmaps_lama_convert_hwloc_key_to_lama_key(obj->type, - obj->attr->cache.depth, - &(max_tree_item->type)); - - /* - * Append tree item to parent. Unless it is the same as the - * parent (L1 instruction vs data cache). JJH: Newer versions - * of hwloc can differentiate from the obj->attr->cache.type. - */ - if( NULL != obj->parent && - obj->parent->type == obj->type && - obj->parent->attr->cache.depth == obj->attr->cache.depth ) { - key_child_str = lama_type_enum_to_str(max_tree_item->type); - key_parent_str = lama_type_enum_to_str(((rmaps_lama_max_tree_item_t*)parent_item)->type); - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Warning: Identical level detected: " - "Child [%s] vs Parent [%s]", - key_child_str, key_parent_str); - free(key_child_str); - free(key_parent_str); - - /* - * Add descendants if they exist - */ - if (obj->first_child) { - rmaps_lama_convert_hwloc_subtree(obj->first_child, - parent_item); - } - } else { - opal_tree_add_child(parent_item, &max_tree_item->tree_element); - - /* - * Add descendants if they exist - */ - if (obj->first_child) { - rmaps_lama_convert_hwloc_subtree(obj->first_child, - &max_tree_item->tree_element); - } - } - - /* - * Advance to next sibling - */ - obj = obj->next_sibling; - } - - return ORTE_SUCCESS; -} - -static int rmaps_lama_annotate_node_for_mppr(orte_node_t *node, hwloc_obj_t obj) -{ - rmaps_lama_hwloc_user_t *hwloc_userdata = NULL; - rmaps_lama_node_mppr_t *mppr_accounting = NULL; - rmaps_lama_level_type_t lama_key; - opal_hwloc_topo_data_t *opal_hwloc_topo = NULL; - int i; - - /* - * Attach our user pointer to the topology, if it is not already there. - * We will fill it in as needed later. - * - * Note: opal/mca/hwloc/base/hwloc_base_util.c attaches their own object - * to the userdata. There is a pointer in that structure we can use without - * interfering with what OPAL is trying to do. - */ - if( NULL == obj->userdata ) { - /* Some objects may not have topo data associated with them - * JJH: This is memory leak :/ Fix. - */ - obj->userdata = (void*)OBJ_NEW(opal_hwloc_topo_data_t); - } - if( NULL != obj->userdata ) { - opal_hwloc_topo = (opal_hwloc_topo_data_t*)(obj->userdata); - - if( NULL == opal_hwloc_topo->userdata ) { - hwloc_userdata = OBJ_NEW(rmaps_lama_hwloc_user_t); - opal_hwloc_topo->userdata = hwloc_userdata; - } else { - hwloc_userdata = (rmaps_lama_hwloc_user_t*)(opal_hwloc_topo->userdata); - } - } - - - /* - * Add node information if it is not already there - */ - mppr_accounting = (rmaps_lama_node_mppr_t*)opal_pointer_array_get_item(hwloc_userdata->node_mppr, node->index); - if( NULL == mppr_accounting ) { - /* - * Add MPPR accounting for this node associated with this object - */ - rmaps_lama_convert_hwloc_key_to_lama_key(obj->type, obj->attr->cache.depth, &lama_key); - - mppr_accounting = (rmaps_lama_node_mppr_t*)malloc(sizeof(rmaps_lama_node_mppr_t)); - mppr_accounting->max = rmaps_lama_get_mppr_for_key(node, lama_key); - mppr_accounting->cur = 0; - - opal_pointer_array_set_item(hwloc_userdata->node_mppr, node->index, mppr_accounting); - } - - - /* - * Decend tree - */ - for(i = 0; i < (int)obj->arity; ++i ) { - rmaps_lama_annotate_node_for_mppr(node, - obj->children[i]); - } - - return ORTE_SUCCESS; -} - -static int rmaps_lama_get_mppr_for_key(orte_node_t *node, rmaps_lama_level_type_t lama_key) -{ - int i; - - for( i = 0; i < lama_mppr_num_levels; ++i ) { - if( lama_key == lama_mppr_levels[i].type ) { - return lama_mppr_levels[i].max_resources; - } - } - - return -1; -} - -static int rmaps_lama_convert_lama_key_to_hwloc_key(rmaps_lama_level_type_t lama_key, hwloc_obj_type_t *hwloc_key, int *depth) -{ - *depth = 0; - - switch(lama_key) { - case LAMA_LEVEL_MACHINE: - *hwloc_key = HWLOC_OBJ_MACHINE; - break; - /* Note: HWLOC does not support boards */ -#if 0 - case LAMA_LEVEL_BOARD: - *hwloc_key = HWLOC_OBJ_MACHINE; - break; -#endif - case LAMA_LEVEL_SOCKET: - *hwloc_key = HWLOC_OBJ_SOCKET; - break; - case LAMA_LEVEL_CORE: - *hwloc_key = HWLOC_OBJ_CORE; - break; - case LAMA_LEVEL_PU: - *hwloc_key = HWLOC_OBJ_PU; - break; - case LAMA_LEVEL_CACHE_L1: - *hwloc_key = HWLOC_OBJ_CACHE; - *depth = 1; - break; - case LAMA_LEVEL_CACHE_L2: - *hwloc_key = HWLOC_OBJ_CACHE; - *depth = 2; - break; - case LAMA_LEVEL_CACHE_L3: - *hwloc_key = HWLOC_OBJ_CACHE; - *depth = 3; - break; - case LAMA_LEVEL_NUMA: - *hwloc_key = HWLOC_OBJ_NODE; - break; - default: - *hwloc_key = HWLOC_OBJ_TYPE_MAX; - break; - } - - return ORTE_SUCCESS; -} - -static int rmaps_lama_convert_hwloc_key_to_lama_key(hwloc_obj_type_t hwloc_key, int depth, rmaps_lama_level_type_t *lama_key) -{ - switch(hwloc_key) { - case HWLOC_OBJ_MACHINE: - *lama_key = LAMA_LEVEL_MACHINE; - break; - /* Node: HWLOC does not support boards */ -#if 0 - case HWLOC_OBJ_BOARD: - *lama_key = LAMA_LEVEL_BOARD; - break; -#endif - case HWLOC_OBJ_SOCKET: - *lama_key = LAMA_LEVEL_SOCKET; - break; - case HWLOC_OBJ_CORE: - *lama_key = LAMA_LEVEL_CORE; - break; - case HWLOC_OBJ_PU: - *lama_key = LAMA_LEVEL_PU; - break; - case HWLOC_OBJ_CACHE: - if( 1 == depth ) { - *lama_key = LAMA_LEVEL_CACHE_L1; - } - else if( 2 == depth ) { - *lama_key = LAMA_LEVEL_CACHE_L2; - } - else if( 3 == depth ) { - *lama_key = LAMA_LEVEL_CACHE_L3; - } - else { - *lama_key = LAMA_LEVEL_UNKNOWN; - } - break; - case HWLOC_OBJ_NODE: - *lama_key = LAMA_LEVEL_NUMA; - break; - default: - *lama_key = LAMA_LEVEL_UNKNOWN; - break; - } - - return ORTE_SUCCESS; -} - -static int rmaps_lama_hwloc_compare_topos(hwloc_topology_t *left, hwloc_topology_t *right) -{ - hwloc_obj_t left_root; - hwloc_obj_t right_root; - - /* - * Note: I hope that there is a 'better' way of doing this natively with - * HWLOC, but it is not obvious if they have the ability to compare - * topologies. So do a depth first comparison of the trees. - * You may be able to use the below: - * OPAL_EQUAL != opal_dss.compare(*last_topo, topo, OPAL_HWLOC_TOPO); - */ - - left_root = hwloc_get_obj_by_depth(*left, 0, 0); - right_root = hwloc_get_obj_by_depth(*right, 0, 0); - - return rmaps_lama_hwloc_compare_subtrees(left_root, right_root); -} - -static int rmaps_lama_hwloc_compare_subtrees(hwloc_obj_t left, hwloc_obj_t right) -{ - int i, ret; - - /* - * Check Types - */ - if( 0 != (ret = hwloc_compare_types(left->type, right->type)) ) { - return ret; - } - - /* - * Check 'arity' at this level - */ - if( left->arity > right->arity ) { - return -1; - } - else if( left->arity < right->arity ) { - return 1; - } - - /* - * Check all subtrees - */ - for(i = 0; i < (int)left->arity; ++i ) { - if( 0 != (ret = rmaps_lama_hwloc_compare_subtrees(left->children[i], - right->children[i])) ) { - return ret; - } - } - - /* - * Subtree is the same if we get here - */ - return 0; -} - -static int rmaps_lama_merge_trees(opal_tree_t *src_tree, opal_tree_t *max_tree, - opal_tree_item_t *src_parent, opal_tree_item_t *max_parent) -{ - int ret, exit_status = ORTE_SUCCESS; - rmaps_lama_level_type_t *key_src, *key_max; - opal_tree_item_t *child_item = NULL, *max_grandparent = NULL; - opal_tree_item_t *max_child_item = NULL; - int num_max, num_src; - int i; - char *key_src_str = NULL; - char *key_max_str = NULL; -#if 1 - char *str = NULL; -#endif - - /* - * Basecase - */ - if( NULL == src_parent ) { - return ORTE_SUCCESS; - } - - key_src = (rmaps_lama_level_type_t*)src_tree->get_key(src_parent); - key_max = (rmaps_lama_level_type_t*)max_tree->get_key(max_parent); - - key_src_str = lama_type_enum_to_str(*key_src); - key_max_str = lama_type_enum_to_str(*key_max); - - if( 15 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: CHECK: Merge Trees: Keys Src (%2d - %s) vs Max (%2d - %s)", - *key_src, key_src_str, *key_max, key_max_str); - } - - /* - * Make sure keys at this level match. - * - * JJH: Give up if they do not match. - * JJH: We should pick a victim and prune from the tree - * JJH: preferably from the 'native' tree. - */ - if( 0 != max_tree->comp(max_parent, src_tree->get_key(src_parent)) ) { - /* - * If the source conflicts due to cache, iterate to children to find a match. - * JJH: Double check this for different heterogenous systems - */ - if( LAMA_LEVEL_CACHE_L3 == *key_src || - LAMA_LEVEL_CACHE_L2 == *key_src || - LAMA_LEVEL_CACHE_L1 == *key_src || - LAMA_LEVEL_NUMA == *key_src ) { - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Warning: Merge Trees: " - "Src with Conflicting Memory Hierarchy [Src (%2d - %s) vs Max (%2d - %s)]", - *key_src, key_src_str, *key_max, key_max_str); - - /* - * If we are pruning a cache level, then check to make sure it is - * not important to the process layout. - */ - if( !rmaps_lama_ok_to_prune_level(*key_src) ) { - orte_show_help("help-orte-rmaps-lama.txt", - "orte-rmaps-lama:merge-conflict-bad-prune-src", - true, - key_src_str, - (NULL == rmaps_lama_cmd_map ? "[Not Provided]" : rmaps_lama_cmd_map), - (NULL == rmaps_lama_cmd_bind ? "[Not Provided]" : rmaps_lama_cmd_bind), - (NULL == rmaps_lama_cmd_mppr ? "[Not Provided]" : rmaps_lama_cmd_mppr), - (NULL == rmaps_lama_cmd_ordering ? "[Not Provided]" : rmaps_lama_cmd_ordering)); - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * If the number of children at this pruned level was larger than - * the max tree arity at this level, then duplicate the max_tree - * element the approprate number of times - */ - max_grandparent = opal_tree_get_parent(max_parent); - num_max = opal_tree_num_children(max_grandparent); - num_src = opal_tree_num_children(src_parent); - - for(i = 0; i < (num_src - num_max); ++i ) { -#if 1 - str = rmaps_lama_max_tree_pretty_print_subtree_element_get(max_tree, max_parent, 0); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Merge: Appending child %s - post prune", - str); - free(str); -#endif - /* Duplicate max child subtree */ - opal_tree_copy_subtree(max_tree, max_parent, max_tree, max_grandparent); - } - - /* - * Iterate to children, until we find a match - */ - for(child_item = opal_tree_get_first_child(src_parent); - child_item != NULL; - child_item = opal_tree_get_next_sibling(child_item) ) { - - if( ORTE_SUCCESS != (ret = rmaps_lama_merge_trees(src_tree, - max_tree, - child_item, - max_parent)) ) { - exit_status = ret; - goto cleanup; - } - } - - exit_status = ORTE_SUCCESS; - goto cleanup; - } - /* - * If the max tree conflicts due to cache, then we need to prune the - * max tree until it matches. - * JJH: If we are pruning a level of the hierarchy then make sure we - * JJH: don't need it for the process layout. - */ - else if( LAMA_LEVEL_CACHE_L3 == *key_max || - LAMA_LEVEL_CACHE_L2 == *key_max || - LAMA_LEVEL_CACHE_L1 == *key_max || - LAMA_LEVEL_NUMA == *key_max ) { - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Warning: Merge Trees: " - "Max with Conflicting Memory Hierarchy [Src (%2d - %s) vs Max (%2d - %s)]", - *key_src, key_src_str, *key_max, key_max_str); - - /* - * If we are pruning a cache level, then check to make sure it is - * not important to the process layout. - */ - if( !rmaps_lama_ok_to_prune_level(*key_max) ) { - orte_show_help("help-orte-rmaps-lama.txt", - "orte-rmaps-lama:merge-conflict-bad-prune-src", - true, - key_max_str, - (NULL == rmaps_lama_cmd_map ? "[Not Provided]" : rmaps_lama_cmd_map), - (NULL == rmaps_lama_cmd_bind ? "[Not Provided]" : rmaps_lama_cmd_bind), - (NULL == rmaps_lama_cmd_mppr ? "[Not Provided]" : rmaps_lama_cmd_mppr), - (NULL == rmaps_lama_cmd_ordering ? "[Not Provided]" : rmaps_lama_cmd_ordering)); - exit_status = ORTE_ERROR; - goto cleanup; - } - - max_child_item = opal_tree_get_first_child(max_parent); - /* Prune parent */ - opal_tree_remove_item(max_tree, max_parent); - - /* Try again with child */ - exit_status = rmaps_lama_merge_trees(src_tree, - max_tree, - src_parent, - max_child_item); - goto cleanup; - } - - /* - * If we cannot resolve it, give up. - */ - opal_output(0, "mca:rmaps:lama: Error: Merge Trees: " - "Different Keys Src (%2d - %s) vs Max (%2d - %s) - Do not know how to resolve - give up!", - *key_src, key_src_str, *key_max, key_max_str); - - exit_status = ORTE_ERROR; - goto cleanup; - } - - num_max = opal_tree_num_children(max_parent); - num_src = opal_tree_num_children(src_parent); - - /* - * If the 'native' tree has more children than the 'max' tree. - * Add the missing children to the 'max' tree. - */ - if( num_max < num_src ) { - i = 0; - for(child_item = opal_tree_get_first_child(src_parent); - child_item != NULL; - child_item = opal_tree_get_next_sibling(child_item)) { - if(i >= num_max ) { -#if 1 - str = rmaps_lama_max_tree_pretty_print_subtree_element_get(src_tree, child_item, 0); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Merge: Appending child %s", - str); - free(str); -#endif - /* Add child's subtree to max */ - opal_tree_copy_subtree(src_tree, child_item, max_tree, max_parent); - } - ++i; - } - } - - /* - * Recursively search all children of 'native' tree. - * - * Note: Only need to add the children to the 'left-most' branch of the - * 'max' tree since that is the only branch that is searched during mapping. - * But do the whole thing for good measure. - */ - for( child_item = opal_tree_get_first_child(src_parent), - max_child_item = opal_tree_get_first_child(max_parent); - child_item != NULL; - child_item = opal_tree_get_next_sibling(child_item), - max_child_item = opal_tree_get_next_sibling(max_child_item) ) { - - if( ORTE_SUCCESS != (ret = rmaps_lama_merge_trees(src_tree, - max_tree, - child_item, - max_child_item)) ) { - exit_status = ret; - goto cleanup; - } - } - - cleanup: - if( NULL != key_src_str ) { - free(key_src_str); - key_src_str = NULL; - } - - if( NULL != key_max_str ) { - free(key_max_str); - key_max_str = NULL; - } - - return exit_status; -} - -static int rmaps_lama_prune_max_tree(opal_tree_t *max_tree, opal_tree_item_t *parent_item) -{ - int ret; - opal_tree_item_t *child_item = NULL, *next_item; - int i; - bool found; - rmaps_lama_level_type_t *key_max; - char *tmp_str = NULL; - - /* - * Basecase - */ - if( NULL == parent_item ) { - return ORTE_SUCCESS; - } - - /* - * Recursively decend tree - Depth first - * Basecase: No children, loop skipped - */ - child_item = opal_tree_get_first_child(parent_item); - while( child_item != NULL ) { - /* Do this before the recursive call, since it might remove this - * child so we need to preserve a pointer to the next sibling. - */ - next_item = opal_tree_get_next_sibling(child_item); - - if( ORTE_SUCCESS != (ret = rmaps_lama_prune_max_tree(max_tree, - child_item)) ) { - return ret; - } - - child_item = next_item; - } - - key_max = (rmaps_lama_level_type_t*)max_tree->get_key(parent_item); - - /* - * Check keys against the user supplied layout - */ - found = false; - for(i = 0; i < lama_mapping_num_layouts; ++i ) { - if( 0 == max_tree->comp(parent_item, &lama_mapping_layout[i]) ) { - found = true; - break; - } - } - - if( !found ) { - if( 15 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - tmp_str = lama_type_enum_to_str(*key_max); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Before pruning %s", - tmp_str); - free(tmp_str); - rmaps_lama_max_tree_pretty_print_tree(max_tree); - } - - opal_tree_remove_item(max_tree, parent_item); - - return ORTE_SUCCESS; - } - - return ORTE_SUCCESS; -} - - -hwloc_obj_t * rmaps_lama_find_nth_subtree_match(hwloc_topology_t hwloc_topo, - hwloc_obj_t parent_obj, - int nth, - rmaps_lama_level_type_t lama_key) -{ - hwloc_obj_t *cur_child = NULL; - hwloc_obj_type_t hwloc_key; - int depth; - int num_found; -#if 0 - char str[128]; -#endif - - cur_child = (hwloc_obj_t*)malloc(sizeof(hwloc_obj_t) * 1); - - /* - * Convert LAMA key to HWLOC key - */ - rmaps_lama_convert_lama_key_to_hwloc_key(lama_key, &hwloc_key, &depth); - - /* - * Decend tree looking for the n'th matching subtree - */ - num_found = -1; - rmaps_lama_find_nth_subtree_match_core(hwloc_topo, - parent_obj, - nth, - &num_found, - hwloc_key, - depth, - cur_child); - - /* - * Check to see if we found it - */ -#if 0 - hwloc_obj_snprintf(str, sizeof(str), hwloc_topo, *cur_child, "#", 0); - if( nth == num_found ) { - printf("--> FOUND : %-20s \t -- \t %2d of %2d\n", str, nth, num_found); - } - else { - printf("--> MISSING : %-20s \t -- \t %2d of %2d\n", str, nth, num_found); - } -#endif - - if( nth == num_found ) { - return cur_child; - } - else { - free(cur_child); - return NULL; - } -} - -static int rmaps_lama_find_nth_subtree_match_core(hwloc_topology_t hwloc_topo, - hwloc_obj_t parent_obj, - int nth, - int *num_found, - hwloc_obj_type_t hwloc_key, - int depth, - hwloc_obj_t *cur_child) -{ - unsigned i; - bool found = false; - -#if 0 - { - char str[128]; - hwloc_obj_snprintf(str, sizeof(str), hwloc_topo, parent_obj, "#", 0); - printf("--> Checking -- %-20s \t -- \t %2d of %2d\n", str, nth, *num_found); - } -#endif - - /* - * Check if the keys match - */ - if( hwloc_key == parent_obj->type ) { - if( HWLOC_OBJ_CACHE == parent_obj->type && - depth == (int)parent_obj->attr->cache.depth ) { - *num_found += 1; - found = true; - } else { - *num_found += 1; - found = true; - } - } - - /* - * Basecase: - * If we have found the correct item, return - */ - if( nth == *num_found ) { - *cur_child = parent_obj; - return ORTE_SUCCESS; - } - - /* - * Do no go any deeper in the tree than we have to - */ - if( !found ) { - for(i = 0; i < parent_obj->arity; ++i ) { - rmaps_lama_find_nth_subtree_match_core(hwloc_topo, - parent_obj->children[i], - nth, - num_found, - hwloc_key, - depth, - cur_child); - if( nth == *num_found ) { - return ORTE_SUCCESS; - } - } - } - - return ORTE_SUCCESS; -} - -hwloc_obj_t * rmaps_lama_find_parent(hwloc_topology_t hwloc_topo, - hwloc_obj_t *child_obj, - rmaps_lama_level_type_t lama_key) -{ - hwloc_obj_t *cur_parent = NULL; - hwloc_obj_type_t hwloc_key; - int depth; - - /* - * Convert LAMA key to HWLOC key - */ - rmaps_lama_convert_lama_key_to_hwloc_key(lama_key, &hwloc_key, &depth); - - /* - * Sanity check - */ - if( hwloc_key == (*child_obj)->type ) { - if( HWLOC_OBJ_CACHE == (*child_obj)->type && - depth == (int)(*child_obj)->attr->cache.depth ) { - return child_obj; - } else { - return child_obj; - } - } - - cur_parent = (hwloc_obj_t*)malloc(sizeof(hwloc_obj_t) * 1); - if (NULL == cur_parent) { - return NULL; - } - - /* - * Accend tree to find mathing parent - */ - *cur_parent = (*child_obj)->parent; - while(NULL != *cur_parent ) { - if( hwloc_key == (*cur_parent)->type ) { - if( HWLOC_OBJ_CACHE == (*cur_parent)->type && - depth == (int)(*cur_parent)->attr->cache.depth ) { - return cur_parent; - } else { - return cur_parent; - } - } - - *cur_parent = (*cur_parent)->parent; - } - - free(cur_parent); - return NULL; -} - - -/********************************* - * Max Tree Structure Functions - *********************************/ -opal_tree_t * rmaps_lama_create_empty_max_tree(void) -{ - opal_tree_t *tmp_tree = NULL; - - tmp_tree = OBJ_NEW(opal_tree_t); - opal_tree_init(tmp_tree, - &lama_max_tree_comp, - &lama_max_tree_serialize, - &lama_max_tree_deserialize, - &lama_max_tree_get_key); - - return tmp_tree; -} - -static int lama_max_tree_comp(opal_tree_item_t *item, void *key) -{ - if( ((rmaps_lama_max_tree_item_t *)item)->type == *((rmaps_lama_level_type_t *)key) ) { - return 0; - } - - return -1; -} - -static int lama_max_tree_serialize(opal_tree_item_t *item, opal_buffer_t *buffer) -{ - opal_dss.pack(buffer, &(((rmaps_lama_max_tree_item_t *)item)->type), 1, OPAL_INT); - - return ORTE_SUCCESS; -} - -static int lama_max_tree_deserialize(opal_buffer_t *buffer, opal_tree_item_t **item) -{ - rmaps_lama_max_tree_item_t *element; - orte_std_cntr_t n = 1; - - element = OBJ_NEW(rmaps_lama_max_tree_item_t); - if( OPAL_SUCCESS == opal_dss.unpack(buffer, &(element->type), &n, OPAL_INT) ) { - *item = (opal_tree_item_t*)element; - } else { - *item = NULL; - } - - return ORTE_SUCCESS; -} - -static void * lama_max_tree_get_key(opal_tree_item_t *item) -{ - return &(((rmaps_lama_max_tree_item_t *)item)->type); -} - - -/********************************* - * Pretty Print Functions - *********************************/ -void rmaps_lama_max_tree_pretty_print_tree(opal_tree_t *tree) -{ - if( NULL == tree ) { - return; - } - - if( opal_tree_is_empty(tree) ) { - return; - } - - pretty_print_subtree(tree, opal_tree_get_root(tree), 0); - - return; -} - -static char * rmaps_lama_max_tree_pretty_print_subtree_element_get(opal_tree_t *tree, - opal_tree_item_t *parent, - int level) -{ - char *element_str = NULL; - char *spacer = NULL; - char *label = NULL; - rmaps_lama_level_type_t *type = NULL; - int i; - - if( NULL == parent ) { - return NULL; - } - - spacer = (char *)malloc(sizeof(char) * (level+1)); - for(i = 0; i < level; ++i ) { - spacer[i] = ' '; - } - spacer[level] = '\0'; - - type = (rmaps_lama_level_type_t *)(tree->get_key(parent)); - label = lama_type_enum_to_str(*type); - - asprintf(&element_str, "%s[%s \t : %3d, %3d", - spacer, label, - parent->opal_tree_num_children, parent->opal_tree_num_ancestors); - - free(spacer); - free(label); - - return element_str; -} - -static void pretty_print_subtree(opal_tree_t *tree, opal_tree_item_t *parent, int level) -{ - opal_tree_item_t *child = NULL; - - if( NULL == parent ) { - return; - } - - /* - * Display Self - */ - pretty_print_subtree_element(tree, parent, level); - - /* - * Depth-first display children - * Basecase; If no children - return - */ - level++; - for(child = opal_tree_get_first_child(parent); - child != NULL; - child = opal_tree_get_next_sibling(child) ) { - pretty_print_subtree(tree, child, level); - } - - return; - -} - -static void pretty_print_subtree_element(opal_tree_t *tree, opal_tree_item_t *parent, int level) -{ - char *element_str = NULL; - - if( NULL == parent ) { - return; - } - - element_str = rmaps_lama_max_tree_pretty_print_subtree_element_get(tree, parent, level); - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Tree Element: %s", - element_str); - - free(element_str); - - return; -} diff --git a/orte/mca/rmaps/lama/rmaps_lama_module.c b/orte/mca/rmaps/lama/rmaps_lama_module.c deleted file mode 100644 index c200d414d8..0000000000 --- a/orte/mca/rmaps/lama/rmaps_lama_module.c +++ /dev/null @@ -1,1916 +0,0 @@ -/* - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * - * Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" -#include "orte/types.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H -#include -#endif /* HAVE_STRING_H */ - -#include "opal/mca/hwloc/hwloc.h" - -#include "opal/util/argv.h" -#include "opal/class/opal_tree.h" - -#include "orte/util/show_help.h" -#include "orte/util/error_strings.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmaps/base/rmaps_private.h" -#include "orte/mca/rmaps/base/base.h" - -#include "orte/runtime/orte_globals.h" - -#include "rmaps_lama.h" - -#include MCA_timer_IMPLEMENTATION_HEADER - - -/********************************* - * Module setup - *********************************/ -static int orte_rmaps_lama_map(orte_job_t *jdata); -orte_rmaps_base_module_t orte_rmaps_lama_module = { - orte_rmaps_lama_map -}; - - -/********************************* - * Timer - *********************************/ -#define RMAPS_LAMA_TIMER_TOTAL 0 -#define RMAPS_LAMA_TIMER_PARSE_PARAMS 1 -#define RMAPS_LAMA_TIMER_BUILD_MAX_TREE 2 -#define RMAPS_LAMA_TIMER_MAPPING 3 -#define RMAPS_LAMA_TIMER_ORDERING 4 -#define RMAPS_LAMA_TIMER_MAX 5 - -static double rmaps_lama_get_time(void); -static void rmaps_lama_set_time(int idx, bool is_start); -static void rmaps_lama_display_all_timers(void); -static void rmaps_lama_clear_timers(void); -static void rmaps_lama_display_indv_timer_core(double diff, char *str); - -static double timer_start[RMAPS_LAMA_TIMER_MAX]; -static double timer_end[RMAPS_LAMA_TIMER_MAX]; -static double timer_accum[RMAPS_LAMA_TIMER_MAX]; - -#define RMAPS_LAMA_CLEAR_TIMERS() \ - { \ - if( rmaps_lama_timing_enabled ) { \ - rmaps_lama_clear_timers(); \ - } \ - } -#define RMAPS_LAMA_START_TIMER(idx) \ - { \ - if( rmaps_lama_timing_enabled ) { \ - rmaps_lama_set_time(idx, true); \ - } \ - } -#define RMAPS_LAMA_END_TIMER(idx) \ - { \ - if( rmaps_lama_timing_enabled ) { \ - rmaps_lama_set_time(idx, false); \ - } \ - } -#define RMAPS_LAMA_DISPLAY_TIMERS() \ - { \ - if( rmaps_lama_timing_enabled ) { \ - rmaps_lama_display_all_timers(); \ - } \ - } - - -/********************************* - * Structures & Defines - *********************************/ -static void rmaps_lama_hwloc_user_construct(rmaps_lama_hwloc_user_t *item); -static void rmaps_lama_hwloc_user_destruct(rmaps_lama_hwloc_user_t *item); - -OBJ_CLASS_INSTANCE(rmaps_lama_hwloc_user_t, - opal_object_t, - rmaps_lama_hwloc_user_construct, - rmaps_lama_hwloc_user_destruct); - - -/********************************* - * Globals - *********************************/ -/* - * Mapping - */ -rmaps_lama_level_type_t *lama_mapping_layout = NULL; -static rmaps_lama_level_type_t *lama_mapping_layout_sort = NULL; -int lama_mapping_num_layouts = 0; - -/* - * Binding - */ -rmaps_lama_level_type_t lama_binding_level = LAMA_LEVEL_UNKNOWN; -static int lama_binding_num_levels = 0; - -/* - * MPPR - */ -rmaps_lama_level_info_t *lama_mppr_levels = NULL; -int lama_mppr_num_levels = 0; - -/* - * Ordering - */ -static rmaps_lama_order_type_t lama_ordering = LAMA_ORDER_NATURAL; - -/* - * Homogeneous system optimization - */ -bool lama_mppr_max_tree_homogeneous_system = false; - - -/********************************* - * Support Macros - *********************************/ - - -/********************************* - * Support functions - *********************************/ -/* - * Preprocess the command line arguments - */ -static int orte_rmaps_lama_process_params(orte_job_t *jdata); - -/* - * Mapping Support: - * Core mapping function - */ -static int orte_rmaps_lama_map_core(orte_job_t *jdata); - -/* - * Mapping Support: - * Recursive function for mapping process - */ -static int rmaps_lama_map_core_iter_level(orte_job_t *jdata, - orte_app_context_t *cur_app_context, - opal_list_t *node_list, - orte_node_t **cur_mach_ptr, - opal_tree_t *max_tree, - int cur_level, - int mach_level, - int **pu_idx_ref, - int **last_pu_idx_ref, - int *num_mapped, - int max_procs, - int *iter_passes); - -/* - * Mapping Support: - * Access the next machine in the node list - */ -static orte_node_t* get_next_machine(orte_job_t *jdata, opal_list_t *node_list, - opal_list_item_t *cur_mach); - -/* - * Mapping Support: - * Check the availability of the requested slot on the specified node - */ -static int check_node_availability(orte_node_t *cur_node, - opal_tree_t *max_tree, - int *pu_idx_ref, - char **slot_list); - -/* - * Mapping Support: - * Debugging PU display - */ -static void display_pu_ref(int *ref, int size, int rank, orte_proc_t *proc); -static char * pu_ref_to_str(int *ref, int size); - -/* - * Mapping Support: - * Convert the process layout 'layer' to the sorted position for the PU - */ -static int convert_layer_to_sort_idx(rmaps_lama_level_type_t layer); - -/* - * MPPR Support: - * Check to make sure a process can be placed on this resource given the - * MPPR restrictions. - */ -static int rmaps_lama_check_mppr(orte_node_t *node, - hwloc_obj_t *child_obj); -static int rmaps_lama_iter_mppr_parents(orte_node_t *node, - hwloc_obj_t *child_obj, - bool check_only); -static int rmaps_lama_iter_mppr_children(orte_node_t *node, - hwloc_obj_t *child_obj, - bool check_only); - -/* - * MPPR Support: - * Increment parents of this child to account for a process being placed - * on this resource. - */ -static int rmaps_lama_inc_mppr(orte_node_t *node, - hwloc_obj_t *child_obj); - -/* - * Mapping Support: - * Return the native representation of the slot list - */ -static char * get_native_slot_list(orte_node_t *cur_node, - hwloc_obj_t *pu_obj, - int *put_idx_ref); - -/* - * Ordering Support: - * Reorder sequentially - */ -static int rmaps_lama_ordering_sequential(orte_job_t *jdata); - -/* - * Map a single process to a specific node - */ -static int orte_rmaps_lama_map_process(orte_job_t *jdata, - orte_node_t *node, - int app_idx, - orte_proc_t **proc); - -/********************************* - * Main Module function to map a job - *********************************/ -static int orte_rmaps_lama_map(orte_job_t *jdata) -{ - int ret, exit_status = ORTE_SUCCESS; - mca_base_component_t *loc_comp = &mca_rmaps_lama_component.base_version; - - RMAPS_LAMA_CLEAR_TIMERS(); - RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_TOTAL); - - /* - * Sanity Check: - * If we are not the 'chosen' mapper, then exit here - */ - if (NULL != jdata->map->req_mapper && - 0 != strcasecmp(jdata->map->req_mapper, loc_comp->mca_component_name)) { - /* a mapper has been specified, and it isn't me */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: job %s not using lama mapper (using %s)", - ORTE_JOBID_PRINT(jdata->jobid), - jdata->map->req_mapper); - return ORTE_ERR_TAKE_NEXT_OPTION; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Mapping job %s", - ORTE_JOBID_PRINT(jdata->jobid)); - - /* - * Identify this as the mapper responsible for this job - */ - if (NULL != jdata->map->last_mapper) { - free(jdata->map->last_mapper); - } - jdata->map->last_mapper = strdup(loc_comp->mca_component_name); - - /* - * Start at the beginning... - */ - jdata->num_procs = 0; - - /* - * Process the command line arguments - */ - RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_PARSE_PARAMS); - if( ORTE_SUCCESS != (ret = orte_rmaps_lama_process_params(jdata)) ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_PARSE_PARAMS); - - /* - * Actually map the job - */ - if( ORTE_SUCCESS != (ret = orte_rmaps_lama_map_core(jdata)) ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - /* - * All Done - */ - - RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_TOTAL); - RMAPS_LAMA_DISPLAY_TIMERS(); - - - cleanup: - if( NULL != lama_mapping_layout ) { - free(lama_mapping_layout); - lama_mapping_layout = NULL; - } - - if( NULL != lama_mapping_layout_sort ) { - free(lama_mapping_layout_sort); - lama_mapping_layout_sort = NULL; - } - - if( NULL != lama_mppr_levels ) { - free(lama_mppr_levels); - lama_mppr_levels = NULL; - } - - return exit_status; -} - - -/********************************* - * User defined lookup structure for hwloc topology - *********************************/ -static void rmaps_lama_hwloc_user_construct(rmaps_lama_hwloc_user_t *item) -{ - item->node_mppr = OBJ_NEW(opal_pointer_array_t); - opal_pointer_array_init(item->node_mppr, - ORTE_GLOBAL_ARRAY_BLOCK_SIZE, - ORTE_GLOBAL_ARRAY_MAX_SIZE, - ORTE_GLOBAL_ARRAY_BLOCK_SIZE); -} - -static void rmaps_lama_hwloc_user_destruct(rmaps_lama_hwloc_user_t *item) -{ - orte_std_cntr_t i; - - if( NULL != item->node_mppr ) { - for(i = 0; i < item->node_mppr->size; ++i) { - if( NULL != item->node_mppr->addr[i] ) { - OBJ_RELEASE(item->node_mppr->addr[i]); - item->node_mppr->addr[i] = NULL; - } - } - OBJ_RELEASE(item->node_mppr); - item->node_mppr = NULL; - } -} - - -/********************************* - * Command line parameter parsing functions - *********************************/ -static int orte_rmaps_lama_process_params(orte_job_t *jdata) -{ - int ret, i; - char *type_str = NULL; - - /* - * Process map/bind/order/mppr aliases. It will print its own - * error message if something went wrong. - */ - if( ORTE_SUCCESS != (ret = rmaps_lama_process_alias_params(jdata) ) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Parse: Binding. It will print its own error message if - * something goes wrong. - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Binding : [%s]", - rmaps_lama_cmd_bind); - if( ORTE_SUCCESS != (ret = rmaps_lama_parse_binding(rmaps_lama_cmd_bind, - &lama_binding_level, - &lama_binding_num_levels)) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - if( 10 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - type_str = lama_type_enum_to_str(lama_binding_level); - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Binding : %*d x %10s", - MAX_BIND_DIGIT_LEN, lama_binding_num_levels, type_str); - free(type_str); - type_str = NULL; - } - /* Reset the binding option since we are going to do it ourselves */ - OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE); - - /* - * Parse: Mapping from Process Layout string. It will print its - * own error message if something goes wrong. - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Mapping : [%s]", - rmaps_lama_cmd_map); - if( ORTE_SUCCESS != (ret = rmaps_lama_parse_mapping(rmaps_lama_cmd_map, - &lama_mapping_layout, - &lama_mapping_layout_sort, - &lama_mapping_num_layouts)) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - if( 10 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - for( i = 0; i < lama_mapping_num_layouts; ++i ) { - type_str = lama_type_enum_to_str(lama_mapping_layout[i]); - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Mapping : (%d) %10s (%d vs %d)", - i, type_str, - lama_mapping_layout[i], lama_mapping_layout_sort[i]); - free(type_str); - type_str = NULL; - } - } - - /* - * Parse: MPPR. It will print its own error message if something - * goes wrong. - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- MPPR : [%s]", - rmaps_lama_cmd_mppr); - if( ORTE_SUCCESS != (ret = rmaps_lama_parse_mppr(rmaps_lama_cmd_mppr, - &lama_mppr_levels, - &lama_mppr_num_levels)) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - if( 10 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - for( i = 0; i < lama_mppr_num_levels; ++i ) { - type_str = lama_type_enum_to_str(lama_mppr_levels[i].type); - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- MPPR : %*d at %10s", - MAX_BIND_DIGIT_LEN, lama_mppr_levels[i].max_resources, type_str); - free(type_str); - type_str = NULL; - } - } - - /* - * Parse: Ordering - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Ordering : [%s]", - rmaps_lama_cmd_ordering); - if( ORTE_SUCCESS != (ret = rmaps_lama_parse_ordering(rmaps_lama_cmd_ordering, - &lama_ordering)) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - if( 10 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - if( LAMA_ORDER_NATURAL == lama_ordering ) { - type_str = strdup("Natural"); - } - else if( LAMA_ORDER_SEQ == lama_ordering ) { - type_str = strdup("Sequential"); - } - else { - type_str = strdup("Unknown"); - } - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Ordering : %10s", - type_str); - free(type_str); - type_str = NULL; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - - return ORTE_SUCCESS; -} - - -/********************************* - * Support functions - *********************************/ -rmaps_lama_level_type_t lama_type_str_to_enum(char *param) -{ - if( 0 == strncmp(param, "n", strlen("n")) ) { - return LAMA_LEVEL_MACHINE; - } - else if( 0 == strncmp(param, "b", strlen("b")) ) { - return LAMA_LEVEL_BOARD; - } - else if( 0 == strncmp(param, "s", strlen("s")) ) { - return LAMA_LEVEL_SOCKET; - } - else if( 0 == strncmp(param, "c", strlen("c")) ) { - return LAMA_LEVEL_CORE; - } - else if( 0 == strncmp(param, "h", strlen("h")) ) { - return LAMA_LEVEL_PU; - } - else if( 0 == strncmp(param, "L1", strlen("L1")) ) { - return LAMA_LEVEL_CACHE_L1; - } - else if( 0 == strncmp(param, "L2", strlen("L2")) ) { - return LAMA_LEVEL_CACHE_L2; - } - else if( 0 == strncmp(param, "L3", strlen("L3")) ) { - return LAMA_LEVEL_CACHE_L3; - } - else if( 0 == strncmp(param, "N", strlen("N")) ) { - return LAMA_LEVEL_NUMA; - } - - return LAMA_LEVEL_UNKNOWN; -} - -char * lama_type_enum_to_str(rmaps_lama_level_type_t param) -{ - if( LAMA_LEVEL_MACHINE == param ) { - return strdup("Machine"); - } - else if( LAMA_LEVEL_BOARD == param ) { - return strdup("Board"); - } - else if( LAMA_LEVEL_SOCKET == param ) { - return strdup("Socket"); - } - else if( LAMA_LEVEL_CORE == param ) { - return strdup("Core"); - } - else if( LAMA_LEVEL_PU == param ) { - return strdup("Hw. Thread"); - } - else if( LAMA_LEVEL_CACHE_L1 == param ) { - return strdup("L1 Cache"); - } - else if( LAMA_LEVEL_CACHE_L2 == param ) { - return strdup("L2 Cache"); - } - else if( LAMA_LEVEL_CACHE_L3 == param ) { - return strdup("L3 Cache"); - } - else if( LAMA_LEVEL_NUMA == param ) { - return strdup("NUMA"); - } - - return strdup("Unknown"); -} - -/********************************* - * Core Mapper function - *********************************/ -static int orte_rmaps_lama_map_core(orte_job_t *jdata) -{ - int ret, exit_status = ORTE_SUCCESS; - int cur_app_idx = 0; - int num_slots; - orte_app_context_t *cur_app_context = NULL; - orte_node_t *cur_mach = NULL; - orte_node_t **cur_mach_ptr = NULL; - orte_proc_t *proc = NULL; - opal_list_t *node_list = NULL; - opal_list_item_t *item = NULL; - opal_tree_t *max_tree = NULL; - int *pu_idx_ref = NULL; - int *last_pu_idx_ref = NULL; - int i, num_mapped, last_num_mapped, mach_level = -1; - orte_std_cntr_t j; - int max_procs_to_map; - int iter_passes; - char * last_level_str = NULL; - bool initial_map = true; - - /* - * Setup PU reference - * Find the position of the 'machine' - */ - pu_idx_ref = (int*)malloc(sizeof(int) * lama_mapping_num_layouts); - if (NULL == pu_idx_ref) { - return ORTE_ERROR; - } - last_pu_idx_ref = (int*)malloc(sizeof(int) * lama_mapping_num_layouts); - if (NULL == last_pu_idx_ref) { - free(pu_idx_ref); - return ORTE_ERROR; - } - - for( i = 0; i < lama_mapping_num_layouts; ++i ) { - pu_idx_ref[i] = 0; - last_pu_idx_ref[i] = -1; - if( LAMA_LEVEL_MACHINE == lama_mapping_layout[i] ) { - mach_level = i; - } - } - - /* - * Foreach app context - */ - for(cur_app_idx = 0; cur_app_idx < jdata->apps->size; ++cur_app_idx ) { - if( NULL == (cur_app_context = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, cur_app_idx))) { - continue; - } - - /* - * Get the list of nodes for this app_context. - */ - node_list = OBJ_NEW(opal_list_t); - ret = orte_rmaps_base_get_target_nodes(node_list, - &num_slots, - cur_app_context, - jdata->map->mapping, - initial_map, false); - if(ORTE_SUCCESS != ret ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - /* Flag that all subsequent requests should not reset the node->mapped flag */ - initial_map = false; - - /* - * If a bookmark exists from some prior mapping, then start from there - */ - cur_mach = (orte_node_t*)orte_rmaps_base_get_starting_point(node_list, jdata); - - /* - * If the application did not specify the number of procs - * then set it to the number of 'slots' - * JJH: TODO: Revisit 'max_procs' calculation - */ - if (0 == cur_app_context->num_procs) { - cur_app_context->num_procs = num_slots; - } - max_procs_to_map = cur_app_context->num_procs; - - /* - * Build the Max Tree - */ - RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_BUILD_MAX_TREE); - max_tree = rmaps_lama_create_empty_max_tree(); - if( ORTE_SUCCESS != (ret = rmaps_lama_build_max_tree(jdata, node_list, - max_tree, - &lama_mppr_max_tree_homogeneous_system)) ) { - exit_status = ret; - goto cleanup; - } - RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_BUILD_MAX_TREE); - - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Mapping: -----------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_MAPPING); - - /* - * Clear PU reference - */ - for( i = 0; i < lama_mapping_num_layouts; ++i ) { - pu_idx_ref[i] = 0; - } - - /* - * Mapping: Recursively loop over all levels - */ - num_mapped = 0; - last_num_mapped = 0; - iter_passes = 0; - cur_mach_ptr = (orte_node_t**)malloc(sizeof(orte_node_t*)); - *cur_mach_ptr = cur_mach; - while( max_procs_to_map > num_mapped ) { - ret = rmaps_lama_map_core_iter_level(jdata, - cur_app_context, - node_list, - cur_mach_ptr, - max_tree, - lama_mapping_num_layouts-1, - mach_level, - &pu_idx_ref, - &last_pu_idx_ref, - &num_mapped, - max_procs_to_map, - &iter_passes); - if( ORTE_SUCCESS != ret ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - /* - * We only get here (without finishing the mapping) if we are going to - * start oversubscribing resources. - */ - if( max_procs_to_map > num_mapped ) { - if( !rmaps_lama_can_oversubscribe ) { - orte_show_help("help-orte-rmaps-lama.txt", - "orte-rmaps-lama:oversubscribe", - true, - num_mapped, max_procs_to_map); - exit_status = ORTE_ERROR; - goto cleanup; - } else { - rmaps_lama_am_oversubscribing = true; - } - } - - /* - * Check to see if we have made any progress in the mapping loop - */ - if( 0 < cur_app_idx && 2 == iter_passes ) { - /* - * Give it another pass: - * This is an edge case when we are trying to restart from a - * bookmark left by a previous app context. If this app context - * is starting from exactly the beginning of the allocation - * then the recursive loop could return out here after the - * increment pass. This is indicated by (iter_passes = 2). - * Since no processes were mapped, we just try again. - */ - } - else if( last_num_mapped == num_mapped ) { - orte_show_help("help-orte-rmaps-lama.txt", - "orte-rmaps-lama:no-resources-available", - true, - cur_app_idx, - num_mapped, max_procs_to_map, - (NULL == rmaps_lama_cmd_map ? "[Not Provided]" : rmaps_lama_cmd_map), - (NULL == rmaps_lama_cmd_bind ? "[Not Provided]" : rmaps_lama_cmd_bind), - (NULL == rmaps_lama_cmd_mppr ? "[Not Provided]" : rmaps_lama_cmd_mppr), - (NULL == rmaps_lama_cmd_ordering ? "[Not Provided]" : rmaps_lama_cmd_ordering)); - exit_status = ORTE_ERROR; - goto cleanup; - } else { - last_num_mapped = num_mapped; - } - } - - /* - * Display Bookmark for debugging - */ - last_level_str = pu_ref_to_str(last_pu_idx_ref, lama_mapping_num_layouts); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Bookmark: --> Node %10s PU %10s", - jdata->bookmark->name, last_level_str); - free(last_level_str); - last_level_str = NULL; - - /* - * Clenup for next iteration - */ - if( NULL != node_list ) { - while(NULL != (item = opal_list_remove_first(node_list))) { - OBJ_RELEASE(item); - } - OBJ_RELEASE(node_list); - node_list = NULL; - } - - OBJ_RELEASE(max_tree); - max_tree = NULL; - } - - RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_MAPPING); - - - /* - * Ordering - */ - RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_ORDERING); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - if( LAMA_ORDER_SEQ == lama_ordering ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Ordering: Sequential ------------"); - - if( ORTE_SUCCESS != (ret = rmaps_lama_ordering_sequential(jdata)) ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - } - else { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Ordering: Natural ---------------"); -#if 0 - /* - * We compute our own vpids inline with the algorithm. So no need to use the - * orte_rmaps_base_compute_vpids() function. - */ -#endif - } - RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_ORDERING); - - - /* - * Display Mapping - */ - if( 10 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - char *cpu_bitmap; - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - for( j = 0; j < jdata->procs->size; ++j) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, j))) { - continue; - } - cpu_bitmap = NULL; - orte_get_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, (void**)&cpu_bitmap, OPAL_STRING); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Ordering: Proc. %2d on Node %10s - Slot %s", - proc->name.vpid, proc->node->name, cpu_bitmap); - if (NULL != cpu_bitmap) { - free(cpu_bitmap); - } - } - } - - - /* - * All done - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Finished ------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - - - cleanup: - if( NULL != node_list ) { - while(NULL != (item = opal_list_remove_first(node_list))) { - OBJ_RELEASE(item); - } - OBJ_RELEASE(node_list); - } - - if( NULL != max_tree ) { - OBJ_RELEASE(max_tree); - } - - free(pu_idx_ref); - free(last_pu_idx_ref); - - if( NULL != last_level_str ) { - free(last_level_str); - } - - return exit_status; -} - -static int rmaps_lama_map_core_iter_level(orte_job_t *jdata, - orte_app_context_t *cur_app_context, - opal_list_t *node_list, - orte_node_t **cur_mach_ptr, - opal_tree_t *max_tree, - int cur_level, - int mach_level, - int **pu_idx_ref, - int **last_pu_idx_ref, - int *num_mapped, - int max_procs, - int *iter_passes) -{ - int ret, exit_status = ORTE_SUCCESS; - int i, j; - opal_tree_item_t *tree_for_level = NULL; - int max_subtree_arity = 0; - char * level_str = NULL; - char * last_level_str = NULL; - char * slot_list = NULL; - orte_proc_t *proc = NULL; - int pu_idx = 0; - - /* - * Find the current tree for this level - * If it is the machine level, then we need to access the information from - * the node list, not the max_tree. - */ - if( cur_level != mach_level ) { - tree_for_level = opal_tree_find_with(opal_tree_get_root(max_tree), - &lama_mapping_layout[cur_level]); - /* - * We do not need subtree, but the arity of the subtree - * JJH TODO: This should be an opal_tree function. - */ - max_subtree_arity = 1; /* include self */ - while( NULL != (tree_for_level = opal_tree_get_next_sibling(tree_for_level)) ) { - ++max_subtree_arity; - } - } - else if( NULL == *cur_mach_ptr ) { - *cur_mach_ptr = get_next_machine(jdata, node_list, (opal_list_item_t*)(*cur_mach_ptr)); - } - - pu_idx = convert_layer_to_sort_idx(lama_mapping_layout[cur_level]); - level_str = lama_type_enum_to_str(lama_mapping_layout[cur_level]); - - /* - * Do we need to advance to a bookmark - */ - if( (*last_pu_idx_ref)[0] >= 0 && 0 == *iter_passes ) { - /* - * Display last mapped - */ - last_level_str = pu_ref_to_str(*last_pu_idx_ref, lama_mapping_num_layouts); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Bookmark: --> Last Mapped: Node %10s (bkmrk %10s) PU %10s - Level %2d", - (NULL == *cur_mach_ptr ? "(NULL)" : (*cur_mach_ptr)->name), - jdata->bookmark->name, last_level_str, (*last_pu_idx_ref)[pu_idx]); - free(last_level_str); - last_level_str = NULL; - - /* - * Set the level starting point to the last known index - */ - i = (*last_pu_idx_ref)[pu_idx]; - } else { - i = 0; - } - - - /* - * Loop over all siblings at this level - * Initial condition above, Increment at bottom, Break check at bottom - */ - while( 1 ) { - /* - * Define the PU index - */ - (*pu_idx_ref)[pu_idx] = i; - - if( (*last_pu_idx_ref)[0] >= 0 && 0 == *iter_passes ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Mapping: --> Level %2d: %10s (%2d) - I %2d - Arity %2d - %10s - Increment only", - cur_level+1, - level_str, pu_idx, i, max_subtree_arity, - (NULL == *cur_mach_ptr ? "" : (*cur_mach_ptr)->name)); - } else { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Mapping: --> Level %2d: %10s (%2d) - I %2d - Arity %2d - %10s", - cur_level+1, - level_str, pu_idx, i, max_subtree_arity, - (NULL == *cur_mach_ptr ? "" : (*cur_mach_ptr)->name)); - } - - - /* - * If not the inner most loop, iterate to the next level down - */ - if( cur_level > 0 ) { - ret = rmaps_lama_map_core_iter_level(jdata, - cur_app_context, - node_list, - cur_mach_ptr, - max_tree, - cur_level - 1, - mach_level, - pu_idx_ref, - last_pu_idx_ref, - num_mapped, - max_procs, - iter_passes); - if( ORTE_SUCCESS != ret ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - } - /* - * If we are restarting the iteration from a previous bookmark then - * the first pass through is a no-op mapping pass that just increments - * the PU reference. - * Called by innermost loop - */ - else if( (*last_pu_idx_ref)[0] >= 0 && 0 == *iter_passes ) { - *iter_passes += 1; - } - /* - * Try to map at this location - */ - else { - /* - * On first pass, make sure we increment this, just so we do not - * accidentally think this is an increment pass. - */ - if( 0 == *iter_passes ) { - *iter_passes += 1; - } - - /* - * Display the PU ref for debugging - */ - display_pu_ref(*pu_idx_ref, lama_mapping_num_layouts, *num_mapped, proc); - - - /* - * Check to see if this resource is available on this node. - * - * In a heterogeneous or otherwise non-uniformly restricted - * environment we may iterate to a resource that is not - * available either because it does not exist, or is not - * available for allocation (off-lined, sub-node allocation). - * Additionally, we need to check resource constrains expressed - * in the MPPR and binding. - */ - ret = check_node_availability((*cur_mach_ptr), - max_tree, - *pu_idx_ref, - &slot_list); - if( ORTE_SUCCESS != ret || NULL == slot_list ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:hwtopo: Mapping: --> Level %2d: %s - INVALID/SKIP", - cur_level+1, - level_str); - /* - * By not mapping here we just let the iterations continue - * until a suitable match is found or we have exhausted all - * possible locations to match and thus cannot map any more. - */ - } - else { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Mapping: --> Level %2d: %s - Slot List (%s)", - cur_level+1, - level_str, slot_list); - - /* - * Map this process onto the resource specified - * level_tree_objs[*] and cur_mach point to the specific resource - */ - proc = NULL; - ret = orte_rmaps_lama_map_process(jdata, - (*cur_mach_ptr), - cur_app_context->idx, - &proc); - if( ORTE_SUCCESS != ret ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto bailout; - } - - /* - * Set the binding for this process - */ - orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, slot_list, OPAL_STRING); - /* - * Insert the proc into the 'native' ordering location. - */ - proc->name.vpid = jdata->num_procs; - if (ORTE_SUCCESS != (ret = opal_pointer_array_set_item(jdata->procs, - proc->name.vpid, proc))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - jdata->num_procs += 1; - - /* - * Save a bookmark so we can return here later if necessary - */ - for( j = 0; j < lama_mapping_num_layouts; ++j ) { - (*last_pu_idx_ref)[j] = (*pu_idx_ref)[j]; - } - jdata->bookmark = (orte_node_t*)(*cur_mach_ptr); - - (*num_mapped)++; - } - } - - /* - * Increment loop - * - * If we are binding, then we may need to advance the binding layer - * by more than one. - */ - if( cur_level != mach_level ) { - if( lama_binding_level == lama_mapping_layout[cur_level] ) { - i += lama_binding_num_levels; - } else { - ++i; - } - } else { - /* - * Note: Currently we do not allow for 'binding' to multiple machines - * But keep the code just in case we want to play with 'stride' later - */ - if( lama_binding_level == lama_mapping_layout[cur_level] && lama_binding_num_levels > 1) { - opal_output(0, "mca:rmaps:lama: ERROR: Cannot bind to multiple machines - SHOULD NEVER HAPPEN: %s", - rmaps_lama_cmd_bind); - exit_status = ORTE_ERROR; - goto bailout; -#if 0 - for( j = 0; j < lama_binding_num_levels; ++j ) { - cur_mach = get_next_machine(jdata, node_list, (opal_list_item_t*)cur_mach); - if( NULL == cur_mach ) { - break; - } - ++i; - } -#endif - } else { - *cur_mach_ptr = get_next_machine(jdata, node_list, (opal_list_item_t*)(*cur_mach_ptr)); - ++i; - } - } - - /* - * Check if we are done mapping before iterating again - */ - if( max_procs <= *num_mapped ) { - exit_status = ORTE_SUCCESS; - goto cleanup; - } - - /* - * Check if we are done looping - */ - if( cur_level != mach_level ) { - if( i >= max_subtree_arity ) { - break; - } - } else { - if( NULL == *cur_mach_ptr ) { - break; - } - } - } - - - /* - * Sanity Check: Check if we are done mapping - */ - if( max_procs <= *num_mapped ) { - exit_status = ORTE_SUCCESS; - goto cleanup; - } - - cleanup: - /* - * If the outermost layer, the increment the number of iteration passes. - */ - if( cur_level == lama_mapping_num_layouts-1 ) { - *iter_passes += 1; - } - - bailout: - if( NULL != level_str ) { - free(level_str); - level_str = NULL; - } - - if( NULL != slot_list ) { - free(slot_list); - slot_list = NULL; - } - - return exit_status; -} - -static orte_node_t* get_next_machine(orte_job_t *jdata, opal_list_t *node_list, - opal_list_item_t *cur_mach) -{ - orte_node_t *next_mach = NULL; - - if( NULL == cur_mach ) { - next_mach = (orte_node_t*)opal_list_get_first(node_list); - } - else if( opal_list_get_last(node_list) == cur_mach ) { - next_mach = NULL; - } - else { - next_mach = (orte_node_t*)opal_list_get_next(cur_mach); - } - - return next_mach; -} - -static int orte_rmaps_lama_map_process(orte_job_t *jdata, - orte_node_t *node, - int app_idx, - orte_proc_t **proc) -{ - int ret; - - /* - * Add this node to the map, but only once - */ - if( !ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED) ) { - if (ORTE_SUCCESS > (ret = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(ret); - return ret; - } - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ - ++(jdata->map->num_nodes); - } - - /* - * Setup the process object - */ - if (NULL == (*proc = orte_rmaps_base_setup_proc(jdata, node, app_idx))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - return ORTE_SUCCESS; -} - -static int rmaps_lama_ordering_sequential(orte_job_t *jdata) -{ - orte_job_map_t *map; - orte_proc_t *proc = NULL, *swap = NULL; - orte_std_cntr_t i, j; - int cur_rank = 0; - orte_node_t *cur_node = NULL; - - map = jdata->map; - - opal_output_verbose(15, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - - /* - * Assign the ranks sequentially - */ - for( i = 0; i < map->nodes->size; ++i) { - if (NULL == (cur_node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { - continue; - } - for( j = 0; j < cur_node->procs->size; ++j) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(cur_node->procs, j))) { - continue; - } - /* ignore procs from other jobs */ - if (proc->name.jobid != jdata->jobid) { - continue; - } - - opal_output_verbose(15, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Ordering: Rename Proc. %2d to %2d (Rev. %s)", - proc->name.vpid, cur_rank, proc->node->name); - proc->name.vpid = cur_rank; - ++cur_rank; - } - } - - /* - * Fix the job structure ordering - Sort by new vpid - * - * If we do not do this then the remote daemons assign the incorrect - * ranks to the processes since they use the relative ordering in the - * jdata->procs structure to determine vpids locally. - * - * JJH: Look at combining these loops with the loop in the core so we - * JJH: do not have to iterate over the list two times - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - cur_rank = 0; - for( j = 0; j < jdata->procs->size; ++j) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, j))) { - continue; - } - - opal_output_verbose(15, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Ordering: Proc. %2d on Node %s", - proc->name.vpid, proc->node->name); - - while((int)proc->name.vpid != cur_rank ) { - swap = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid); - - opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc); - opal_pointer_array_set_item(jdata->procs, cur_rank, swap); - - opal_output_verbose(15, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Ordering: \t SWAP Proc. %2d (%d) and Proc. %2d (%d)", - proc->name.vpid, cur_rank, swap->name.vpid, proc->name.vpid); - proc = swap; - } - ++cur_rank; - } - - return ORTE_SUCCESS; -} - -static int convert_layer_to_sort_idx(rmaps_lama_level_type_t layer) -{ - int i; - - for(i = 0; i < lama_mapping_num_layouts; ++i ) { - if( lama_mapping_layout_sort[i] == layer ) { - return i; - } - } - - return 0; -} - -static void display_pu_ref(int *ref, int size, int rank, orte_proc_t *proc) -{ - char *str = NULL; - - str = pu_ref_to_str(ref, size); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Mapping: PU Ref: %s [Rank %2d] Name: %s", - str, rank, - (NULL == proc ? "(null)" : ORTE_NAME_PRINT(&proc->name))); - - free(str); - - return; -} - -static char * pu_ref_to_str(int *ref, int size) -{ - int i, idx; - char *str = NULL; - - str = (char *)malloc(sizeof(char) * (2 * size)); - for(i = 0, idx = 0; i < size; ++i, idx += 2) { - sprintf(&(str[idx]), "%2d", ref[i]); - } - - return str; -} - -static int check_node_availability(orte_node_t *cur_node, - opal_tree_t *max_tree, - int *pu_idx_ref, - char **slot_list) -{ - int exit_status = ORTE_SUCCESS; - int i; - char * level_str = NULL; - hwloc_obj_t *topo_child = NULL, *topo_parent, *topo_allocated; - - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Checking: Node (%s) -------------", - cur_node->name); - opal_output_verbose(11, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - - - /* - * Determine if the current node has the necessary hardware - * as described by the PU index. - * Find the hwloc object reference for the resource pointed to - * by the PU index. - * JJH TODO: If homogeneous system then this could be simplified. - */ - topo_allocated = topo_parent = (hwloc_obj_t*)malloc(sizeof(hwloc_obj_t) * 1); - if (NULL == topo_parent) { - return ORTE_ERROR; - } - *topo_parent = hwloc_get_obj_by_depth(cur_node->topology, 0, 0); - for( i = 0; i < lama_mapping_num_layouts; ++i ) { - /* - * Skip 'machine' level - */ - if( LAMA_LEVEL_MACHINE == lama_mapping_layout_sort[i] ) { - continue; - } - /* - * Skip 'board' level - * JJH: HWLOC does not support BOARD at the moment - */ - if( LAMA_LEVEL_BOARD == lama_mapping_layout_sort[i] ) { - continue; - } - - level_str = lama_type_enum_to_str(lama_mapping_layout_sort[i]); - opal_output_verbose(11, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Checking: %2d of %s", - pu_idx_ref[i], level_str); - - /* - * Find the nth subtree matching the current key - */ - topo_child = rmaps_lama_find_nth_subtree_match(cur_node->topology, - *topo_parent, - pu_idx_ref[i], - lama_mapping_layout_sort[i]); - - /* - * If it does not exist, then this node is not capable of matching - * so it is unavailable. - */ - if( NULL == topo_child ) { - opal_output_verbose(11, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Check failed: Node %s does not have a %10s %2d", - cur_node->name, level_str, pu_idx_ref[i]); - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * Keep decending the tree - */ - topo_parent = topo_child; - free(level_str); - level_str = NULL; - } - - /* - * We have sufficient hardware :) - */ - - - /* - * Return the native slot list to bind to - * Internally checks the MPPR - */ - *slot_list = get_native_slot_list(cur_node, topo_parent, pu_idx_ref); - if( NULL == *slot_list ) { - goto cleanup; - } - - cleanup: - if( NULL != level_str ) { - free(level_str); - level_str = NULL; - } - - if( ORTE_SUCCESS != exit_status ) { - if( NULL != *slot_list ) { - free(*slot_list); - *slot_list = NULL; - } - } - - free(topo_allocated); - - return exit_status; -} - -static int rmaps_lama_check_mppr(orte_node_t *node, - hwloc_obj_t *child_obj) -{ - int ret; - - /* - * Optimization if no MPPR provided - */ - if( NULL == lama_mppr_levels ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: No MPPR to check - Skip..."); - return ORTE_SUCCESS; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Check ---------------------------"); - /* - * Check Parents (excluding self) - */ - if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_parents(node, &(*child_obj)->parent, true)) ) { - return ret; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Check ---------------------------"); - - /* - * Check Children (including self) - */ - if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_children(node, child_obj, true)) ) { - return ret; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Check ---------------------------"); - - return ORTE_SUCCESS; -} - -static int rmaps_lama_inc_mppr(orte_node_t *node, - hwloc_obj_t *child_obj) -{ - int ret; - - /* - * Optimization if no MPPR provided - */ - if( NULL == lama_mppr_levels ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: No MPPR to increment - Skip..."); - return ORTE_SUCCESS; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Inc ---------------------------"); - /* - * Increment Parents (excluding self) - */ - if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_parents(node, &(*child_obj)->parent, false)) ) { - return ret; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Inc ---------------------------"); - - /* - * Increment Children (including self) - */ - if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_children(node, child_obj, false)) ) { - return ret; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Inc ---------------------------"); - - return ORTE_SUCCESS; -} - -static int rmaps_lama_iter_mppr_parents(orte_node_t *node, - hwloc_obj_t *child_obj, - bool check_only) -{ - rmaps_lama_hwloc_user_t *hwloc_userdata = NULL; - rmaps_lama_node_mppr_t *mppr_accounting = NULL; - char str[128]; - - /* - * Basecase - */ - if( NULL == *child_obj ) { - return ORTE_SUCCESS; - } - - /* - * Check self - */ - /* - * Access MPPR info for this object - */ - hwloc_userdata = (rmaps_lama_hwloc_user_t*)((opal_hwloc_topo_data_t*)(*child_obj)->userdata)->userdata; - mppr_accounting = (rmaps_lama_node_mppr_t*)opal_pointer_array_get_item(hwloc_userdata->node_mppr, node->index); - - hwloc_obj_snprintf(str, sizeof(str), node->topology, *child_obj, "#", 0); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: %s: P [%2d] %10s - %20s - Max %3d , Cur %3d (Oversub.: %s / %s)", - (check_only ? "Checking " : "Increment"), - node->index, node->name, str, - mppr_accounting->max, - (check_only ? mppr_accounting->cur : mppr_accounting->cur + 1), - (rmaps_lama_am_oversubscribing ? "T" : "F"), - (rmaps_lama_can_oversubscribe ? "T" : "F") ); - - /* - * Check limits - Error on first to exceed - */ - if( check_only ) { - if( mppr_accounting->max >= 0 && !rmaps_lama_am_oversubscribing) { - if( (mppr_accounting->cur)+1 > mppr_accounting->max ) { - return ORTE_ERROR; - } - } - } - /* - * Increment current number allocated below this level - */ - else { - mppr_accounting->cur += 1; - } - - /* - * Go to parent - */ - return rmaps_lama_iter_mppr_parents(node, &((*child_obj)->parent), check_only); -} - -static int rmaps_lama_iter_mppr_children(orte_node_t *node, - hwloc_obj_t *child_obj, - bool check_only) -{ - int ret; - rmaps_lama_hwloc_user_t *hwloc_userdata = NULL; - rmaps_lama_node_mppr_t *mppr_accounting = NULL; - char str[128]; - int i; - - /* - * Check self - */ - /* - * Access MPPR info for this object - */ - hwloc_userdata = (rmaps_lama_hwloc_user_t*)((opal_hwloc_topo_data_t*)(*child_obj)->userdata)->userdata; - mppr_accounting = (rmaps_lama_node_mppr_t*)opal_pointer_array_get_item(hwloc_userdata->node_mppr, node->index); - - hwloc_obj_snprintf(str, sizeof(str), node->topology, *child_obj, "#", 0); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: %s: C [%2d] %10s - %20s - Max %3d , Cur %3d (Oversub.: %s / %s)", - (check_only ? "Checking " : "Increment"), - node->index, node->name, str, - mppr_accounting->max, - (check_only ? mppr_accounting->cur : mppr_accounting->cur + 1), - (rmaps_lama_am_oversubscribing ? "T" : "F"), - (rmaps_lama_can_oversubscribe ? "T" : "F") ); - - /* - * Check limits - Error on first to exceed - */ - if( check_only ) { - if( mppr_accounting->max >= 0 && !rmaps_lama_am_oversubscribing) { - if( (mppr_accounting->cur)+1 > mppr_accounting->max ) { - return ORTE_ERROR; - } - } - } - /* - * Increment current number allocated below this level - */ - else { - mppr_accounting->cur += 1; - } - - /* - * Check all children - */ - for(i = 0; i < (int)(*child_obj)->arity; ++i ) { - if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_children(node, &((*child_obj)->children[i]), check_only)) ) { - return ret; - } - } - - return ORTE_SUCCESS; -} - - -static char * get_native_slot_list(orte_node_t *cur_node, hwloc_obj_t *pu_obj, int *put_idx_ref) -{ - int i; - char *slot_list = NULL; - hwloc_obj_t *binding_parent = NULL; - hwloc_obj_t *cur_parent = NULL; - hwloc_cpuset_t binding_cpuset; - hwloc_cpuset_t scratch_cpuset; - char *type_str = NULL; - - /* - * Sanity check - */ - if( NULL == pu_obj ) { - return NULL; - } - - /* - * Determine the cpumask to send to the backend for binding - */ - - /* - * Iterate up the tree until we reach the binding parent - */ - binding_parent = rmaps_lama_find_parent(cur_node->topology, pu_obj, lama_binding_level); - if( NULL == binding_parent ) { - return NULL; - } - - /* - * Iterate across cousins until we find enough resources or hit the node boundary - */ - binding_cpuset = hwloc_bitmap_alloc(); - hwloc_bitmap_zero(binding_cpuset); - - scratch_cpuset = hwloc_bitmap_alloc(); - - cur_parent = binding_parent; - - for(i = 0; i < lama_binding_num_levels; ++i) { - /* - * Check MPPR Availability - */ - if( ORTE_SUCCESS != rmaps_lama_check_mppr(cur_node, cur_parent) ) { - goto cleanup; - } - - /* - * Accumulate the bitmask - * - * JJH: TODO: Add resource offline check (?) - */ - hwloc_bitmap_zero(scratch_cpuset); - /* JJH: Maybe use opal_hwloc_base_get_available_cpus(cur_node->topology, (*cur_parent)) ? - * They do pretty much the same thing, but with more checks... - */ - hwloc_bitmap_and(scratch_cpuset, (*cur_parent)->allowed_cpuset, (*cur_parent)->online_cpuset); - hwloc_bitmap_or(binding_cpuset, scratch_cpuset, binding_cpuset); - -#if 0 - { - hwloc_obj_snprintf(str, sizeof(str), cur_node->topology, *cur_parent, "#", 0); - printf("--> BINDING TO -- %-20s \t -- %2d of %2d -- %2d vs %2d\n",str, - i, lama_binding_level, - (*binding_parent)->logical_index, (*cur_parent)->logical_index); - - hwloc_bitmap_snprintf(str, sizeof(str), (*cur_parent)->allowed_cpuset ); - printf("--> CPU A : %-20s\n", str); - hwloc_bitmap_snprintf(str, sizeof(str), (*cur_parent)->online_cpuset ); - printf("--> CPU B : %-20s\n", str); - hwloc_bitmap_snprintf(str, sizeof(str), scratch_cpuset); - printf("--> CPU C : %-20s\n", str); - hwloc_bitmap_snprintf(str, sizeof(str), binding_cpuset); - printf("--> CPU D : %-20s\n", str); - } -#endif - - /* - * Iterate to the next cousin. - * If we exceed the boundary of the node, then send up an error. - */ - if( (i+1) < lama_binding_num_levels && NULL == (*cur_parent)->next_cousin ) { - type_str = lama_type_enum_to_str(lama_binding_level); - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Error: Not able to bind to %*d x %10s - Stopped at %*d", - MAX_BIND_DIGIT_LEN, lama_binding_num_levels, - type_str, - MAX_BIND_DIGIT_LEN, i); - free(type_str); - type_str = NULL; - goto cleanup; - } - /* - * Point to the next cousin - */ - if( NULL != (*cur_parent)->next_cousin ) { - cur_parent = &((*cur_parent)->next_cousin); - } - } - - /* - * Account for the process placement in the MPPR - * Assumes a previous check - * We cannot do this in the loop, since if the MPPR check fails we would - * need to roll back previous increments. - */ - cur_parent = binding_parent; - for(i = 0; i < lama_binding_num_levels; ++i) { - /* - * Account for the process placement in the MPPR - * Assumes a previous check. - */ - if( ORTE_SUCCESS != rmaps_lama_inc_mppr(cur_node, cur_parent) ) { - goto cleanup; - } - - /* - * Point to the next cousin - */ - if( NULL != (*cur_parent)->next_cousin ) { - cur_parent = &((*cur_parent)->next_cousin); - } - } - - /* - * Convert the cpuset to a slot_list for the remote daemon - */ - hwloc_bitmap_list_asprintf(&slot_list, binding_cpuset); - - cleanup: - hwloc_bitmap_free(scratch_cpuset); - hwloc_bitmap_free(binding_cpuset); - free(binding_parent); - - return slot_list; -} - - -/********************************* - * Timer Support - *********************************/ -static double rmaps_lama_get_time(void) -{ - double wtime; - -#if OPAL_TIMER_USEC_NATIVE - wtime = (double)opal_timer_base_get_usec() / 1000000.0; -#else - struct timeval tv; - gettimeofday(&tv, NULL); - wtime = tv.tv_sec; - wtime += (double)tv.tv_usec / 1000000.0; -#endif - - return wtime; -} - -static void rmaps_lama_set_time(int idx, bool is_start) -{ - if(idx < RMAPS_LAMA_TIMER_MAX ) { - if( is_start ) { - timer_start[idx] = rmaps_lama_get_time(); - } else { - timer_end[idx] = rmaps_lama_get_time(); - timer_accum[idx] += timer_end[idx] - timer_start[idx]; - } - } -} - -static void rmaps_lama_display_all_timers(void) -{ - double diff = 0.0; - double total = 0.0; - char * label = NULL; - - opal_output(0, - "mca:rmaps:lama: Timing: ---------------------------\n"); - - /* - * Timer: Parse Parameters - */ - label = strdup("Parse Params"); - diff = timer_accum[RMAPS_LAMA_TIMER_PARSE_PARAMS]; - rmaps_lama_display_indv_timer_core(diff, label); - free(label); - total += diff; - - /* - * Timer: Build Max Tree - */ - label = strdup("Build Max Tree"); - diff = timer_accum[RMAPS_LAMA_TIMER_BUILD_MAX_TREE]; - rmaps_lama_display_indv_timer_core(diff, label); - free(label); - total += diff; - - /* - * Timer: Mapping - */ - label = strdup("Mapping"); - diff = timer_accum[RMAPS_LAMA_TIMER_MAPPING]; - rmaps_lama_display_indv_timer_core(diff, label); - free(label); - total += diff; - - /* - * Timer: Ordering - */ - label = strdup("Ordering"); - diff = timer_accum[RMAPS_LAMA_TIMER_ORDERING]; - rmaps_lama_display_indv_timer_core(diff, label); - free(label); - total += diff; - - /* - * Timer: Total Overhead - */ - label = strdup("Other Overhead"); - diff = timer_accum[RMAPS_LAMA_TIMER_TOTAL]; - rmaps_lama_display_indv_timer_core(diff - total, label); - free(label); - - /* - * Timer: Total - */ - label = strdup("Total"); - diff = timer_accum[RMAPS_LAMA_TIMER_TOTAL]; - rmaps_lama_display_indv_timer_core(diff, label); - free(label); - - opal_output(0, - "mca:rmaps:lama: ---------------------------------"); -} - -static void rmaps_lama_clear_timers(void) -{ - int i; - for(i = 0; i < RMAPS_LAMA_TIMER_MAX; ++i) { - timer_start[i] = 0.0; - timer_end[i] = 0.0; - timer_accum[i] = 0.0; - } -} - - -static void rmaps_lama_display_indv_timer_core(double diff, char *str) -{ - double perc = 0; - double total = 0; - - total = timer_end[RMAPS_LAMA_TIMER_TOTAL] - timer_start[RMAPS_LAMA_TIMER_TOTAL]; - perc = (diff/total) * 100; - - opal_output(0, - "mca:rmaps:lama: \t%-20s = %10.2f ms\t%6.2f %s\n", - str, (diff * 1000), perc, "%"); - return; -} diff --git a/orte/mca/rmaps/lama/rmaps_lama_params.c b/orte/mca/rmaps/lama/rmaps_lama_params.c deleted file mode 100644 index a5df2ff422..0000000000 --- a/orte/mca/rmaps/lama/rmaps_lama_params.c +++ /dev/null @@ -1,878 +0,0 @@ -/* - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * Processing for command line interface options - * - */ -#include "rmaps_lama.h" - -#include "opal/util/argv.h" - -#include "orte/mca/rmaps/base/rmaps_private.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/util/show_help.h" - -#include - -/********************************* - * Local Functions - *********************************/ -/* - * QSort: Integer comparison - */ -static int lama_parse_int_sort(const void *a, const void *b); - -/* - * Convert the '-ppr' syntax from the 'ppr' component to the 'lama' '-mppr' syntax. - */ -static char * rmaps_lama_covert_ppr(char * given_ppr); - -/********************************* - * Parsing Functions - *********************************/ -int rmaps_lama_process_alias_params(orte_job_t *jdata) -{ - int exit_status = ORTE_SUCCESS; - - /* - * Mapping options - * Note: L1, L2, L3 are not exposed in orterun to the user, so - * there is no need to specify them here. - */ - if( NULL == rmaps_lama_cmd_map ) { - /* orte_rmaps_base.mapping */ - switch( ORTE_GET_MAPPING_POLICY(jdata->map->mapping) ) { - case ORTE_MAPPING_BYNODE: - /* rmaps_lama_cmd_map = strdup("nbNsL3L2L1ch"); */ - rmaps_lama_cmd_map = strdup("nbsch"); - break; - case ORTE_MAPPING_BYBOARD: - /* rmaps_lama_cmd_map = strdup("bnNsL3L2L1ch"); */ - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - "by board", "mapping by board not supported by LAMA"); - exit_status = ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - break; - case ORTE_MAPPING_BYNUMA: - /* rmaps_lama_cmd_map = strdup("NbnsL3L2L1ch"); */ - rmaps_lama_cmd_map = strdup("Nbnsch"); - break; - case ORTE_MAPPING_BYSOCKET: - /* rmaps_lama_cmd_map = strdup("sNbnL3L2L1ch"); */ - rmaps_lama_cmd_map = strdup("sbnch"); - break; - case ORTE_MAPPING_BYL3CACHE: - rmaps_lama_cmd_map = strdup("L3sNbnL2L1ch"); - break; - case ORTE_MAPPING_BYL2CACHE: - rmaps_lama_cmd_map = strdup("L2sNbnL1ch"); - break; - case ORTE_MAPPING_BYL1CACHE: - rmaps_lama_cmd_map = strdup("L1sNbnch"); - break; - case ORTE_MAPPING_BYCORE: - case ORTE_MAPPING_BYSLOT: - /* rmaps_lama_cmd_map = strdup("cL1L2L3sNbnh"); */ - rmaps_lama_cmd_map = strdup("csbnh"); - break; - case ORTE_MAPPING_BYHWTHREAD: - /* rmaps_lama_cmd_map = strdup("hcL1L2L3sNbn"); */ - rmaps_lama_cmd_map = strdup("hcsbn"); - break; - case ORTE_MAPPING_RR: - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - "round robin", "mapping by round robin not supported by LAMA"); - exit_status = ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - case ORTE_MAPPING_SEQ: - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - "sequential", "mapping by sequential not supported by LAMA"); - exit_status = ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - case ORTE_MAPPING_BYUSER: - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - "by user", "mapping by user not supported by LAMA"); - exit_status = ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - default: - /* - * Default is map-by core - */ - rmaps_lama_cmd_map = strdup("cL1L2L3sNbnh"); - break; - } - } - - /* - * Binding Options - */ - if( NULL == rmaps_lama_cmd_bind ) { - /* - * No binding specified, use default - */ - if( !OPAL_BINDING_POLICY_IS_SET(jdata->map->binding) || - !OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy) || - OPAL_BIND_TO_NONE == OPAL_GET_BINDING_POLICY(jdata->map->binding) ) { - rmaps_lama_cmd_bind = NULL; - } - - switch( OPAL_GET_BINDING_POLICY(jdata->map->binding) ) { - case OPAL_BIND_TO_BOARD: - /* rmaps_lama_cmd_bind = strdup("1b"); */ - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - "by board", "binding to board not supported by LAMA"); - exit_status = ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - break; - case OPAL_BIND_TO_NUMA: - rmaps_lama_cmd_bind = strdup("1N"); - break; - case OPAL_BIND_TO_SOCKET: - rmaps_lama_cmd_bind = strdup("1s"); - break; - case OPAL_BIND_TO_L3CACHE: - rmaps_lama_cmd_bind = strdup("1L3"); - break; - case OPAL_BIND_TO_L2CACHE: - rmaps_lama_cmd_bind = strdup("1L2"); - break; - case OPAL_BIND_TO_L1CACHE: - rmaps_lama_cmd_bind = strdup("1L1"); - break; - case OPAL_BIND_TO_CORE: - rmaps_lama_cmd_bind = strdup("1c"); - break; - case OPAL_BIND_TO_HWTHREAD: - rmaps_lama_cmd_bind = strdup("1h"); - break; - case OPAL_BIND_TO_CPUSET: - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - "by CPU set", "binding to CPU set not supported by LAMA"); - exit_status = ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - break; - default: - rmaps_lama_cmd_bind = NULL; - break; - } - } - - /* - * Ordering (a.k.a. Ranking) Options - */ - if( NULL == rmaps_lama_cmd_ordering ) { - /* orte_rmaps_base.ranking */ - switch( ORTE_GET_RANKING_POLICY(jdata->map->ranking) ) { - case ORTE_RANK_BY_SLOT: - rmaps_lama_cmd_ordering = strdup("s"); - break; - case ORTE_RANK_BY_NODE: - case ORTE_RANK_BY_NUMA: - case ORTE_RANK_BY_SOCKET: - case ORTE_RANK_BY_L3CACHE: - case ORTE_RANK_BY_L2CACHE: - case ORTE_RANK_BY_L1CACHE: - case ORTE_RANK_BY_CORE: - case ORTE_RANK_BY_HWTHREAD: - rmaps_lama_cmd_ordering = strdup("n"); - break; - case ORTE_RANK_BY_BOARD: - /* rmaps_lama_cmd_ordering = strdup("n"); */ - orte_show_help("help-orte-rmaps-lama.txt", - "invalid ordering option", - true, - "by board", "ordering by board not supported by LAMA"); - exit_status = ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - break; - default: - rmaps_lama_cmd_ordering = strdup("n"); - break; - } - } - - /* - * MPPR - */ - if( NULL == rmaps_lama_cmd_mppr ) { - /* - * The ppr is given in the map - */ - if( NULL != jdata->map->ppr) { - rmaps_lama_cmd_mppr = rmaps_lama_covert_ppr(jdata->map->ppr); - } - } - - /* - * Oversubscription - */ - if( ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping) ) { - rmaps_lama_can_oversubscribe = false; - } - else { - rmaps_lama_can_oversubscribe = true; - } - - /* - * Display revised values - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Revised Parameters -----"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Map : %s", - rmaps_lama_cmd_map); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Bind : %s", - rmaps_lama_cmd_bind); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: MPPR : %s", - rmaps_lama_cmd_mppr); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Order : %s", - rmaps_lama_cmd_ordering); - - cleanup: - return exit_status; -} - -static char * rmaps_lama_covert_ppr(char * given_ppr) -{ - return strdup(given_ppr); -} - -int rmaps_lama_parse_mapping(char *layout, - rmaps_lama_level_type_t **layout_types, - rmaps_lama_level_type_t **layout_types_sorted, - int *num_types) -{ - int exit_status = ORTE_SUCCESS; - char param[3]; - int i, j, len; - bool found_req_param_n = false; - bool found_req_param_h = false; - bool found_req_param_bind = false; - - /* - * Sanity Check: - * There is no default layout, so if we get here and nothing is specified - * then this is an error. - */ - if( NULL == layout ) { - orte_show_help("help-orte-rmaps-lama.txt", - "internal error", - true, - "rmaps_lama_parse_mapping", - "internal error 1"); - return ORTE_ERROR; - } - - *num_types = 0; - - /* - * Extract and convert all the keys - */ - len = strlen(layout); - for(i = 0; i < len; ++i) { - /* - * L1 : L1 Cache - * L2 : L2 Cache - * L3 : L3 Cache - */ - if( layout[i] == 'L' ) { - param[0] = layout[i]; - ++i; - /* - * Check for 2 characters - */ - if( i >= len ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - layout, "cache level missing number"); - exit_status = ORTE_ERROR; - goto cleanup; - } - param[1] = layout[i]; - param[2] = '\0'; - } - /* - * n : Machine - * b : Board - * s : Socket - * c : Core - * h : Hardware Thread - * N : NUMA Node - */ - else { - param[0] = layout[i]; - param[1] = '\0'; - } - - /* - * Append level - */ - *num_types += 1; - *layout_types = (rmaps_lama_level_type_t*)realloc(*layout_types, sizeof(rmaps_lama_level_type_t) * (*num_types)); - (*layout_types)[(*num_types)-1] = lama_type_str_to_enum(param); - } - - /* - * Check for duplicates and unknowns - * Copy to sorted list - */ - *layout_types_sorted = (rmaps_lama_level_type_t*)malloc(sizeof(rmaps_lama_level_type_t) * (*num_types)); - for( i = 0; i < *num_types; ++i ) { - /* - * Copy for later sorting - */ - (*layout_types_sorted)[i] = (*layout_types)[i]; - - /* - * Look for unknown and unsupported options - */ - if( LAMA_LEVEL_UNKNOWN <= (*layout_types)[i] ) { - char *msg; - asprintf(&msg, "unknown mapping level at position %d", i + 1); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } - - if( LAMA_LEVEL_MACHINE == (*layout_types)[i] ) { - found_req_param_n = true; - } - - if( LAMA_LEVEL_PU == (*layout_types)[i] ) { - found_req_param_h = true; - } - - if( lama_binding_level == (*layout_types)[i] ) { - found_req_param_bind = true; - } - - /* - * Look for duplicates - */ - for( j = i+1; j < *num_types; ++j ) { - if( (*layout_types)[i] == (*layout_types)[j] ) { - char *msg; - asprintf(&msg, "duplicate mapping levels at position %d and %d", - i + 1, j + 1); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } - } - } - - /* - * The user is required to specify at least the: - * - machine - * - hardware thread (needed for lower bound binding) JJH: We should be able to lift this... - * - binding layer (need it to stride the mapping) - * Only print the error message once, for brevity. - */ - if( !found_req_param_n ) { - char *msg; - asprintf(&msg, "missing required 'n' mapping token"); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } - else if(!found_req_param_h) { - char *msg; - asprintf(&msg, "missing required 'h' mapping token"); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } else if (!found_req_param_bind) { - char *msg; - asprintf(&msg, "missing required mapping token for the current binding level"); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * Sort the items - */ - qsort((*layout_types_sorted ), (*num_types), sizeof(int), lama_parse_int_sort); - - cleanup: - return exit_status; -} - -int rmaps_lama_parse_binding(char *layout, rmaps_lama_level_type_t *binding_level, int *num_types) -{ - int exit_status = ORTE_SUCCESS; - char param[3]; - char num[MAX_BIND_DIGIT_LEN]; - int i, n, p, len; - - /* - * Default: If nothing specified - * - Bind to machine - */ - if( NULL == layout ) { - *binding_level = LAMA_LEVEL_MACHINE; - *num_types = 1; - return ORTE_SUCCESS; - } - - *num_types = 0; - - /* - * Extract and convert all the keys - */ - len = strlen(layout); - n = 0; - p = 0; - for(i = 0; i < len; ++i) { - /* - * Must start with a digit - */ - if( isdigit(layout[i]) ) { - /* - * Check: Digits must come first - */ - if( p != 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - layout, "missing digit(s) before binding level token"); - exit_status = ORTE_ERROR; - goto cleanup; - } - - num[n] = layout[i]; - ++n; - /* - * Check: Exceed bound of number of digits - */ - if( n >= MAX_BIND_DIGIT_LEN ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - layout, "too many digits"); - exit_status = ORTE_ERROR; - goto cleanup; - } - } - /* - * Extract the level - */ - else { - /* - * Check: Digits must come first - */ - if( n == 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - layout, "missing digit(s) before binding level token"); - exit_status = ORTE_ERROR; - goto cleanup; - } - /* - * Check: Only one level allowed - */ - if( p != 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - layout, "only one binding level may be specified"); - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * L1 : L1 Cache - * L2 : L2 Cache - * L3 : L3 Cache - */ - if( layout[i] == 'L' ) { - param[0] = layout[i]; - ++i; - /* - * Check for 2 characters - */ - if( i >= len ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - layout, "only one binding level may be specified"); - exit_status = ORTE_ERROR; - goto cleanup; - } - param[1] = layout[i]; - p = 2; - } - /* - * n : Machine - * b : Board - * s : Socket - * c : Core - * h : Hardware Thread - * N : NUMA Node - */ - else { - param[0] = layout[i]; - p = 1; - } - param[p] = '\0'; - } - } - /* - * Check that the level was specified - */ - if( p == 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - layout, "binding specification is empty"); - exit_status = ORTE_ERROR; - goto cleanup; - } - num[n] = '\0'; - - *binding_level = lama_type_str_to_enum(param); - *num_types = atoi(num); - - /* - * Check for unknown level - */ - if( LAMA_LEVEL_UNKNOWN <= *binding_level ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - layout, "unknown binding level"); - exit_status = ORTE_ERROR; - goto cleanup; - } - - cleanup: - return exit_status; -} - -int rmaps_lama_parse_mppr(char *layout, rmaps_lama_level_info_t **mppr_levels, int *num_types) -{ - int exit_status = ORTE_SUCCESS; - char param[3]; - char num[MAX_BIND_DIGIT_LEN]; - char **argv = NULL; - int argc = 0; - int i, j, len; - int p, n; - - /* - * Default: Unrestricted allocation - * 'oversubscribe' flag accounted for elsewhere - */ - if( NULL == layout ) { - *mppr_levels = NULL; - *num_types = 0; - return ORTE_SUCCESS; - } - - *num_types = 0; - - /* - * Split by ',' - * <#:level>,<#:level>,... - */ - argv = opal_argv_split(layout, ','); - argc = opal_argv_count(argv); - for(j = 0; j < argc; ++j) { - /* - * Parse <#:level> - */ - len = strlen(argv[j]); - n = 0; - p = 0; - for(i = 0; i < len; ++i) { - /* - * Skip the ':' separator and whitespace - */ - if( argv[j][i] == ':' || isblank(argv[j][i])) { - continue; - } - /* - * Must start with a digit - */ - else if( isdigit(argv[j][i]) ) { - /* - * Check: Digits must come first - */ - if( p != 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, "missing digit(s) before resource specification"); - exit_status = ORTE_ERROR; - goto cleanup; - } - - num[n] = argv[j][i]; - ++n; - /* - * Check: Exceed bound of number of digits - */ - if( n >= MAX_BIND_DIGIT_LEN ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, "too many digits"); - exit_status = ORTE_ERROR; - goto cleanup; - } - } - /* - * Extract the level - */ - else { - /* - * Check: Digits must come first - */ - if( n == 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, "missing digit(s) before resource specification"); - exit_status = ORTE_ERROR; - goto cleanup; - } - /* - * Check: Only one level allowed - */ - if( p != 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, "only one resource type may be listed per specification"); - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * L1 : L1 Cache - * L2 : L2 Cache - * L3 : L3 Cache - */ - if( argv[j][i] == 'L' ) { - param[0] = argv[j][i]; - ++i; - /* - * Check for 2 characters - */ - if( i >= len ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, "cache level missing number"); - exit_status = ORTE_ERROR; - goto cleanup; - } - param[1] = argv[j][i]; - p = 2; - } - /* - * n : Machine - * b : Board - * s : Socket - * c : Core - * h : Hardware Thread - * N : NUMA Node - */ - else { - param[0] = argv[j][i]; - p = 1; - } - param[p] = '\0'; - } - } - - /* - * Whitespace, just skip - */ - if( n == 0 && p == 0 ) { - continue; - } - - /* - * Check that the level was specified - */ - if( p == 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, "resource type not specified"); - exit_status = ORTE_ERROR; - goto cleanup; - } - num[n] = '\0'; - - /* - * Append level - */ - *num_types += 1; - *mppr_levels = (rmaps_lama_level_info_t*)realloc(*mppr_levels, sizeof(rmaps_lama_level_info_t) * (*num_types)); - (*mppr_levels)[(*num_types)-1].type = lama_type_str_to_enum(param); - (*mppr_levels)[(*num_types)-1].max_resources = atoi(num); - - } - - /* - * Check for duplicates and unknowns - */ - for( i = 0; i < *num_types; ++i ) { - /* - * Look for unknown and unsupported options - */ - if( LAMA_LEVEL_UNKNOWN <= (*mppr_levels)[i].type ) { - char *msg; - asprintf(&msg, "unknown resource type at position %d", i + 1); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * Look for duplicates - */ - for( j = i+1; j < *num_types; ++j ) { - if( (*mppr_levels)[i].type == (*mppr_levels)[j].type ) { - char *msg; - asprintf(&msg, "duplicate resource tpyes at position %d and %d", - i + 1, j + 1); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } - } - } - - cleanup: - if( NULL != argv ) { - opal_argv_free(argv); - argv = NULL; - } - - return exit_status; -} - -int rmaps_lama_parse_ordering(char *layout, - rmaps_lama_order_type_t *order) -{ - /* - * Default: Natural ordering - */ - if( NULL == layout ) { - *order = LAMA_ORDER_NATURAL; - return ORTE_SUCCESS; - } - - /* - * Sequential Ordering - */ - if( 0 == strncmp(layout, "s", strlen("s")) || - 0 == strncmp(layout, "S", strlen("S")) ) { - *order = LAMA_ORDER_SEQ; - } - /* - * Natural Ordering - */ - else if( 0 == strncmp(layout, "n", strlen("n")) || - 0 == strncmp(layout, "N", strlen("N")) ) { - *order = LAMA_ORDER_NATURAL; - } - /* - * Check for unknown options - */ - else { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid ordering option", - true, - "unsupported ordering option", layout); - return ORTE_ERROR; - } - - return ORTE_SUCCESS; -} - -bool rmaps_lama_ok_to_prune_level(rmaps_lama_level_type_t level) -{ - int i; - - for( i = 0; i < lama_mapping_num_layouts; ++i ) { - if( level == lama_mapping_layout[i] ) { - return false; - } - } - - return true; -} - -/********************************* - * Support Functions - *********************************/ -static int lama_parse_int_sort(const void *a, const void *b) { - int left = *((int*)a); - int right = *((int*)b); - - if( left < right ) { - return -1; - } - else if( left > right ) { - return 1; - } - else { - return 0; - } -}