diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 51ec268d707..e707c428125 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -20,7 +20,7 @@ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2016-2018 Mellanox Technologies Ltd. All rights reserved. * * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -384,7 +384,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) volatile bool active; bool background_fence = false; - OMPI_TIMING_INIT(32); + OMPI_TIMING_INIT(64); ompi_hook_base_mpi_init_top(argc, argv, requested, provided); @@ -423,6 +423,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) error = "ompi_mpi_init: opal_init_util failed"; goto error; } + OMPI_TIMING_IMPORT_OPAL("opal_init_util"); /* If thread support was enabled, then setup OPAL to allow for them. This must be done * early to prevent a race condition that can occur with orte_init(). */ @@ -512,8 +513,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) error = "ompi_mpi_init: ompi_rte_init failed"; goto error; } - OMPI_TIMING_NEXT("rte_init"); + OMPI_TIMING_IMPORT_OPAL("orte_ess_base_app_setup"); + OMPI_TIMING_IMPORT_OPAL("rte_init"); ompi_rte_initialized = true; @@ -643,16 +645,23 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } OMPI_TIMING_IMPORT_OPAL("orte_init"); - OMPI_TIMING_IMPORT_OPAL("opal_init_util"); OMPI_TIMING_NEXT("rte_init-commit"); - /* exchange connection info - this function may also act as a barrier * if data exchange is required. The modex occurs solely across procs * in our job. If a barrier is required, the "modex" function will * perform it internally */ opal_pmix.commit(); OMPI_TIMING_NEXT("commit"); +#if (OPAL_ENABLE_TIMING) + if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex && + opal_pmix_collect_all_data) { + opal_pmix.fence(NULL, 0); + OMPI_TIMING_NEXT("pmix-barrier-1"); + opal_pmix.fence(NULL, 0); + OMPI_TIMING_NEXT("pmix-barrier-2"); + } +#endif /* If we have a non-blocking fence: * if we are doing an async modex, but we are collecting all diff --git a/ompi/util/timings.h b/ompi/util/timings.h index ea23cc99e4a..be870665529 100644 --- a/ompi/util/timings.h +++ b/ompi/util/timings.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2017-2018 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * @@ -21,6 +21,7 @@ typedef struct { double ts; char *file; char *prefix; + int imported; } ompi_timing_val_t; typedef struct { @@ -36,11 +37,15 @@ typedef struct ompi_timing_t { int cnt; int error; int enabled; + int import_cnt; opal_timing_ts_func_t get_ts; ompi_timing_list_t *timing; ompi_timing_list_t *cur_timing; } ompi_timing_t; +#define OMPI_TIMING_ENABLED \ + (getenv("OMPI_TIMING_ENABLE") ? atoi(getenv("OMPI_TIMING_ENABLE")) : 0) + #define OMPI_TIMING_INIT(_size) \ ompi_timing_t OMPI_TIMING; \ OMPI_TIMING.prefix = __func__; \ @@ -50,6 +55,7 @@ typedef struct ompi_timing_t { OMPI_TIMING.error = 0; \ OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ OMPI_TIMING.enabled = 0; \ + OMPI_TIMING.import_cnt = 0; \ { \ char *ptr; \ ptr = getenv("OMPI_TIMING_ENABLE"); \ @@ -94,7 +100,8 @@ typedef struct ompi_timing_t { #define OMPI_TIMING_NEXT(...) \ do { \ if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ - char *f = strrchr(__FILE__, '/') + 1; \ + char *f = strrchr(__FILE__, '/'); \ + f = (f == NULL) ? strdup(__FILE__) : f+1; \ int len = 0; \ if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ OMPI_TIMING_ITEM_EXTEND; \ @@ -135,10 +142,13 @@ typedef struct ompi_timing_t { int cnt; \ int i; \ double ts; \ + OMPI_TIMING.import_cnt++; \ OPAL_TIMING_ENV_CNT(func, cnt); \ OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func, OMPI_TIMING.error); \ for(i = 0; i < cnt; i++){ \ char *desc, *filename; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].imported= \ + OMPI_TIMING.import_cnt; \ OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc, ts); \ OMPI_TIMING_APPEND(filename, func, desc, ts); \ } \ @@ -155,6 +165,7 @@ typedef struct ompi_timing_t { MPI_Comm_size(MPI_COMM_WORLD, &size); \ MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ int error = 0; \ + int imported = 0; \ \ MPI_Reduce(&OMPI_TIMING.error, &error, 1, \ MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); \ @@ -171,6 +182,7 @@ typedef struct ompi_timing_t { char **desc = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ char **prefix = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ char **file = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ + double total_avg = 0, total_min = 0, total_max = 0; \ \ if( OMPI_TIMING.cnt > 0 ) { \ OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ @@ -193,21 +205,53 @@ typedef struct ompi_timing_t { timing = (ompi_timing_list_t*)timing->next; \ } while (timing != NULL); \ \ - if( 0 == rank ){ \ + if( 0 == rank ) { \ if (OMPI_TIMING.timing->next) { \ printf("==OMPI_TIMING== warning: added the extra timings allocation that might misrepresent the results.\n" \ "==OMPI_TIMING== Increase the inited size of timings to avoid extra allocation during runtime.\n"); \ } \ \ printf("------------------ %s ------------------\n", \ - OMPI_TIMING.prefix); \ + OMPI_TIMING.prefix); \ + imported = OMPI_TIMING.timing->val[0].imported; \ for(i=0; i< OMPI_TIMING.cnt; i++){ \ + bool print_total = 0; \ + imported = OMPI_TIMING.timing->val[i].imported; \ avg[i] /= size; \ - printf("[%s:%s:%s]: %lf / %lf / %lf\n", \ + printf("%s[%s:%s:%s]: %lf / %lf / %lf\n", \ + imported ? " -- " : "", \ file[i], prefix[i], desc[i], avg[i], min[i], max[i]); \ + if (OMPI_TIMING.timing->val[i].imported) { \ + total_avg += avg[i]; \ + total_min += min[i]; \ + total_max += max[i]; \ + } \ + if (i == (OMPI_TIMING.cnt-1)) { \ + print_total = true; \ + } else { \ + print_total = imported != OMPI_TIMING.timing->val[i+1].imported; \ + } \ + if (print_total && OMPI_TIMING.timing->val[i].imported) { \ + printf("%s[%s:%s:%s]: %lf / %lf / %lf\n", \ + imported ? " !! " : "", \ + file[i], prefix[i], "total", \ + total_avg, total_min, total_max); \ + total_avg = 0; total_min = 0; total_max = 0; \ + } \ + } \ + total_avg = 0; total_min = 0; total_max = 0; \ + for(i=0; i< OMPI_TIMING.cnt; i++) { \ + if (!OMPI_TIMING.timing->val[i].imported) { \ + total_avg += avg[i]; \ + total_min += min[i]; \ + total_max += max[i]; \ + } \ } \ + printf("[%s:total] %lf / %lf / %lf\n", \ + OMPI_TIMING.prefix, \ + total_avg, total_min, total_max); \ printf("[%s:overhead]: %lf \n", OMPI_TIMING.prefix, \ - OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \ + OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \ } \ } \ free(avg); \ @@ -233,6 +277,8 @@ typedef struct ompi_timing_t { #define OMPI_TIMING_FINALIZE +#define OMPI_TIMING_ENABLED 0 + #endif #endif diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index 81ab8be1fb8..855da937eee 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -20,6 +20,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. * All Rights reserved. + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -479,12 +481,16 @@ opal_init_util(int* pargc, char*** pargv) goto return_error; } + OPAL_TIMING_ENV_NEXT(otmng, "opal_dss_open"); + /* initialize the mca */ if (OPAL_SUCCESS != (ret = mca_base_open())) { error = "mca_base_open"; goto return_error; } + OPAL_TIMING_ENV_NEXT(otmng, "mca_base_open"); + /* initialize if framework */ if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_if_base_framework, 0))) { fprintf(stderr, "opal_if_base_open() failed -- process will likely abort (%s:%d, returned %d instead of OPAL_SUCCESS)\n", diff --git a/opal/util/timings.h b/opal/util/timings.h index f84137e174e..1d3a11e5c27 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -1,7 +1,7 @@ /* * Copyright (C) 2014 Artem Polyakov * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2017-2018 Mellanox Technologies Ltd. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -91,7 +91,7 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); #define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \ do { \ opal_timing_env_t name ## _val, *name = &(name ## _val); \ - *name = OPAL_TIMING_ENV_START_TYPE(__func__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); \ + *name = OPAL_TIMING_ENV_START_TYPE(__func__, name, OPAL_TIMING_AUTOMATIC_TIMER, prefix); \ } while(0) #define OPAL_TIMING_ENV_NEXT(h, ...) \ @@ -121,7 +121,8 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); h->error = 1; \ } \ setenv(buf1, buf2, 1); \ - filename = strrchr(__FILE__, '/') + 1; \ + filename = strrchr(__FILE__, '/'); \ + filename = (filename == NULL) ? strdup(__FILE__) : filename+1; \ n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_FILE_%d", h->id, h->cntr); \ if ( n > OPAL_TIMING_STR_LEN ){ \ h->error = 1; \ @@ -207,7 +208,7 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); #define OPAL_TIMING_ENV_INIT(name) -#define OPAL_TIMING_ENV_INIT_PREFIX(prefix) +#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) #define OPAL_TIMING_ENV_NEXT(h, ... ) diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index 7addd036dce..a02711f5f43 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -16,6 +16,8 @@ * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -62,6 +64,7 @@ #include "orte/util/session_dir.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "opal/util/timings.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" @@ -74,6 +77,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) char *error = NULL; opal_list_t transports; + OPAL_TIMING_ENV_INIT(ess_base_setup); /* * stdout/stderr buffering * If the user requested to override the default setting then do @@ -116,6 +120,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_state_base_select"; goto error; } + OPAL_TIMING_ENV_NEXT(ess_base_setup, "state_framework_open"); /* open the errmgr */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { @@ -123,6 +128,8 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_errmgr_base_open"; goto error; } + OPAL_TIMING_ENV_NEXT(ess_base_setup, "errmgr_framework_open"); + /* setup my session directory */ if (orte_create_session_dirs) { OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output, @@ -157,6 +164,8 @@ int orte_ess_base_app_setup(bool db_restrict_local) } } } + OPAL_TIMING_ENV_NEXT(ess_base_setup, "create_session_dirs"); + /* Setup the communication infrastructure */ /* Routed system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { @@ -169,6 +178,8 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_routed_base_select"; goto error; } + OPAL_TIMING_ENV_NEXT(ess_base_setup, "routed_framework_open"); + /* * OOB Layer */ @@ -182,6 +193,8 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_oob_base_select"; goto error; } + OPAL_TIMING_ENV_NEXT(ess_base_setup, "oob_framework_open"); + /* Runtime Messaging Layer */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -193,6 +206,8 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_rml_base_select"; goto error; } + OPAL_TIMING_ENV_NEXT(ess_base_setup, "rml_framework_open"); + /* if we have info on the HNP and local daemon, process it */ if (NULL != orte_process_info.my_hnp_uri) { /* we have to set the HNP's name, even though we won't route messages directly @@ -243,6 +258,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_errmgr_base_select"; goto error; } + OPAL_TIMING_ENV_NEXT(ess_base_setup, "errmgr_select"); /* get a conduit for our use - we never route IO over fabric */ OBJ_CONSTRUCT(&transports, opal_list_t); @@ -264,6 +280,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) goto error; } OPAL_LIST_DESTRUCT(&transports); + OPAL_TIMING_ENV_NEXT(ess_base_setup, "rml_open_conduit"); /* * Group communications @@ -278,6 +295,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_grpcomm_base_select"; goto error; } + OPAL_TIMING_ENV_NEXT(ess_base_setup, "grpcomm_framework_open"); /* open the distributed file system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { @@ -290,6 +308,8 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_dfs_base_select"; goto error; } + OPAL_TIMING_ENV_NEXT(ess_base_setup, "dfs_framework_open"); + return ORTE_SUCCESS; error: orte_show_help("help-orte-runtime.txt", diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index 0420218e26b..563b8115dcf 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -15,6 +15,8 @@ * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -51,6 +53,7 @@ #include "opal/util/proc.h" #include "opal/mca/pmix/pmix.h" #include "opal/mca/pmix/base/base.h" +#include "opal/util/timings.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/grpcomm/grpcomm.h" @@ -101,16 +104,20 @@ static int rte_init(void) bool bool_val, *bool_ptr = &bool_val, tdir_mca_override = false; size_t i; + OPAL_TIMING_ENV_INIT(rte_init); + /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } + OPAL_TIMING_ENV_NEXT(rte_init, "orte_ess_base_std_prolog"); /* get an async event base - we use the opal_async one so * we don't startup extra threads if not needed */ orte_event_base = opal_progress_thread_init(NULL); progress_thread_running = true; + OPAL_TIMING_ENV_NEXT(rte_init, "progress_thread_init"); /* open and setup pmix */ if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { @@ -126,6 +133,8 @@ static int rte_init(void) } /* set the event base */ opal_pmix_base_set_evbase(orte_event_base); + OPAL_TIMING_ENV_NEXT(rte_init, "pmix_framework_open"); + /* initialize the selected module */ if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init(NULL)))) { /* we cannot run - this could be due to being direct launched @@ -166,6 +175,8 @@ static int rte_init(void) pname.jobid = ORTE_PROC_MY_NAME->jobid; pname.vpid = 0; + OPAL_TIMING_ENV_NEXT(rte_init, "pmix_init"); + /* get our local rank from PMI */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK, ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16); @@ -242,6 +253,7 @@ static int rte_init(void) if (OPAL_SUCCESS == ret) { orte_process_info.num_nodes = u32; } + OPAL_TIMING_ENV_NEXT(rte_init, "pmix_get_job_params"); /* setup transport keys in case the MPI layer needs them - * we can use the jobfam and stepid as unique keys @@ -263,6 +275,7 @@ static int rte_init(void) /* cannot free the envar as that messes up our environ */ free(string_key); } + OPAL_TIMING_ENV_NEXT(rte_init, "orte_precondition_transport"); /* retrieve temp directories info */ OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TMPDIR, &wildcard_rank, &val, OPAL_STRING); @@ -320,6 +333,7 @@ static int rte_init(void) orte_process_info.rm_session_dirs = bool_val; } } + OPAL_TIMING_ENV_NEXT(rte_init, "pmix_set_tdirs"); /* get our local peers */ if (0 < orte_process_info.num_local_peers) { @@ -395,6 +409,7 @@ static int rte_init(void) free(mycpuset); } } + OPAL_TIMING_ENV_NEXT(rte_init, "pmix_set_locality"); /* now that we have all required info, complete the setup */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) { @@ -402,12 +417,14 @@ static int rte_init(void) error = "orte_ess_base_app_setup"; goto error; } + OPAL_TIMING_ENV_NEXT(rte_init, "ess_base_app_setup"); /* setup process binding */ if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) { error = "proc_binding"; goto error; } + OPAL_TIMING_ENV_NEXT(rte_init, "ess_base_proc_binding"); /* this needs to be set to enable debugger use when direct launched */ if (NULL == orte_process_info.my_daemon_uri) { @@ -443,7 +460,8 @@ static int rte_init(void) opal_pmix.commit(); opal_pmix.fence(NULL, 0); } - + OPAL_TIMING_ENV_NEXT(rte_init, "rte_init_done"); + return ORTE_SUCCESS; error: diff --git a/oshmem/mca/scoll/base/scoll_base_frame.c b/oshmem/mca/scoll/base/scoll_base_frame.c index e8db9b35c35..592b3dd2b8a 100644 --- a/oshmem/mca/scoll/base/scoll_base_frame.c +++ b/oshmem/mca/scoll/base/scoll_base_frame.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Mellanox Technologies, Inc. + * Copyright (c) 2013-2018 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ * @@ -17,6 +17,7 @@ #include "oshmem/mca/mca.h" #include "opal/util/output.h" #include "opal/mca/base/base.h" +#include "ompi/util/timings.h" #include "oshmem/util/oshmem_util.h" #include "oshmem/mca/scoll/scoll.h" @@ -57,6 +58,8 @@ int mca_scoll_enable(void) { int ret = OSHMEM_SUCCESS; + OPAL_TIMING_ENV_INIT(mca_scoll_enable); + if (!mca_scoll_sync_array) { void* ptr = (void*) mca_scoll_sync_array; int i = 0; @@ -69,16 +72,23 @@ int mca_scoll_enable(void) } } + OPAL_TIMING_ENV_NEXT(mca_scoll_enable, "memheap"); + /* Note: it is done to support FCA only and we need to consider possibility to * find a way w/o this ugly hack */ if (OSHMEM_SUCCESS != (ret = mca_scoll_base_select(oshmem_group_all))) { return ret; } + + OPAL_TIMING_ENV_NEXT(mca_scoll_enable, "group_all"); + if (OSHMEM_SUCCESS != (ret = mca_scoll_base_select(oshmem_group_self))) { return ret; } + OPAL_TIMING_ENV_NEXT(mca_scoll_enable, "group_self"); + return OSHMEM_SUCCESS; } diff --git a/oshmem/mca/scoll/base/scoll_base_select.c b/oshmem/mca/scoll/base/scoll_base_select.c index 600fdc4ec68..fdaddfe1699 100644 --- a/oshmem/mca/scoll/base/scoll_base_select.c +++ b/oshmem/mca/scoll/base/scoll_base_select.c @@ -1,7 +1,7 @@ /* - * Copyright (c) 2013 Mellanox Technologies, Inc. + * Copyright (c) 2013-2018 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -21,6 +21,7 @@ #include "oshmem/mca/mca.h" #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_component_repository.h" +#include "ompi/util/timings.h" #include "oshmem/util/oshmem_util.h" #include "oshmem/mca/scoll/scoll.h" @@ -194,6 +195,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group) opal_list_item_t *item; int ret; + OPAL_TIMING_ENV_INIT(mca_scoll_base_select); + /* Announce */ SCOLL_VERBOSE(10, "scoll:base:group_select: new group: %d", group->id); mca_scoll_base_group_unselect(group); @@ -206,6 +209,9 @@ int mca_scoll_base_select(struct oshmem_group_t *group) group->g_scoll.scoll_alltoall = scoll_null_alltoall; return OSHMEM_SUCCESS; } + + OPAL_TIMING_ENV_NEXT(mca_scoll_base_select, "setup"); + SCOLL_VERBOSE(10, "scoll:base:group_select: Checking all available modules"); selectable = check_components(&oshmem_scoll_base_framework.framework_components, group); @@ -218,6 +224,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group) return OSHMEM_ERROR; } + OPAL_TIMING_ENV_NEXT(mca_scoll_base_select, "check_components"); + /* do the selection loop */ for (item = opal_list_remove_first(selectable); NULL != item; item = opal_list_remove_first(selectable)) { @@ -236,6 +244,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group) OBJ_RELEASE(avail); } + OPAL_TIMING_ENV_NEXT(mca_scoll_base_select, "select_loop"); + /* Done with the list from the check_components() call so release it. */ OBJ_RELEASE(selectable); if ((NULL == group->g_scoll.scoll_barrier) @@ -247,6 +257,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group) return OSHMEM_ERR_NOT_FOUND; } + OPAL_TIMING_ENV_NEXT(mca_scoll_base_select, "release"); + return OSHMEM_SUCCESS; } diff --git a/oshmem/mca/scoll/mpi/scoll_mpi_module.c b/oshmem/mca/scoll/mpi/scoll_mpi_module.c index adc1b4a826f..1228cf8a3a2 100644 --- a/oshmem/mca/scoll/mpi/scoll_mpi_module.c +++ b/oshmem/mca/scoll/mpi/scoll_mpi_module.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2011 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2018 Mellanox Technologies. All rights reserved. + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -17,6 +17,7 @@ #include "oshmem/proc/proc.h" #include "oshmem/runtime/runtime.h" #include "ompi/mca/coll/base/base.h" +#include "opal/util/timings.h" int mca_scoll_mpi_init_query(bool enable_progress_threads, bool enable_mpi_threads) { @@ -121,20 +122,27 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority) if ((osh_group->proc_count < 2) || (osh_group->proc_count < cm->mpi_np)) { return NULL; } + OPAL_TIMING_ENV_INIT(comm_query); + /* Create OMPI_Comm object and store ptr to it in group obj*/ if (NULL == oshmem_group_all) { osh_group->ompi_comm = &(ompi_mpi_comm_world.comm); + OPAL_TIMING_ENV_NEXT(comm_query, "ompi_mpi_comm_world"); } else { err = ompi_comm_group(&(ompi_mpi_comm_world.comm), &parent_group); if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) { return NULL; } + OPAL_TIMING_ENV_NEXT(comm_query, "ompi_comm_group"); + ranks = (int*) malloc(osh_group->proc_count * sizeof(int)); if (OPAL_UNLIKELY(NULL == ranks)) { return NULL; } tag = 1; + OPAL_TIMING_ENV_NEXT(comm_query, "malloc"); + for (i = 0; i < osh_group->proc_count; i++) { ompi_proc_t* ompi_proc; for( int j = 0; j < ompi_group_size(parent_group); j++ ) { @@ -146,24 +154,32 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority) } } + OPAL_TIMING_ENV_NEXT(comm_query, "build_ranks"); + err = ompi_group_incl(parent_group, osh_group->proc_count, ranks, &new_group); if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) { free(ranks); return NULL; } + OPAL_TIMING_ENV_NEXT(comm_query, "ompi_group_incl"); + err = ompi_comm_create_group(&(ompi_mpi_comm_world.comm), new_group, tag, &newcomm); if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) { free(ranks); return NULL; } + OPAL_TIMING_ENV_NEXT(comm_query, "ompi_comm_create_group"); + err = ompi_group_free(&new_group); if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) { free(ranks); return NULL; } + OPAL_TIMING_ENV_NEXT(comm_query, "ompi_group_free"); free(ranks); osh_group->ompi_comm = newcomm; + OPAL_TIMING_ENV_NEXT(comm_query, "set_group_comm"); } mpi_module = OBJ_NEW(mca_scoll_mpi_module_t); if (!mpi_module){ diff --git a/oshmem/runtime/oshmem_shmem_init.c b/oshmem/runtime/oshmem_shmem_init.c index dc9169898d6..2a52b4550cd 100644 --- a/oshmem/runtime/oshmem_shmem_init.c +++ b/oshmem/runtime/oshmem_shmem_init.c @@ -51,6 +51,7 @@ #include "opal/mca/allocator/base/base.h" #include "ompi/proc/proc.h" #include "ompi/runtime/mpiruntime.h" +#include "ompi/util/timings.h" #include "oshmem/constants.h" #include "oshmem/runtime/runtime.h" @@ -143,17 +144,26 @@ int oshmem_shmem_init(int argc, char **argv, int requested, int *provided) { int ret = OSHMEM_SUCCESS; + OMPI_TIMING_INIT(32); + if (!oshmem_shmem_initialized) { if (!ompi_mpi_initialized && !ompi_mpi_finalized) { ret = ompi_mpi_init(argc, argv, requested, provided); } + OMPI_TIMING_NEXT("ompi_mpi_init"); if (OSHMEM_SUCCESS != ret) { return ret; } PMPI_Comm_dup(MPI_COMM_WORLD, &oshmem_comm_world); + OMPI_TIMING_NEXT("PMPI_Comm_dup"); + ret = _shmem_init(argc, argv, requested, provided); + OMPI_TIMING_NEXT("_shmem_init"); + OMPI_TIMING_IMPORT_OPAL("mca_scoll_mpi_comm_query"); + OMPI_TIMING_IMPORT_OPAL("mca_scoll_enable"); + OMPI_TIMING_IMPORT_OPAL("mca_scoll_base_select"); if (OSHMEM_SUCCESS != ret) { return ret; @@ -164,11 +174,15 @@ int oshmem_shmem_init(int argc, char **argv, int requested, int *provided) SHMEM_API_ERROR( "shmem_lock_init() failed"); return OSHMEM_ERROR; } + OMPI_TIMING_NEXT("shmem_lock_init"); /* this is a collective op, implies barrier */ MCA_MEMHEAP_CALL(get_all_mkeys()); + OMPI_TIMING_NEXT("get_all_mkeys()"); oshmem_shmem_preconnect_all(); + OMPI_TIMING_NEXT("shmem_preconnect_all"); + #if OSHMEM_OPAL_THREAD_ENABLE pthread_t thread_id; int perr; @@ -178,11 +192,14 @@ int oshmem_shmem_init(int argc, char **argv, int requested, int *provided) return OSHMEM_ERROR; } #endif + OMPI_TIMING_NEXT("THREAD_ENABLE"); } #ifdef SIGUSR1 signal(SIGUSR1,sighandler__SIGUSR1); signal(SIGTERM,sighandler__SIGTERM); #endif + OMPI_TIMING_OUT; + OMPI_TIMING_FINALIZE; return ret; }