From b0d975f10b6e3d9f671d5899f185369017721492 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Thu, 30 Jul 2015 17:30:07 -0500 Subject: [PATCH 01/14] Performance tuning: make sure the individual component is selected for 1 and 2 process communicators (important for some benchmarks) --- ompi/mca/fcoll/base/fcoll_base_file_select.c | 3 +++ ompi/mca/fcoll/individual/fcoll_individual_module.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/ompi/mca/fcoll/base/fcoll_base_file_select.c b/ompi/mca/fcoll/base/fcoll_base_file_select.c index 433098af36..9dba84ed43 100644 --- a/ompi/mca/fcoll/base/fcoll_base_file_select.c +++ b/ompi/mca/fcoll/base/fcoll_base_file_select.c @@ -262,6 +262,9 @@ int mca_fcoll_base_query_table (struct mca_io_ompio_file_t *file, char *name) file->f_cc_size >= file->f_stripe_size) { return 1; } + if ( 2 >= (int)file->f_size ){ + return 1; + } } if (!strcmp (name, "dynamic")) { if ((int)file->f_cc_size < file->f_bytes_per_agg && diff --git a/ompi/mca/fcoll/individual/fcoll_individual_module.c b/ompi/mca/fcoll/individual/fcoll_individual_module.c index b9c95a86ba..eb30d6c41c 100644 --- a/ompi/mca/fcoll/individual/fcoll_individual_module.c +++ b/ompi/mca/fcoll/individual/fcoll_individual_module.c @@ -64,6 +64,9 @@ mca_fcoll_individual_component_file_query (mca_io_ompio_file_t *fh, int *priorit if (*priority < 50) { *priority = 50; } + if ( 2 >= fh->f_size ) { + *priority = 100; + } } return &individual; From 78c901da97bf549101ee15338ada0e2c3c2cba2c Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Fri, 7 Aug 2015 11:50:25 -0500 Subject: [PATCH 02/14] free the datatypes that were created using type_dup during file_set_view --- ompi/mca/io/ompio/io_ompio_file_open.c | 8 ++++++++ ompi/mca/io/ompio/io_ompio_file_set_view.c | 5 ++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ompi/mca/io/ompio/io_ompio_file_open.c b/ompi/mca/io/ompio/io_ompio_file_open.c index 057c57e7af..3d85a20a21 100644 --- 
a/ompi/mca/io/ompio/io_ompio_file_open.c +++ b/ompi/mca/io/ompio/io_ompio_file_open.c @@ -380,6 +380,14 @@ ompio_io_ompio_file_close (mca_io_ompio_file_t *ompio_fh) ompi_datatype_destroy (&ompio_fh->f_iov_type); } + if ( MPI_DATATYPE_NULL != ompio_fh->f_etype ) { + ompi_datatype_destroy (&ompio_fh->f_etype); + } + if ( MPI_DATATYPE_NULL != ompio_fh->f_filetype ){ + ompi_datatype_destroy (&ompio_fh->f_filetype); + } + + if (MPI_COMM_NULL != ompio_fh->f_comm && (ompio_fh->f_flags & OMPIO_SHAREDFP_IS_SET) ) { ompi_comm_free (&ompio_fh->f_comm); } diff --git a/ompi/mca/io/ompio/io_ompio_file_set_view.c b/ompi/mca/io/ompio/io_ompio_file_set_view.c index 812e3096b7..7cbdec6801 100644 --- a/ompi/mca/io/ompio/io_ompio_file_set_view.c +++ b/ompi/mca/io/ompio/io_ompio_file_set_view.c @@ -138,7 +138,10 @@ int mca_io_ompio_file_set_view (ompi_file_t *fp, data = (mca_io_ompio_data_t *) fp->f_io_selected_data; fh = &data->ompio_fh; - + + ompi_datatype_destroy (&fh->f_etype); + ompi_datatype_destroy (&fh->f_filetype); + if (NULL != fh->f_decoded_iov) { free (fh->f_decoded_iov); fh->f_decoded_iov = NULL; From 47111062d33c8b94cb30c3681179defb5871eca7 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Fri, 7 Aug 2015 12:49:58 -0500 Subject: [PATCH 03/14] Performance tuning. make sure we catch if the user wants to set the default fileview and replace it with our optimized default file view. Otherwise, performance will suffer. file_get_view should still return the correct filetype, not our optimized default file view. This is the correct version compared to ffa67b96933acafb3cfacf39a074d3dc1fe134ae, which unfortunately broke some test cases in mpi_test_suite. Thanks for @ggouaillardet for reporting this! 
Conflicts: ompi/mca/io/ompio/io_ompio.c ompi/mca/io/ompio/io_ompio_file_set_view.c --- ompi/mca/io/ompio/io_ompio.c | 8 ++--- ompi/mca/io/ompio/io_ompio.h | 3 ++ ompi/mca/io/ompio/io_ompio_file_open.c | 4 +++ ompi/mca/io/ompio/io_ompio_file_set_view.c | 42 +++++++++++++++++----- 4 files changed, 44 insertions(+), 13 deletions(-) diff --git a/ompi/mca/io/ompio/io_ompio.c b/ompi/mca/io/ompio/io_ompio.c index 93e792d815..d1baa63e3b 100644 --- a/ompi/mca/io/ompio/io_ompio.c +++ b/ompi/mca/io/ompio/io_ompio.c @@ -102,7 +102,6 @@ static int mca_io_ompio_merge_groups(mca_io_ompio_file_t *fh, int num_merge_aggrs); - int ompi_io_ompio_set_file_defaults (mca_io_ompio_file_t *fh) { @@ -133,15 +132,16 @@ int ompi_io_ompio_set_file_defaults (mca_io_ompio_file_t *fh) fh->f_init_num_aggrs = -1; fh->f_init_aggr_list = NULL; - ompi_datatype_create_contiguous(1048576, + ompi_datatype_create_contiguous(MCA_IO_DEFAULT_FILE_VIEW_SIZE, &ompi_mpi_byte.dt, &default_file_view); ompi_datatype_commit (&default_file_view); fh->f_etype = &ompi_mpi_byte.dt; fh->f_filetype = default_file_view; - - + ompi_datatype_duplicate ( &ompi_mpi_byte.dt, &fh->f_orig_filetype ); + + /* Default file View */ fh->f_iov_type = MPI_DATATYPE_NULL; fh->f_stripe_size = mca_io_ompio_bytes_per_agg; diff --git a/ompi/mca/io/ompio/io_ompio.h b/ompi/mca/io/ompio/io_ompio.h index 0649f7a01e..e15769edac 100644 --- a/ompi/mca/io/ompio/io_ompio.h +++ b/ompi/mca/io/ompio/io_ompio.h @@ -57,7 +57,9 @@ OMPI_DECLSPEC extern int mca_io_ompio_coll_timing_info; #define OMPIO_CONTIGUOUS_FVIEW 0x00000010 #define OMPIO_AGGREGATOR_IS_SET 0x00000020 #define OMPIO_SHAREDFP_IS_SET 0x00000040 + #define QUEUESIZE 2048 +#define MCA_IO_DEFAULT_FILE_VIEW_SIZE 4*1024*1024 #define OMPIO_MIN(a, b) (((a) < (b)) ? (a) : (b)) #define OMPIO_MAX(a, b) (((a) < (b)) ? 
(b) : (a)) @@ -320,6 +322,7 @@ struct mca_io_ompio_file_t { size_t f_view_size; ompi_datatype_t *f_etype; ompi_datatype_t *f_filetype; + ompi_datatype_t *f_orig_filetype; /* the fileview passed by the user to us */ size_t f_etype_size; /* contains IO requests that needs to be read/written */ diff --git a/ompi/mca/io/ompio/io_ompio_file_open.c b/ompi/mca/io/ompio/io_ompio_file_open.c index 3d85a20a21..75e6226947 100644 --- a/ompi/mca/io/ompio/io_ompio_file_open.c +++ b/ompi/mca/io/ompio/io_ompio_file_open.c @@ -387,6 +387,10 @@ ompio_io_ompio_file_close (mca_io_ompio_file_t *ompio_fh) ompi_datatype_destroy (&ompio_fh->f_filetype); } + if ( MPI_DATATYPE_NULL != ompio_fh->f_orig_filetype ){ + ompi_datatype_destroy (&ompio_fh->f_orig_filetype); + } + if (MPI_COMM_NULL != ompio_fh->f_comm && (ompio_fh->f_flags & OMPIO_SHAREDFP_IS_SET) ) { ompi_comm_free (&ompio_fh->f_comm); diff --git a/ompi/mca/io/ompio/io_ompio_file_set_view.c b/ompi/mca/io/ompio/io_ompio_file_set_view.c index 7cbdec6801..a45e2acf7b 100644 --- a/ompi/mca/io/ompio/io_ompio_file_set_view.c +++ b/ompi/mca/io/ompio/io_ompio_file_set_view.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -135,12 +135,15 @@ int mca_io_ompio_file_set_view (ompi_file_t *fp, { mca_io_ompio_data_t *data; mca_io_ompio_file_t *fh; + size_t ftype_size; + OPAL_PTRDIFF_TYPE ftype_extent, lb; data = (mca_io_ompio_data_t *) fp->f_io_selected_data; fh = &data->ompio_fh; ompi_datatype_destroy (&fh->f_etype); ompi_datatype_destroy (&fh->f_filetype); + ompi_datatype_destroy (&fh->f_orig_filetype); if (NULL != fh->f_decoded_iov) { free (fh->f_decoded_iov); @@ -157,14 +160,35 @@ int mca_io_ompio_file_set_view (ompi_file_t *fp, fh->f_flags |= OMPIO_FILE_VIEW_IS_SET; fh->f_datarep = strdup (datarep); - - mca_io_ompio_set_view_internal (fh, - disp, - etype, - filetype, - datarep, - info); + ompi_datatype_duplicate (filetype, &fh->f_orig_filetype ); + opal_datatype_get_extent(&filetype->super, &lb, &ftype_extent); + opal_datatype_type_size (&filetype->super, &ftype_size); + + if ( etype == filetype && + ompi_datatype_is_predefined (filetype ) && + ftype_extent == (OPAL_PTRDIFF_TYPE)ftype_size ){ + ompi_datatype_t *newfiletype; + ompi_datatype_create_contiguous(MCA_IO_DEFAULT_FILE_VIEW_SIZE, + &ompi_mpi_byte.dt, + &newfiletype); + ompi_datatype_commit (&newfiletype); + mca_io_ompio_set_view_internal (fh, + disp, + etype, + newfiletype, + datarep, + info); + ompi_datatype_destroy ( &newfiletype ); + } + else { + mca_io_ompio_set_view_internal (fh, + disp, + etype, + filetype, + datarep, + info); + } if (OMPI_SUCCESS != mca_fcoll_base_file_select (&data->ompio_fh, NULL)) { @@ -189,7 +213,7 @@ int mca_io_ompio_file_get_view (struct ompi_file_t *fp, *disp = fh->f_disp; ompi_datatype_duplicate (fh->f_etype, etype); - ompi_datatype_duplicate (fh->f_filetype, filetype); + ompi_datatype_duplicate (fh->f_orig_filetype, filetype); strcpy (datarep, fh->f_datarep); return OMPI_SUCCESS; From 1ab05451ec28148aa94a1d8e6a92d4de58dd2909 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Fri, 7 Aug 2015 13:06:39 -0500 Subject: [PATCH 04/14] Performance tuning: change the default behavior 
of ompio to *not* segment individual read/write operations. In most cases, performance seems to be better if not segmented. --- ompi/mca/io/ompio/io_ompio_component.c | 6 +++--- ompi/mca/io/ompio/io_ompio_file_read.c | 7 ++++++- ompi/mca/io/ompio/io_ompio_file_write.c | 7 ++++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/ompi/mca/io/ompio/io_ompio_component.c b/ompi/mca/io/ompio/io_ompio_component.c index 9f64fca068..239ec425ae 100644 --- a/ompi/mca/io/ompio/io_ompio_component.c +++ b/ompi/mca/io/ompio/io_ompio_component.c @@ -31,7 +31,7 @@ #include "ompi/mca/io/io.h" #include "io_ompio.h" -int mca_io_ompio_cycle_buffer_size = OMPIO_PREALLOC_MAX_BUF_SIZE; +int mca_io_ompio_cycle_buffer_size = -1; int mca_io_ompio_bytes_per_agg = OMPIO_PREALLOC_MAX_BUF_SIZE; int mca_io_ompio_num_aggregators = -1; int mca_io_ompio_record_offset_info = 0; @@ -162,10 +162,10 @@ static int register_component(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_io_ompio_coll_timing_info); - mca_io_ompio_cycle_buffer_size = OMPIO_PREALLOC_MAX_BUF_SIZE; + mca_io_ompio_cycle_buffer_size = -1; (void) mca_base_component_var_register(&mca_io_ompio_component.io_version, "cycle_buffer_size", - "Cycle buffer size of individual reads/writes", + "Data size issued by individual reads/writes per call", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, diff --git a/ompi/mca/io/ompio/io_ompio_file_read.c b/ompi/mca/io/ompio/io_ompio_file_read.c index 1150bf27fc..fe30169b16 100644 --- a/ompi/mca/io/ompio/io_ompio_file_read.c +++ b/ompi/mca/io/ompio/io_ompio_file_read.c @@ -106,7 +106,12 @@ int ompio_io_ompio_file_read (mca_io_ompio_file_t *fh, &decoded_iov, &iov_count); - bytes_per_cycle = mca_io_ompio_cycle_buffer_size; + if ( -1 == mca_io_ompio_cycle_buffer_size ) { + bytes_per_cycle = max_data; + } + else { + bytes_per_cycle = mca_io_ompio_cycle_buffer_size; + } cycles = ceil((float)max_data/bytes_per_cycle); #if 0 diff --git 
a/ompi/mca/io/ompio/io_ompio_file_write.c b/ompi/mca/io/ompio/io_ompio_file_write.c index 08038ddc66..28d8e4a3a7 100644 --- a/ompi/mca/io/ompio/io_ompio_file_write.c +++ b/ompi/mca/io/ompio/io_ompio_file_write.c @@ -103,7 +103,12 @@ int ompio_io_ompio_file_write (mca_io_ompio_file_t *fh, &decoded_iov, &iov_count); - bytes_per_cycle = mca_io_ompio_cycle_buffer_size; + if ( -1 == mca_io_ompio_cycle_buffer_size ) { + bytes_per_cycle = max_data; + } + else { + bytes_per_cycle = mca_io_ompio_cycle_buffer_size; + } cycles = ceil((float)max_data/bytes_per_cycle); #if 0 From f12995ad084fb8b524299adbf5c6a2d80b21fd46 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Fri, 7 Aug 2015 13:30:50 -0500 Subject: [PATCH 05/14] free memory correctly in case of an error. Fixes CID 131540 and CID 1315419 --- .../sharedfp_individual_file_open.c | 22 ++++++++++++++++++- .../sharedfp_lockedfile_file_open.c | 5 +++++ ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c | 5 +++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c b/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c index 01dac1eceb..a0d33fe7a6 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c @@ -59,6 +59,7 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, if ( NULL == sh ){ opal_output(0, "mca_sharedfp_individual_file_open: Error, unable to malloc " "f_sharedfp_ptr struct\n"); + free (shfileHandle ); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -93,6 +94,8 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, MPI_INFO_NULL, datafilehandle, false); if ( OMPI_SUCCESS != err) { opal_output(0, "mca_sharedfp_individual_file_open: Error during datafile file open\n"); + free (shfileHandle ); + free (sh); return err; } @@ -105,15 +108,32 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, } /* metadata 
filename created by appending .metadata.$rank to the original filename*/ - metadatafilename = (char*) malloc ( len ); + metadatafilename = (char*) malloc ( len ); + if ( NULL == metadatafilename ) { + free (shfileHandle ); + free (sh); + opal_output(0, "mca_sharedfp_individual_file_open: Error during memory allocation\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } snprintf ( metadatafilename, len, "%s%s%d", filename, ".metadata.",rank); metadatafilehandle = (mca_io_ompio_file_t *)malloc(sizeof(mca_io_ompio_file_t)); + if ( NULL == metadatafilehandle ) { + free (shfileHandle ); + free (sh); + free (metadatafilename); + opal_output(0, "mca_sharedfp_individual_file_open: Error during memory allocation\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } err = ompio_io_ompio_file_open ( MPI_COMM_SELF,metadatafilename, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, metadatafilehandle, false); if ( OMPI_SUCCESS != err) { opal_output(0, "mca_sharedfp_individual_file_open: Error during metadatafile file open\n"); + free (shfileHandle ); + free (sh); + free (metadatafilename); + free (metadatafilehandle); return err; } diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c index 4687a70c9f..c21e8e15ba 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c @@ -58,6 +58,7 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, sh = (struct mca_sharedfp_base_data_t*)malloc(sizeof(struct mca_sharedfp_base_data_t)); if ( NULL == sh){ opal_output(0, "mca_sharedfp_lockedfile_file_open: Error, unable to malloc f_sharedfp_ptr struct\n"); + free ( shfileHandle); return OMPI_ERR_OUT_OF_RESOURCE; } /*Populate the sh file structure based on the implementation*/ @@ -77,6 +78,8 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, module_data = (struct 
mca_sharedfp_lockedfile_data*)malloc(sizeof(struct mca_sharedfp_lockedfile_data)); if ( NULL == module_data ) { printf("mca_sharedfp_lockedfile_file_open: Error, unable to malloc lockedfile_data struct\n"); + free (shfileHandle); + free (sh); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -101,6 +104,8 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, handle = open ( lockedfilename, O_RDWR, 0644 ); if ( -1 == handle ) { printf("[%d]mca_sharedfp_lockedfile_file_open: Error during file open\n", rank); + free (shfileHandle); + free (sh); free(module_data); return OMPI_ERROR; } diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index d6c15a92ad..430f4ef7e7 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -63,9 +63,14 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, /*Open the same file again without shared file pointer*/ /*----------------------------------------------------*/ shfileHandle = (mca_io_ompio_file_t *)malloc(sizeof(mca_io_ompio_file_t)); + if ( NULL == shfileHandle ) { + opal_output(0, "mca_sharedfp_sm_file_open: Error during memory allocation\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } err = ompio_io_ompio_file_open(comm,filename,amode,info,shfileHandle,false); if ( OMPI_SUCCESS != err) { opal_output(0, "mca_sharedfp_sm_file_open: Error during file open\n"); + free (shfileHandle); return err; } From fcaa733539f52934ab6dc92d274464558a8b8d40 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Thu, 30 Jul 2015 17:24:01 -0500 Subject: [PATCH 06/14] make sure the final number of aggregators is recorded correctly when not using our aggregator selection logic. 
--- ompi/mca/io/ompio/io_ompio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ompi/mca/io/ompio/io_ompio.c b/ompi/mca/io/ompio/io_ompio.c index d1baa63e3b..bcb48a77c2 100644 --- a/ompi/mca/io/ompio/io_ompio.c +++ b/ompi/mca/io/ompio/io_ompio.c @@ -1053,6 +1053,7 @@ int ompi_io_ompio_set_aggregator_props (struct mca_io_ompio_file_t *fh, } fh->f_aggregator_index = 0; + fh->f_final_num_aggrs = num_aggregators; return OMPI_SUCCESS; } From 23ad3d0078cbe6868964adfc08f592de630cb6f6 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Fri, 7 Aug 2015 16:32:53 -0500 Subject: [PATCH 07/14] Performance tuning: increase the priority of the sm sharedfp component to ensure that it is selected if it can run. --- ompi/mca/sharedfp/sm/sharedfp_sm_component.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_component.c b/ompi/mca/sharedfp/sm/sharedfp_sm_component.c index 3f629da8a2..69a0e06a0c 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_component.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_component.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ @@ -38,7 +38,7 @@ const char *mca_sharedfp_sm_component_version_string = /* * Global variables */ -int mca_sharedfp_sm_priority=10; +int mca_sharedfp_sm_priority=30; int mca_sharedfp_sm_verbose=0; static int sm_register(void); @@ -72,7 +72,7 @@ mca_sharedfp_base_component_2_0_0_t mca_sharedfp_sm_component = { static int sm_register(void) { - mca_sharedfp_sm_priority = 10; + mca_sharedfp_sm_priority = 30; (void) mca_base_component_var_register(&mca_sharedfp_sm_component.sharedfpm_version, "priority", "Priority of the sm sharedfp component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, From 219253027ab6e0ce7645518d88746a7e08238c64 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Tue, 11 Aug 2015 08:35:22 -0600 Subject: [PATCH 08/14] pml/crcpw - remove crcpw pml First in a series of commits to remove unmaintained checkpoint/restart related code from the v2.x release branch. Signed-off-by: Howard Pritchard --- ompi/mca/pml/crcpw/Makefile.am | 42 -- ompi/mca/pml/crcpw/configure.m4 | 28 - ompi/mca/pml/crcpw/owner.txt | 7 - ompi/mca/pml/crcpw/pml_crcpw.h | 149 ---- ompi/mca/pml/crcpw/pml_crcpw_component.c | 192 ------ ompi/mca/pml/crcpw/pml_crcpw_module.c | 842 ----------------------- 6 files changed, 1260 deletions(-) delete mode 100644 ompi/mca/pml/crcpw/Makefile.am delete mode 100644 ompi/mca/pml/crcpw/configure.m4 delete mode 100644 ompi/mca/pml/crcpw/owner.txt delete mode 100644 ompi/mca/pml/crcpw/pml_crcpw.h delete mode 100644 ompi/mca/pml/crcpw/pml_crcpw_component.c delete mode 100644 ompi/mca/pml/crcpw/pml_crcpw_module.c diff --git a/ompi/mca/pml/crcpw/Makefile.am b/ompi/mca/pml/crcpw/Makefile.am deleted file mode 100644 index 381c37fd35..0000000000 --- a/ompi/mca/pml/crcpw/Makefile.am +++ /dev/null @@ -1,42 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved.
-# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -crcpw_sources = \ - pml_crcpw.h \ - pml_crcpw_component.c \ - pml_crcpw_module.c - -if MCA_BUILD_ompi_pml_crcpw_DSO -component_noinst = -component_install = mca_pml_crcpw.la -else -component_noinst = libmca_pml_crcpw.la -component_install = -endif - - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_pml_crcpw_la_SOURCES = $(crcpw_sources) -mca_pml_crcpw_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_pml_crcpw_la_SOURCES = $(crcpw_sources) -libmca_pml_crcpw_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/pml/crcpw/configure.m4 b/ompi/mca/pml/crcpw/configure.m4 deleted file mode 100644 index b09529be4f..0000000000 --- a/ompi/mca/pml/crcpw/configure.m4 +++ /dev/null @@ -1,28 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2010 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2004-2005 The Trustees of the University of Tennessee. -# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_pml_crcpw_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_ompi_pml_crcpw_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/pml/crcpw/Makefile]) - - # If we don't want FT, don't compile this component - AS_IF([test "$opal_want_ft_cr" = "1"], - [$1], - [$2]) -])dnl diff --git a/ompi/mca/pml/crcpw/owner.txt b/ompi/mca/pml/crcpw/owner.txt deleted file mode 100644 index 29b7476009..0000000000 --- a/ompi/mca/pml/crcpw/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: nobody -status: unmaintained diff --git a/ompi/mca/pml/crcpw/pml_crcpw.h b/ompi/mca/pml/crcpw/pml_crcpw.h deleted file mode 100644 index 5f6b800836..0000000000 --- a/ompi/mca/pml/crcpw/pml_crcpw.h +++ /dev/null @@ -1,149 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2006 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_CRCPW_H -#define MCA_PML_CRCPW_H - -#include "ompi_config.h" - -#include "opal/class/opal_free_list.h" -#include "ompi/request/request.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/pml_base_request.h" -#include "ompi/mca/pml/base/pml_base_bsend.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/crcp/base/base.h" - -BEGIN_C_DECLS - - /** - * CRCPW PML module - */ - struct mca_pml_crcpw_component_t { - mca_pml_base_component_t super; - int verbose; - int priority; - int output_handle; - bool pml_crcp_wrapped; - }; - typedef struct mca_pml_crcpw_component_t mca_pml_crcpw_component_t; - OMPI_MODULE_DECLSPEC extern mca_pml_crcpw_component_t mca_pml_crcpw_component; - - struct mca_pml_crcpw_module_t { - mca_pml_base_module_t super; - mca_pml_base_component_t wrapped_pml_component; - mca_pml_base_module_t wrapped_pml_module; - }; - typedef struct mca_pml_crcpw_module_t mca_pml_crcpw_module_t; - extern mca_pml_crcpw_module_t mca_pml_crcpw_module; - - /* Free list of PML states */ - OMPI_MODULE_DECLSPEC extern opal_free_list_t pml_state_list; - OMPI_MODULE_DECLSPEC extern bool pml_crcpw_is_finalized; - - /* - * PML module functions. - */ - int mca_pml_crcpw_component_open(void); - int mca_pml_crcpw_component_close(void); - - mca_pml_base_module_t* mca_pml_crcpw_component_init(int *priority, - bool enable_progress_threads, - bool enable_mpi_threads - ); - - int mca_pml_crcpw_component_finalize(void); - - /* - * PML interface functions. 
- */ - int mca_pml_crcpw_enable( bool enable ); - - int mca_pml_crcpw_add_comm( struct ompi_communicator_t* comm ); - int mca_pml_crcpw_del_comm( struct ompi_communicator_t* comm ); - - int mca_pml_crcpw_add_procs( struct ompi_proc_t **procs, size_t nprocs ); - int mca_pml_crcpw_del_procs( struct ompi_proc_t **procs, size_t nprocs ); - - int mca_pml_crcpw_progress(void); - - int mca_pml_crcpw_iprobe(int dst, int tag, struct ompi_communicator_t* comm, int *matched, ompi_status_public_t* status ); - - int mca_pml_crcpw_probe( int dst, int tag, struct ompi_communicator_t* comm, ompi_status_public_t* status ); - - - int mca_pml_crcpw_improbe( int dst, - int tag, - struct ompi_communicator_t* comm, - int *matched, - struct ompi_message_t **message, - ompi_status_public_t* status ); - - int mca_pml_crcpw_mprobe( int dst, - int tag, - struct ompi_communicator_t* comm, - struct ompi_message_t **message, - ompi_status_public_t* status ); - - int mca_pml_crcpw_isend_init( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request ); - - int mca_pml_crcpw_isend( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request ); - - int mca_pml_crcpw_send( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm ); - - int mca_pml_crcpw_irecv_init( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, struct ompi_request_t **request); - - int mca_pml_crcpw_irecv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, struct ompi_request_t **request ); - - int mca_pml_crcpw_recv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, 
ompi_status_public_t* status); - - int mca_pml_crcpw_imrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - struct ompi_request_t **request ); - - int mca_pml_crcpw_mrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - ompi_status_public_t* status ); - - int mca_pml_crcpw_dump( struct ompi_communicator_t* comm, int verbose ); - - int mca_pml_crcpw_start( size_t count, ompi_request_t** requests ); - - int mca_pml_crcpw_ft_event(int state); - -END_C_DECLS - -#endif /* MCA_PML_CRCPW_H */ diff --git a/ompi/mca/pml/crcpw/pml_crcpw_component.c b/ompi/mca/pml/crcpw/pml_crcpw_component.c deleted file mode 100644 index 64bbfe455d..0000000000 --- a/ompi/mca/pml/crcpw/pml_crcpw_component.c +++ /dev/null @@ -1,192 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2006 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "opal/runtime/opal.h" -#include "opal/util/output.h" -#include "opal/mca/event/event.h" -#include "opal/mca/btl/base/base.h" - -#include "mpi.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/pml_base_bsend.h" -#include "ompi/mca/pml/crcpw/pml_crcpw.h" -#include "ompi/mca/bml/base/base.h" - -static int mca_pml_crcpw_component_register(void); - -mca_pml_crcpw_component_t mca_pml_crcpw_component = { - { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .pmlm_version = { - MCA_PML_BASE_VERSION_2_0_0, - - .mca_component_name = "crcpw", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - .mca_open_component = mca_pml_crcpw_component_open, - .mca_close_component = mca_pml_crcpw_component_close, - .mca_register_component_params = mca_pml_crcpw_component_register, - }, - .pmlm_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .pmlm_init = mca_pml_crcpw_component_init, - .pmlm_finalize = mca_pml_crcpw_component_finalize, - }, - /* Verbosity */ - 0, - /* Priority */ - PML_SELECT_WRAPPER_PRIORITY, - /* Are we being used as a wrapper? 
*/ - false -}; - -opal_free_list_t pml_state_list; -bool pml_crcpw_is_finalized = false; - -static int mca_pml_crcpw_component_register(void) -{ - /* - * Register some MCA parameters - */ - mca_pml_crcpw_component.priority = PML_SELECT_WRAPPER_PRIORITY; - (void) mca_base_component_var_register(&mca_pml_crcpw_component.super.pmlm_version, "priority", - "Priority of the PML crcpw component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_pml_crcpw_component.priority); - - mca_pml_crcpw_component.verbose = 0; - (void) mca_base_component_var_register(&mca_pml_crcpw_component.super.pmlm_version, "verbose", - "Verbose level for the PML crcpw component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_pml_crcpw_component.verbose); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_component_open(void) -{ - opal_output_verbose( 10, mca_pml_crcpw_component.output_handle, - "pml:crcpw: component_open: Open"); - - mca_pml_crcpw_component.output_handle = opal_output_open(NULL); - if ( 0 != mca_pml_crcpw_component.verbose) { - opal_output_set_verbosity(mca_pml_crcpw_component.output_handle, - mca_pml_crcpw_component.verbose); - } - - /* - * Debug Output - */ - opal_output_verbose(10, mca_pml_crcpw_component.output_handle, - "pml:crcpw: open()"); - opal_output_verbose(20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: open: priority = %d", - mca_pml_crcpw_component.priority); - opal_output_verbose(20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: open: verbosity = %d", - mca_pml_crcpw_component.verbose); - - return OMPI_SUCCESS; -} - - -int mca_pml_crcpw_component_close(void) -{ - opal_output_verbose( 20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: component_close: Close"); - - return OMPI_SUCCESS; -} - - -mca_pml_base_module_t* mca_pml_crcpw_component_init(int* priority, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - /* We use the 
PML_SELECT_WRAPPER_PRIORITY to indicate when this - * component should wrap around what is already selected - * If it is not set to this seminal value, then we are doing a - * normal selection operation - */ - if(*priority == PML_SELECT_WRAPPER_PRIORITY ) { - opal_output_verbose( 20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: component_init: Wrap the selected component %s", - mca_pml_base_selected_component.pmlm_version.mca_component_name); - - mca_pml_crcpw_module.wrapped_pml_component = mca_pml_base_selected_component; - mca_pml_crcpw_module.wrapped_pml_module = mca_pml; - mca_pml_crcpw_component.pml_crcp_wrapped = true; - - opal_output_verbose( 20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: component_init: Initalize Wrapper"); - - OBJ_CONSTRUCT(&pml_state_list, opal_free_list_t); - opal_free_list_init (&pml_state_list, - sizeof(ompi_crcp_base_pml_state_t), - opal_cache_line_size, - OBJ_CLASS(ompi_crcp_base_pml_state_t), - 0,opal_cache_line_size, - 5, /* Initial number */ - -1, /* Max = Unlimited */ - 64, /* Increment by */ - NULL, 0, NULL, NULL, NULL); - } - else { - opal_output_verbose( 20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: component_init: Priority %d", - mca_pml_crcpw_component.priority); - } - - - *priority = mca_pml_crcpw_component.priority; - - pml_crcpw_is_finalized = false; - - return &mca_pml_crcpw_module.super; -} - -int mca_pml_crcpw_component_finalize(void) -{ - opal_output_verbose( 20, mca_pml_crcpw_component.output_handle, - "pml:crcpw: component_finalize: Finalize"); - - OBJ_DESTRUCT(&pml_state_list); - - pml_crcpw_is_finalized = true; - - if(mca_pml_crcpw_component.pml_crcp_wrapped) { - return mca_pml_crcpw_module.wrapped_pml_component.pmlm_finalize(); - } - - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/crcpw/pml_crcpw_module.c b/ompi/mca/pml/crcpw/pml_crcpw_module.c deleted file mode 100644 index 51ed444221..0000000000 --- a/ompi/mca/pml/crcpw/pml_crcpw_module.c +++ /dev/null @@ -1,842 +0,0 @@ -/* 
-*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2006 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include - -#include "opal/mca/btl/base/base.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/crcpw/pml_crcpw.h" -#include "ompi/mca/bml/base/base.h" - -#include "opal/class/opal_free_list.h" - -mca_pml_crcpw_module_t mca_pml_crcpw_module = { - { - mca_pml_crcpw_add_procs, - mca_pml_crcpw_del_procs, - mca_pml_crcpw_enable, - mca_pml_crcpw_progress, - mca_pml_crcpw_add_comm, - mca_pml_crcpw_del_comm, - mca_pml_crcpw_irecv_init, - mca_pml_crcpw_irecv, - mca_pml_crcpw_recv, - mca_pml_crcpw_isend_init, - mca_pml_crcpw_isend, - mca_pml_crcpw_send, - mca_pml_crcpw_iprobe, - mca_pml_crcpw_probe, - mca_pml_crcpw_start, - mca_pml_crcpw_improbe, - mca_pml_crcpw_mprobe, - mca_pml_crcpw_imrecv, - mca_pml_crcpw_mrecv, - mca_pml_crcpw_dump, - mca_pml_crcpw_ft_event, - - 32768, - INT_MAX - } -}; - -#define PML_CRCP_STATE_ALLOC(pml_state) \ -do { \ - if( !pml_crcpw_is_finalized ) { \ - pml_state = (ompi_crcp_base_pml_state_t*) \ - opal_free_list_wait (&pml_state_list); \ - } \ -} while(0); - -#define PML_CRCP_STATE_RETURN(pml_state) \ -do { \ - if( !pml_crcpw_is_finalized ) { \ - opal_free_list_return (&pml_state_list, \ - 
(opal_free_list_item_t*)pml_state); \ - } \ -} while(0); - -int mca_pml_crcpw_enable(bool enable) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - if( OPAL_UNLIKELY(NULL == ompi_crcp.pml_enable) ) { - return mca_pml_crcpw_module.wrapped_pml_module.pml_enable(enable); - } - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_enable(enable, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_enable(enable) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_enable(enable, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_add_comm(ompi_communicator_t* comm) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - if( OPAL_UNLIKELY(NULL == ompi_crcp.pml_add_comm) ) { - return mca_pml_crcpw_module.wrapped_pml_module.pml_add_comm(comm); - } - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_add_comm(comm, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = 
mca_pml_crcpw_module.wrapped_pml_module.pml_add_comm(comm) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_add_comm(comm, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_del_comm(ompi_communicator_t* comm) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - if( OPAL_UNLIKELY(NULL == ompi_crcp.pml_del_comm) ) { - return mca_pml_crcpw_module.wrapped_pml_module.pml_del_comm(comm); - } - - PML_CRCP_STATE_ALLOC(pml_state); - if( NULL == pml_state ) { - return mca_pml_crcpw_module.wrapped_pml_module.pml_del_comm(comm); - } - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_del_comm(comm, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_del_comm(comm) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_del_comm(comm, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_add_procs(ompi_proc_t** procs, size_t nprocs) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = 
&(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_add_procs(procs, nprocs, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_add_procs(procs, nprocs) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_add_procs(procs, nprocs, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_del_procs(ompi_proc_t** procs, size_t nprocs) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_del_procs(procs, nprocs, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_del_procs(procs, nprocs) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_del_procs(procs, nprocs, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_iprobe(int dst, int tag, struct ompi_communicator_t* comm, int *matched, 
ompi_status_public_t* status ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_iprobe(dst, tag, comm, matched, status, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_DONE == pml_state->state) { - goto CLEANUP; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_iprobe(dst, tag, comm, matched, status) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_iprobe(dst, tag, comm, matched, status, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - CLEANUP: - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_probe( int dst, int tag, struct ompi_communicator_t* comm, ompi_status_public_t* status ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_probe(dst, tag, comm, status, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_DONE == pml_state->state) { - goto CLEANUP; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = 
mca_pml_crcpw_module.wrapped_pml_module.pml_probe(dst, tag, comm, status) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_probe(dst, tag, comm, status, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - CLEANUP: - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_isend_init( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_isend_init(buf, count, datatype, dst, tag, mode, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_isend_init(buf, count, datatype, dst, tag, mode, comm, request) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_isend_init(buf, count, datatype, dst, tag, mode, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_isend( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request 
) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_isend(buf, count, datatype, dst, tag, mode, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_isend(buf, count, datatype, dst, tag, mode, comm, request) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_isend(buf, count, datatype, dst, tag, mode, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - opal_cr_stall_check = false; - OPAL_CR_TEST_CHECKPOINT_READY(); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_send( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, - mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_send(buf, count, datatype, dst, tag, mode, comm, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = 
mca_pml_crcpw_module.wrapped_pml_module.pml_send(buf, count, datatype, dst, tag, mode, comm) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_send(buf, count, datatype, dst, tag, mode, comm, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - opal_cr_stall_check = false; - OPAL_CR_TEST_CHECKPOINT_READY(); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_irecv_init( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, struct ompi_request_t **request) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_irecv_init(buf, count, datatype, src, tag, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_irecv_init(buf, count, datatype, src, tag, comm, request) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_irecv_init(buf, count, datatype, src, tag, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_irecv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, struct 
ompi_request_t **request ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_irecv(buf, count, datatype, src, tag, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_DONE == pml_state->state) { - goto CLEANUP; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_irecv(buf, count, datatype, src, tag, comm, request) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_irecv(buf, count, datatype, src, tag, comm, request, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - CLEANUP: - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_recv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, - struct ompi_communicator_t* comm, ompi_status_public_t* given_status) -{ - int ret = OMPI_SUCCESS, actual_ret = OMPI_SUCCESS; - ompi_status_public_t* status = NULL; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - if( given_status == NULL) { - status = (ompi_status_public_t*)malloc(sizeof(ompi_status_public_t)); - } - else { - status = given_status; - } - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_recv(buf, count, datatype, src, tag, comm, status, 
pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_DONE == pml_state->state) { - goto CLEANUP; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (actual_ret = mca_pml_crcpw_module.wrapped_pml_module.pml_recv(buf, count, datatype, src, tag, comm, status) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_recv(buf, count, datatype, src, tag, comm, status, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( given_status == NULL) { - free(status); - } - - CLEANUP: - PML_CRCP_STATE_RETURN(pml_state); - - opal_cr_stall_check = false; - OPAL_CR_TEST_CHECKPOINT_READY(); - - return actual_ret; -} - -int mca_pml_crcpw_dump( struct ompi_communicator_t* comm, int verbose ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_dump(comm, verbose, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_dump(comm, verbose) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_dump(comm, verbose, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; 
-} - -int mca_pml_crcpw_progress(void) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - if( OPAL_LIKELY(NULL == ompi_crcp.pml_progress) ) { - return mca_pml_crcpw_module.wrapped_pml_module.pml_progress(); - } - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_progress(pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_progress() ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_progress(pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - -int mca_pml_crcpw_start( size_t count, ompi_request_t** requests ) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_start(count, requests, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_DONE == pml_state->state) { - goto CLEANUP; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_start(count, requests) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - 
pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_start(count, requests, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - CLEANUP: - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} - - -int -mca_pml_crcpw_improbe(int dst, - int tag, - struct ompi_communicator_t* comm, - int *matched, - struct ompi_message_t **message, - ompi_status_public_t* status) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - - -int -mca_pml_crcpw_mprobe(int dst, - int tag, - struct ompi_communicator_t* comm, - struct ompi_message_t **message, - ompi_status_public_t* status) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - - -int -mca_pml_crcpw_imrecv(void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - struct ompi_request_t **request) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - - -int -mca_pml_crcpw_mrecv(void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - ompi_status_public_t* status) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - - -int mca_pml_crcpw_ft_event(int state) -{ - int ret; - ompi_crcp_base_pml_state_t * pml_state = NULL; - - PML_CRCP_STATE_ALLOC(pml_state); - - pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component); - pml_state->wrapped_pml_module = &(mca_pml_crcpw_module.wrapped_pml_module); - - pml_state->state = OMPI_CRCP_PML_PRE; - pml_state = ompi_crcp.pml_ft_event(state, pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - if( OMPI_CRCP_PML_SKIP != pml_state->state && - NULL != mca_pml_crcpw_module.wrapped_pml_module.pml_ft_event) { - if( OMPI_SUCCESS != (ret = mca_pml_crcpw_module.wrapped_pml_module.pml_ft_event(state) ) ) { - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - } - - pml_state->state = OMPI_CRCP_PML_POST; - pml_state = ompi_crcp.pml_ft_event(state, 
pml_state); - if( OMPI_SUCCESS != pml_state->error_code) { - ret = pml_state->error_code; - PML_CRCP_STATE_RETURN(pml_state); - return ret; - } - - PML_CRCP_STATE_RETURN(pml_state); - - return OMPI_SUCCESS; -} From 78f85e095032cf14c65f5ee2300b6fadb98a0f6a Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Thu, 30 Jul 2015 17:30:07 -0500 Subject: [PATCH 09/14] Performance tuning: make sure the individual component is selected for 1 and 2 process communicators (important for some benchmarks) --- ompi/mca/fcoll/base/fcoll_base_file_select.c | 3 +++ ompi/mca/fcoll/individual/fcoll_individual_module.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/ompi/mca/fcoll/base/fcoll_base_file_select.c b/ompi/mca/fcoll/base/fcoll_base_file_select.c index 433098af36..9dba84ed43 100644 --- a/ompi/mca/fcoll/base/fcoll_base_file_select.c +++ b/ompi/mca/fcoll/base/fcoll_base_file_select.c @@ -262,6 +262,9 @@ int mca_fcoll_base_query_table (struct mca_io_ompio_file_t *file, char *name) file->f_cc_size >= file->f_stripe_size) { return 1; } + if ( 2 >= (int)file->f_size ){ + return 1; + } } if (!strcmp (name, "dynamic")) { if ((int)file->f_cc_size < file->f_bytes_per_agg && diff --git a/ompi/mca/fcoll/individual/fcoll_individual_module.c b/ompi/mca/fcoll/individual/fcoll_individual_module.c index b9c95a86ba..eb30d6c41c 100644 --- a/ompi/mca/fcoll/individual/fcoll_individual_module.c +++ b/ompi/mca/fcoll/individual/fcoll_individual_module.c @@ -64,6 +64,9 @@ mca_fcoll_individual_component_file_query (mca_io_ompio_file_t *fh, int *priorit if (*priority < 50) { *priority = 50; } + if ( 2 >= fh->f_size ) { + *priority = 100; + } } return &individual; From 6f50ca19ac19ccd72aec11365e134821335af36d Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Fri, 7 Aug 2015 11:50:25 -0500 Subject: [PATCH 10/14] free the datatypes that were created using type_dup during file_set_view --- ompi/mca/io/ompio/io_ompio_file_open.c | 8 ++++++++ ompi/mca/io/ompio/io_ompio_file_set_view.c | 5 ++++- 2 files 
changed, 12 insertions(+), 1 deletion(-) diff --git a/ompi/mca/io/ompio/io_ompio_file_open.c b/ompi/mca/io/ompio/io_ompio_file_open.c index 057c57e7af..3d85a20a21 100644 --- a/ompi/mca/io/ompio/io_ompio_file_open.c +++ b/ompi/mca/io/ompio/io_ompio_file_open.c @@ -380,6 +380,14 @@ ompio_io_ompio_file_close (mca_io_ompio_file_t *ompio_fh) ompi_datatype_destroy (&ompio_fh->f_iov_type); } + if ( MPI_DATATYPE_NULL != ompio_fh->f_etype ) { + ompi_datatype_destroy (&ompio_fh->f_etype); + } + if ( MPI_DATATYPE_NULL != ompio_fh->f_filetype ){ + ompi_datatype_destroy (&ompio_fh->f_filetype); + } + + if (MPI_COMM_NULL != ompio_fh->f_comm && (ompio_fh->f_flags & OMPIO_SHAREDFP_IS_SET) ) { ompi_comm_free (&ompio_fh->f_comm); } diff --git a/ompi/mca/io/ompio/io_ompio_file_set_view.c b/ompi/mca/io/ompio/io_ompio_file_set_view.c index 812e3096b7..7cbdec6801 100644 --- a/ompi/mca/io/ompio/io_ompio_file_set_view.c +++ b/ompi/mca/io/ompio/io_ompio_file_set_view.c @@ -138,7 +138,10 @@ int mca_io_ompio_file_set_view (ompi_file_t *fp, data = (mca_io_ompio_data_t *) fp->f_io_selected_data; fh = &data->ompio_fh; - + + ompi_datatype_destroy (&fh->f_etype); + ompi_datatype_destroy (&fh->f_filetype); + if (NULL != fh->f_decoded_iov) { free (fh->f_decoded_iov); fh->f_decoded_iov = NULL; From 53184544eea1adc52c3834a658ff56688de0aaee Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Fri, 7 Aug 2015 12:49:58 -0500 Subject: [PATCH 11/14] Performance tuning. make sure we catch if the user wants to set the default fileview and replace it with our optimized default file view. Otherwise, performance will suffer. file_get_view should still return the correct filetype, not our optimized default file view. This is the correct version compared to ffa67b96933acafb3cfacf39a074d3dc1fe134ae, which unfortunately broke some test cases in mpi_test_suite. Thanks for @ggouaillardet for reporting this! 
Conflicts: ompi/mca/io/ompio/io_ompio.c ompi/mca/io/ompio/io_ompio_file_set_view.c --- ompi/mca/io/ompio/io_ompio.c | 8 ++--- ompi/mca/io/ompio/io_ompio.h | 3 ++ ompi/mca/io/ompio/io_ompio_file_open.c | 4 +++ ompi/mca/io/ompio/io_ompio_file_set_view.c | 42 +++++++++++++++++----- 4 files changed, 44 insertions(+), 13 deletions(-) diff --git a/ompi/mca/io/ompio/io_ompio.c b/ompi/mca/io/ompio/io_ompio.c index dd8c1f665f..bcb48a77c2 100644 --- a/ompi/mca/io/ompio/io_ompio.c +++ b/ompi/mca/io/ompio/io_ompio.c @@ -102,7 +102,6 @@ static int mca_io_ompio_merge_groups(mca_io_ompio_file_t *fh, int num_merge_aggrs); - int ompi_io_ompio_set_file_defaults (mca_io_ompio_file_t *fh) { @@ -133,15 +132,16 @@ int ompi_io_ompio_set_file_defaults (mca_io_ompio_file_t *fh) fh->f_init_num_aggrs = -1; fh->f_init_aggr_list = NULL; - ompi_datatype_create_contiguous(1048576, + ompi_datatype_create_contiguous(MCA_IO_DEFAULT_FILE_VIEW_SIZE, &ompi_mpi_byte.dt, &default_file_view); ompi_datatype_commit (&default_file_view); fh->f_etype = &ompi_mpi_byte.dt; fh->f_filetype = default_file_view; - - + ompi_datatype_duplicate ( &ompi_mpi_byte.dt, &fh->f_orig_filetype ); + + /* Default file View */ fh->f_iov_type = MPI_DATATYPE_NULL; fh->f_stripe_size = mca_io_ompio_bytes_per_agg; diff --git a/ompi/mca/io/ompio/io_ompio.h b/ompi/mca/io/ompio/io_ompio.h index 0649f7a01e..e15769edac 100644 --- a/ompi/mca/io/ompio/io_ompio.h +++ b/ompi/mca/io/ompio/io_ompio.h @@ -57,7 +57,9 @@ OMPI_DECLSPEC extern int mca_io_ompio_coll_timing_info; #define OMPIO_CONTIGUOUS_FVIEW 0x00000010 #define OMPIO_AGGREGATOR_IS_SET 0x00000020 #define OMPIO_SHAREDFP_IS_SET 0x00000040 + #define QUEUESIZE 2048 +#define MCA_IO_DEFAULT_FILE_VIEW_SIZE 4*1024*1024 #define OMPIO_MIN(a, b) (((a) < (b)) ? (a) : (b)) #define OMPIO_MAX(a, b) (((a) < (b)) ? 
(b) : (a)) @@ -320,6 +322,7 @@ struct mca_io_ompio_file_t { size_t f_view_size; ompi_datatype_t *f_etype; ompi_datatype_t *f_filetype; + ompi_datatype_t *f_orig_filetype; /* the fileview passed by the user to us */ size_t f_etype_size; /* contains IO requests that needs to be read/written */ diff --git a/ompi/mca/io/ompio/io_ompio_file_open.c b/ompi/mca/io/ompio/io_ompio_file_open.c index 3d85a20a21..75e6226947 100644 --- a/ompi/mca/io/ompio/io_ompio_file_open.c +++ b/ompi/mca/io/ompio/io_ompio_file_open.c @@ -387,6 +387,10 @@ ompio_io_ompio_file_close (mca_io_ompio_file_t *ompio_fh) ompi_datatype_destroy (&ompio_fh->f_filetype); } + if ( MPI_DATATYPE_NULL != ompio_fh->f_orig_filetype ){ + ompi_datatype_destroy (&ompio_fh->f_orig_filetype); + } + if (MPI_COMM_NULL != ompio_fh->f_comm && (ompio_fh->f_flags & OMPIO_SHAREDFP_IS_SET) ) { ompi_comm_free (&ompio_fh->f_comm); diff --git a/ompi/mca/io/ompio/io_ompio_file_set_view.c b/ompi/mca/io/ompio/io_ompio_file_set_view.c index 7cbdec6801..a45e2acf7b 100644 --- a/ompi/mca/io/ompio/io_ompio_file_set_view.c +++ b/ompi/mca/io/ompio/io_ompio_file_set_view.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2008-2015 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -135,12 +135,15 @@ int mca_io_ompio_file_set_view (ompi_file_t *fp, { mca_io_ompio_data_t *data; mca_io_ompio_file_t *fh; + size_t ftype_size; + OPAL_PTRDIFF_TYPE ftype_extent, lb; data = (mca_io_ompio_data_t *) fp->f_io_selected_data; fh = &data->ompio_fh; ompi_datatype_destroy (&fh->f_etype); ompi_datatype_destroy (&fh->f_filetype); + ompi_datatype_destroy (&fh->f_orig_filetype); if (NULL != fh->f_decoded_iov) { free (fh->f_decoded_iov); @@ -157,14 +160,35 @@ int mca_io_ompio_file_set_view (ompi_file_t *fp, fh->f_flags |= OMPIO_FILE_VIEW_IS_SET; fh->f_datarep = strdup (datarep); - - mca_io_ompio_set_view_internal (fh, - disp, - etype, - filetype, - datarep, - info); + ompi_datatype_duplicate (filetype, &fh->f_orig_filetype ); + opal_datatype_get_extent(&filetype->super, &lb, &ftype_extent); + opal_datatype_type_size (&filetype->super, &ftype_size); + + if ( etype == filetype && + ompi_datatype_is_predefined (filetype ) && + ftype_extent == (OPAL_PTRDIFF_TYPE)ftype_size ){ + ompi_datatype_t *newfiletype; + ompi_datatype_create_contiguous(MCA_IO_DEFAULT_FILE_VIEW_SIZE, + &ompi_mpi_byte.dt, + &newfiletype); + ompi_datatype_commit (&newfiletype); + mca_io_ompio_set_view_internal (fh, + disp, + etype, + newfiletype, + datarep, + info); + ompi_datatype_destroy ( &newfiletype ); + } + else { + mca_io_ompio_set_view_internal (fh, + disp, + etype, + filetype, + datarep, + info); + } if (OMPI_SUCCESS != mca_fcoll_base_file_select (&data->ompio_fh, NULL)) { @@ -189,7 +213,7 @@ int mca_io_ompio_file_get_view (struct ompi_file_t *fp, *disp = fh->f_disp; ompi_datatype_duplicate (fh->f_etype, etype); - ompi_datatype_duplicate (fh->f_filetype, filetype); + ompi_datatype_duplicate (fh->f_orig_filetype, filetype); strcpy (datarep, fh->f_datarep); return OMPI_SUCCESS; From c53d974ba577f511f60cd816be4c8451d2868605 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Fri, 7 Aug 2015 13:06:39 -0500 Subject: [PATCH 12/14] Performance tuning: change the default behavior 
of ompio to *not* segment individual read/write operations. In most cases, performance seems to be better if not segmented. --- ompi/mca/io/ompio/io_ompio_component.c | 6 +++--- ompi/mca/io/ompio/io_ompio_file_read.c | 7 ++++++- ompi/mca/io/ompio/io_ompio_file_write.c | 7 ++++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/ompi/mca/io/ompio/io_ompio_component.c b/ompi/mca/io/ompio/io_ompio_component.c index 9f64fca068..239ec425ae 100644 --- a/ompi/mca/io/ompio/io_ompio_component.c +++ b/ompi/mca/io/ompio/io_ompio_component.c @@ -31,7 +31,7 @@ #include "ompi/mca/io/io.h" #include "io_ompio.h" -int mca_io_ompio_cycle_buffer_size = OMPIO_PREALLOC_MAX_BUF_SIZE; +int mca_io_ompio_cycle_buffer_size = -1; int mca_io_ompio_bytes_per_agg = OMPIO_PREALLOC_MAX_BUF_SIZE; int mca_io_ompio_num_aggregators = -1; int mca_io_ompio_record_offset_info = 0; @@ -162,10 +162,10 @@ static int register_component(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_io_ompio_coll_timing_info); - mca_io_ompio_cycle_buffer_size = OMPIO_PREALLOC_MAX_BUF_SIZE; + mca_io_ompio_cycle_buffer_size = -1; (void) mca_base_component_var_register(&mca_io_ompio_component.io_version, "cycle_buffer_size", - "Cycle buffer size of individual reads/writes", + "Data size issued by individual reads/writes per call", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, diff --git a/ompi/mca/io/ompio/io_ompio_file_read.c b/ompi/mca/io/ompio/io_ompio_file_read.c index 1150bf27fc..fe30169b16 100644 --- a/ompi/mca/io/ompio/io_ompio_file_read.c +++ b/ompi/mca/io/ompio/io_ompio_file_read.c @@ -106,7 +106,12 @@ int ompio_io_ompio_file_read (mca_io_ompio_file_t *fh, &decoded_iov, &iov_count); - bytes_per_cycle = mca_io_ompio_cycle_buffer_size; + if ( -1 == mca_io_ompio_cycle_buffer_size ) { + bytes_per_cycle = max_data; + } + else { + bytes_per_cycle = mca_io_ompio_cycle_buffer_size; + } cycles = ceil((float)max_data/bytes_per_cycle); #if 0 diff --git 
a/ompi/mca/io/ompio/io_ompio_file_write.c b/ompi/mca/io/ompio/io_ompio_file_write.c index 08038ddc66..28d8e4a3a7 100644 --- a/ompi/mca/io/ompio/io_ompio_file_write.c +++ b/ompi/mca/io/ompio/io_ompio_file_write.c @@ -103,7 +103,12 @@ int ompio_io_ompio_file_write (mca_io_ompio_file_t *fh, &decoded_iov, &iov_count); - bytes_per_cycle = mca_io_ompio_cycle_buffer_size; + if ( -1 == mca_io_ompio_cycle_buffer_size ) { + bytes_per_cycle = max_data; + } + else { + bytes_per_cycle = mca_io_ompio_cycle_buffer_size; + } cycles = ceil((float)max_data/bytes_per_cycle); #if 0 From 3fba27b151b91c974ebdae4bfd126cb645fc98ea Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Fri, 7 Aug 2015 13:30:50 -0500 Subject: [PATCH 13/14] free memory correctly in case of an error. Fixes CID 131540 and CID 1315419 --- .../sharedfp_individual_file_open.c | 22 ++++++++++++++++++- .../sharedfp_lockedfile_file_open.c | 5 +++++ ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c | 5 +++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c b/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c index 01dac1eceb..a0d33fe7a6 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c @@ -59,6 +59,7 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, if ( NULL == sh ){ opal_output(0, "mca_sharedfp_individual_file_open: Error, unable to malloc " "f_sharedfp_ptr struct\n"); + free (shfileHandle ); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -93,6 +94,8 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, MPI_INFO_NULL, datafilehandle, false); if ( OMPI_SUCCESS != err) { opal_output(0, "mca_sharedfp_individual_file_open: Error during datafile file open\n"); + free (shfileHandle ); + free (sh); return err; } @@ -105,15 +108,32 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, } /* metadata 
filename created by appending .metadata.$rank to the original filename*/ - metadatafilename = (char*) malloc ( len ); + metadatafilename = (char*) malloc ( len ); + if ( NULL == metadatafilename ) { + free (shfileHandle ); + free (sh); + opal_output(0, "mca_sharedfp_individual_file_open: Error during memory allocation\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } snprintf ( metadatafilename, len, "%s%s%d", filename, ".metadata.",rank); metadatafilehandle = (mca_io_ompio_file_t *)malloc(sizeof(mca_io_ompio_file_t)); + if ( NULL == metadatafilehandle ) { + free (shfileHandle ); + free (sh); + free (metadatafilename); + opal_output(0, "mca_sharedfp_individual_file_open: Error during memory allocation\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } err = ompio_io_ompio_file_open ( MPI_COMM_SELF,metadatafilename, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, metadatafilehandle, false); if ( OMPI_SUCCESS != err) { opal_output(0, "mca_sharedfp_individual_file_open: Error during metadatafile file open\n"); + free (shfileHandle ); + free (sh); + free (metadatafilename); + free (metadatafilehandle); return err; } diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c index 4687a70c9f..c21e8e15ba 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c @@ -58,6 +58,7 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, sh = (struct mca_sharedfp_base_data_t*)malloc(sizeof(struct mca_sharedfp_base_data_t)); if ( NULL == sh){ opal_output(0, "mca_sharedfp_lockedfile_file_open: Error, unable to malloc f_sharedfp_ptr struct\n"); + free ( shfileHandle); return OMPI_ERR_OUT_OF_RESOURCE; } /*Populate the sh file structure based on the implementation*/ @@ -77,6 +78,8 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, module_data = (struct 
mca_sharedfp_lockedfile_data*)malloc(sizeof(struct mca_sharedfp_lockedfile_data)); if ( NULL == module_data ) { printf("mca_sharedfp_lockedfile_file_open: Error, unable to malloc lockedfile_data struct\n"); + free (shfileHandle); + free (sh); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -101,6 +104,8 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, handle = open ( lockedfilename, O_RDWR, 0644 ); if ( -1 == handle ) { printf("[%d]mca_sharedfp_lockedfile_file_open: Error during file open\n", rank); + free (shfileHandle); + free (sh); free(module_data); return OMPI_ERROR; } diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index 3b38c0a9fb..c6916e9a4e 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -64,9 +64,14 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, /*Open the same file again without shared file pointer*/ /*----------------------------------------------------*/ shfileHandle = (mca_io_ompio_file_t *)malloc(sizeof(mca_io_ompio_file_t)); + if ( NULL == shfileHandle ) { + opal_output(0, "mca_sharedfp_sm_file_open: Error during memory allocation\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } err = ompio_io_ompio_file_open(comm,filename,amode,info,shfileHandle,false); if ( OMPI_SUCCESS != err) { opal_output(0, "mca_sharedfp_sm_file_open: Error during file open\n"); + free (shfileHandle); return err; } From 59e07e539455f13d2377968ef52401bba25cdc85 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Fri, 7 Aug 2015 16:32:53 -0500 Subject: [PATCH 14/14] Performance tuning: increase the priority of the sm sharedfp component to ensure that it is selected if it can run. 
--- ompi/mca/sharedfp/sm/sharedfp_sm_component.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_component.c b/ompi/mca/sharedfp/sm/sharedfp_sm_component.c index 3f629da8a2..69a0e06a0c 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_component.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_component.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013 University of Houston. All rights reserved. + * Copyright (c) 2013-2015 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -38,7 +38,7 @@ const char *mca_sharedfp_sm_component_version_string = /* * Global variables */ -int mca_sharedfp_sm_priority=10; +int mca_sharedfp_sm_priority=30; int mca_sharedfp_sm_verbose=0; static int sm_register(void); @@ -72,7 +72,7 @@ mca_sharedfp_base_component_2_0_0_t mca_sharedfp_sm_component = { static int sm_register(void) { - mca_sharedfp_sm_priority = 10; + mca_sharedfp_sm_priority = 30; (void) mca_base_component_var_register(&mca_sharedfp_sm_component.sharedfpm_version, "priority", "Priority of the sm sharedfp component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,