Skip to content

Commit b045cfa

Browse files
committed
Avoid a double lock interlock when calling pmix_finalize
Signed-off-by: Aurelien Bouteiller <bouteill@icl.utk.edu> Signed-off-by: Aurélien Bouteiller <bouteill@icl.utk.edu>
1 parent e6f7f87 commit b045cfa

File tree

2 files changed

+28
-5
lines changed

2 files changed

+28
-5
lines changed

opal/mca/pmix/pmix2x/pmix2x_client.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
99
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
1010
* reserved.
11+
* Copyright (c) 2018 The University of Tennessee and The University
12+
* of Tennessee Research Foundation. All rights
13+
* reserved.
1114
* $COPYRIGHT$
1215
*
1316
* Additional copyrights may follow
@@ -165,6 +168,8 @@ int pmix2x_client_finalize(void)
165168
{
166169
pmix_status_t rc;
167170
opal_pmix2x_event_t *event, *ev2;
171+
opal_list_t evlist;
172+
OBJ_CONSTRUCT(&evlist, opal_list_t);
168173

169174
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
170175
"PMIx_client finalize");
@@ -178,12 +183,19 @@ int pmix2x_client_finalize(void)
178183
OPAL_PMIX_DESTRUCT_LOCK(&event->lock);
179184
OPAL_PMIX_CONSTRUCT_LOCK(&event->lock);
180185
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event);
181-
OPAL_PMIX_WAIT_THREAD(&event->lock);
182186
opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super);
183-
OBJ_RELEASE(event);
187+
/* defer the wait and release until after the loop, once
188+
* opal_pmix_base.lock has been dropped, to avoid a double mutex interlock */
189+
opal_list_append(&evlist, &event->super);
184190
}
185191
}
186192
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
193+
OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_pmix2x_event_t) {
194+
OPAL_PMIX_WAIT_THREAD(&event->lock);
195+
opal_list_remove_item(&evlist, &event->super);
196+
OBJ_RELEASE(event);
197+
}
198+
OBJ_DESTRUCT(&evlist);
187199
rc = PMIx_Finalize(NULL, 0);
188200

189201
return pmix2x_convert_rc(rc);

opal/mca/pmix/pmix2x/pmix2x_server_south.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
1010
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
1111
* reserved.
12+
* Copyright (c) 2018 The University of Tennessee and The University
13+
* of Tennessee Research Foundation. All rights
14+
* reserved.
1215
* $COPYRIGHT$
1316
*
1417
* Additional copyrights may follow
@@ -180,6 +183,8 @@ int pmix2x_server_finalize(void)
180183
{
181184
pmix_status_t rc;
182185
opal_pmix2x_event_t *event, *ev2;
186+
opal_list_t evlist;
187+
OBJ_CONSTRUCT(&evlist, opal_list_t);
183188

184189
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
185190
--opal_pmix_base.initialized;
@@ -190,13 +195,19 @@ int pmix2x_server_finalize(void)
190195
OPAL_PMIX_DESTRUCT_LOCK(&event->lock);
191196
OPAL_PMIX_CONSTRUCT_LOCK(&event->lock);
192197
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event);
193-
OPAL_PMIX_WAIT_THREAD(&event->lock);
194198
opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super);
195-
OBJ_RELEASE(event);
199+
/* defer the wait and release until after the loop, once
200+
* opal_pmix_base.lock has been dropped, to avoid a double mutex interlock */
201+
opal_list_append(&evlist, &event->super);
196202
}
197203
}
198204
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
199-
205+
OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_pmix2x_event_t) {
206+
OPAL_PMIX_WAIT_THREAD(&event->lock);
207+
opal_list_remove_item(&evlist, &event->super);
208+
OBJ_RELEASE(event);
209+
}
210+
OBJ_DESTRUCT(&evlist);
200211
rc = PMIx_server_finalize();
201212
return pmix2x_convert_rc(rc);
202213
}

0 commit comments

Comments
 (0)