From 79cabc92fd0bc5307649597f5ef9c6006d193f5d Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 10 Jan 2017 16:58:07 -0700 Subject: [PATCH] rcache/base: do not release vma stuctures in vma_tree_delete This commit fixes a deadlock that can occur when the libc version holds a lock when calling munmap. In this case we could end up calling free() from vma_tree_delete which would in turn try to obtain the lock in libc. To avoid the issue put any deleted vma's in a new list on the vma module and release them on the next call to vma_tree_insert. This should be safe as this function is not called from the memory hooks. Signed-off-by: Nathan Hjelm --- opal/mca/rcache/base/rcache_base_vma.h | 3 +- opal/mca/rcache/base/rcache_base_vma_tree.c | 32 ++++++++++++++------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/opal/mca/rcache/base/rcache_base_vma.h b/opal/mca/rcache/base/rcache_base_vma.h index a5531f2c270..48a9c606c70 100644 --- a/opal/mca/rcache/base/rcache_base_vma.h +++ b/opal/mca/rcache/base/rcache_base_vma.h @@ -13,7 +13,7 @@ * * Copyright (c) 2006 Voltaire. All rights reserved. * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * * $COPYRIGHT$ @@ -44,6 +44,7 @@ struct mca_rcache_base_vma_module_t { opal_object_t super; opal_rb_tree_t rb_tree; opal_list_t vma_list; + opal_list_t vma_gc_list; size_t reg_cur_cache_size; opal_mutex_t vma_lock; }; diff --git a/opal/mca/rcache/base/rcache_base_vma_tree.c b/opal/mca/rcache/base/rcache_base_vma_tree.c index cdc10212da6..cdaee9b239a 100644 --- a/opal/mca/rcache/base/rcache_base_vma_tree.c +++ b/opal/mca/rcache/base/rcache_base_vma_tree.c @@ -16,7 +16,7 @@ * Copyright (c) 2009 IBM Corporation. All rights reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -241,6 +241,7 @@ int mca_rcache_base_vma_tree_init (mca_rcache_base_vma_module_t *vma_module) { OBJ_CONSTRUCT(&vma_module->rb_tree, opal_rb_tree_t); OBJ_CONSTRUCT(&vma_module->vma_list, opal_list_t); + OBJ_CONSTRUCT(&vma_module->vma_gc_list, opal_list_t); vma_module->reg_cur_cache_size = 0; return opal_rb_tree_init (&vma_module->rb_tree, mca_rcache_base_vma_tree_node_compare); } @@ -249,6 +250,7 @@ void mca_rcache_base_vma_tree_finalize (mca_rcache_base_vma_module_t *vma_module { opal_rb_tree_init(&vma_module->rb_tree, mca_rcache_base_vma_tree_node_compare); OBJ_DESTRUCT(&vma_module->vma_list); + OBJ_DESTRUCT(&vma_module->vma_gc_list); OBJ_DESTRUCT(&vma_module->rb_tree); } @@ -412,6 +414,20 @@ static inline int mca_rcache_base_vma_can_insert (mca_rcache_base_vma_module_t * return (0 == limit || vma_module->reg_cur_cache_size + nbytes <= limit); } +/** + * Free deleted vmas. This can not be done when they are deleted without running + * into deadlock problems with some libc versions. The caller MUST hold the vma_lock + * when calling this function. + */ +static void mca_rcache_base_vma_cleanup (mca_rcache_base_vma_module_t *vma_module) +{ + opal_list_item_t *item; + + while (NULL != (item = opal_list_remove_first (&vma_module->vma_gc_list))) { + OBJ_RELEASE(item); + } +} + int mca_rcache_base_vma_tree_insert (mca_rcache_base_vma_module_t *vma_module, mca_rcache_base_registration_t *reg, size_t limit) { @@ -420,6 +436,8 @@ int mca_rcache_base_vma_tree_insert (mca_rcache_base_vma_module_t *vma_module, opal_mutex_lock (&vma_module->vma_lock); + mca_rcache_base_vma_cleanup (vma_module); + i = (mca_rcache_base_vma_item_t *) opal_rb_tree_find_with (&vma_module->rb_tree, (void *) begin, mca_rcache_base_vma_tree_node_compare_closest); @@ -529,7 +547,6 @@ int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module, mca_rcache_base_registration_t *reg) { mca_rcache_base_vma_item_t *vma; - opal_list_t deleted_vmas; opal_mutex_lock (&vma_module->vma_lock); @@ -542,8 +559,6 @@ int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module, return OPAL_ERROR; } - OBJ_CONSTRUCT(&deleted_vmas, opal_list_t); - while (vma != (mca_rcache_base_vma_item_t *) opal_list_get_end (&vma_module->vma_list) && vma->start <= (uintptr_t) reg->bound) { mca_rcache_base_vma_remove_reg(vma, reg); @@ -555,7 +570,7 @@ int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module, mca_rcache_base_vma_update_byte_count (vma_module, vma->start - vma->end - 1); opal_list_remove_item (&vma_module->vma_list, &vma->super); - opal_list_append (&deleted_vmas, &vma->super); + opal_list_append (&vma_module->vma_gc_list, &vma->super); vma = next; } else { int merged; @@ -573,7 +588,7 @@ int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module, prev->end = vma->end; opal_list_remove_item(&vma_module->vma_list, &vma->super); opal_rb_tree_delete(&vma_module->rb_tree, vma); - opal_list_append (&deleted_vmas, &vma->super); + opal_list_append (&vma_module->vma_gc_list, &vma->super); vma = prev; merged = 1; } @@ -587,7 +602,7 @@ int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module, vma->end = next->end; opal_list_remove_item(&vma_module->vma_list, &next->super); opal_rb_tree_delete(&vma_module->rb_tree, next); - opal_list_append (&deleted_vmas, &next->super); + opal_list_append (&vma_module->vma_gc_list, &next->super); merged = 1; } } while (merged); @@ -598,9 +613,6 @@ int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module, opal_mutex_unlock (&vma_module->vma_lock); - /* actually free vmas now that the lock has been dropped */ - OPAL_LIST_DESTRUCT(&deleted_vmas); - return 0; }