From ded2c1ad37a8532867b29b2935255e609ce22c46 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 12 Jan 2017 09:59:51 -0700 Subject: [PATCH] rcache/vma: do not release vma stuctures in vma_tree_delete This commit fixes a deadlock that can occur when the libc version holds a lock when calling munmap. In this case we could end up calling free() from vma_tree_delete which would in turn try to obtain the lock in libc. To avoid the issue put any deleted vma's in a new list on the vma module and release them on the next call to vma_tree_insert. This should be safe as this function is not called from the memory hooks. Backported from 79cabc92fd0bc5307649597f5ef9c6006d193f5d Fixes #1654 Signed-off-by: Nathan Hjelm --- opal/mca/rcache/vma/rcache_vma.h | 3 ++- opal/mca/rcache/vma/rcache_vma_tree.c | 34 +++++++++++++++++++-------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/opal/mca/rcache/vma/rcache_vma.h b/opal/mca/rcache/vma/rcache_vma.h index 984a05eb4ee..7884d19338b 100644 --- a/opal/mca/rcache/vma/rcache_vma.h +++ b/opal/mca/rcache/vma/rcache_vma.h @@ -13,7 +13,7 @@ * * Copyright (c) 2006 Voltaire. All rights reserved. * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * * $COPYRIGHT$ @@ -40,6 +40,7 @@ struct mca_rcache_vma_module_t { mca_rcache_base_module_t base; opal_rb_tree_t rb_tree; opal_list_t vma_list; + opal_list_t vma_gc_list; size_t reg_cur_cache_size; }; typedef struct mca_rcache_vma_module_t mca_rcache_vma_module_t; diff --git a/opal/mca/rcache/vma/rcache_vma_tree.c b/opal/mca/rcache/vma/rcache_vma_tree.c index 9891f57e1a4..18e80a919cd 100644 --- a/opal/mca/rcache/vma/rcache_vma_tree.c +++ b/opal/mca/rcache/vma/rcache_vma_tree.c @@ -16,7 +16,7 @@ * Copyright (c) 2009 IBM Corporation. All rights reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -265,6 +265,7 @@ int mca_rcache_vma_tree_init(mca_rcache_vma_module_t* rcache) { OBJ_CONSTRUCT(&rcache->rb_tree, opal_rb_tree_t); OBJ_CONSTRUCT(&rcache->vma_list, opal_list_t); + OBJ_CONSTRUCT(&rcache->vma_gc_list, opal_list_t); rcache->reg_cur_cache_size = 0; return opal_rb_tree_init(&rcache->rb_tree, mca_rcache_vma_tree_node_compare); @@ -276,6 +277,23 @@ void mca_rcache_vma_tree_finalize(mca_rcache_vma_module_t* rcache) mca_rcache_vma_tree_node_compare); OBJ_DESTRUCT(&rcache->vma_list); OBJ_DESTRUCT(&rcache->rb_tree); + OPAL_LIST_DESTRUCT(&rcache->vma_gc_list); +} + +/** + * Clean the vma garbage collection list + * + * This function releases any deleted vma structures. It MUST be called + * with the VMA lock help and MAY NOT be called from a function that can + * be called by a memory hook. + */ +static void mca_rcache_vma_gc_clean (mca_rcache_vma_module_t* rcache) +{ + opal_list_item_t *item; + + while (NULL != (item = opal_list_remove_first (&rcache->vma_gc_list))) { + OBJ_RELEASE(item); + } } mca_mpool_base_registration_t *mca_rcache_vma_tree_find( @@ -455,6 +473,8 @@ int mca_rcache_vma_tree_insert(mca_rcache_vma_module_t* vma_rcache, opal_mutex_lock (&vma_rcache->base.lock); + mca_rcache_vma_gc_clean (vma_rcache); + i = (mca_rcache_vma_t*)opal_rb_tree_find_with(&vma_rcache->rb_tree, (void*)begin, mca_rcache_vma_tree_node_compare_closest); @@ -553,7 +573,6 @@ int mca_rcache_vma_tree_insert(mca_rcache_vma_module_t* vma_rcache, int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache, mca_mpool_base_registration_t* reg) { - opal_list_t deleted_vmas; mca_rcache_vma_t *vma; vma = (mca_rcache_vma_t*)opal_rb_tree_find_with(&vma_rcache->rb_tree, reg->base, @@ -564,8 +583,6 @@ int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache, opal_mutex_lock (&vma_rcache->base.lock); - OBJ_CONSTRUCT(&deleted_vmas, opal_list_t); - while(vma != (mca_rcache_vma_t*)opal_list_get_end(&vma_rcache->vma_list) && vma->start <= (uintptr_t)reg->bound) { mca_rcache_vma_remove_reg(vma, reg); @@ -576,7 +593,7 @@ int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache, mca_rcache_vma_update_byte_count(vma_rcache, vma->start - vma->end - 1); opal_list_remove_item(&vma_rcache->vma_list, &vma->super); - opal_list_append(&deleted_vmas, &vma->super); + opal_list_append(&vma_rcache->vma_gc_list, &vma->super); vma = next; } else { int merged; @@ -593,7 +610,7 @@ int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache, prev->end = vma->end; opal_list_remove_item(&vma_rcache->vma_list, &vma->super); opal_rb_tree_delete(&vma_rcache->rb_tree, vma); - opal_list_append(&deleted_vmas, &vma->super); + opal_list_append(&vma_rcache->vma_gc_list, &vma->super); vma = prev; merged = 1; } @@ -606,7 +623,7 @@ int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache, vma->end = next->end; opal_list_remove_item(&vma_rcache->vma_list, &next->super); opal_rb_tree_delete(&vma_rcache->rb_tree, next); - opal_list_append(&deleted_vmas, &next->super); + opal_list_append(&vma_rcache->vma_gc_list, &next->super); merged = 1; } } while(merged); @@ -616,9 +633,6 @@ int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache, opal_mutex_unlock (&vma_rcache->base.lock); - /* actually free vmas now that the lock has been dropped */ - OPAL_LIST_DESTRUCT(&deleted_vmas); - return 0; }