Skip to content

Segfault when calling kahypar_partition in parallel #142

Closed
@picsel2

Description

@picsel2

Hi!

I very quickly get a segmentation fault when calling kahypar_partition from multiple threads.

I've pasted a "small" reproducer here: https://gist.github.com/picsel2/d9691b32e93eba5a8481b706860f0ffe

This workaround fixes the failure (at least for me).

Additional info

ThreadSanitizer reports the following:

==================
WARNING: ThreadSanitizer: data race (pid=19893)
  Write of size 8 at 0x7ba800000000 by thread T4:
    #0 operator delete(void*, unsigned long) <null> (libtsan.so.2+0x8782d)
    #1 kahypar::Timer::Timing& std::vector<kahypar::Timer::Timing, std::allocator<kahypar::Timer::Timing> >::emplace_back<kahypar::Context const&, kahypar::Timepoint const&, double const&>(kahypar::Context const&, kahypar::Timepoint const&, double const&) [clone .isra.0] <null> (libkahypar.so+0x1e6d1c)
    #2 main::{lambda()#1}::operator()() const <null> (pdog_user+0x4018dd)
    #3 void std::__invoke_impl<void, main::{lambda()#1}>(std::__invoke_other, main::{lambda()#1}&&) <null> (pdog_user+0x402394)
    #4 std::__invoke_result<main::{lambda()#1}>::type std::__invoke<main::{lambda()#1}>(main::{lambda()#1}&&) <null> (pdog_user+0x402301)
    #5 void std::thread::_Invoker<std::tuple<main::{lambda()#1}> >::_M_invoke<0ul>(std::_Index_tuple<0ul>) <null> (pdog_user+0x402266)
    #6 std::thread::_Invoker<std::tuple<main::{lambda()#1}> >::operator()() <null> (pdog_user+0x402210)
    #7 std::thread::_State_impl<std::thread::_Invoker<std::tuple<main::{lambda()#1}> > >::_M_run() <null> (pdog_user+0x4021ca)
    #8 execute_native_thread_routine <null> (libstdc++.so.6+0xdbc02)

  Previous write of size 8 at 0x7ba800000000 by thread T2:
    [failed to restore the stack]

  Thread T4 (tid=19898, running) created by main thread at:
    #0 pthread_create <null> (libtsan.so.2+0x5f0e6)
    #1 std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) <null> (libstdc++.so.6+0xdbcd8)
    #2 void std::__new_allocator<std::thread>::construct<std::thread, main::{lambda()#1} const&>(std::thread*, main::{lambda()#1} const&) <null> (pdog_user+0x401ea3)
    #3 void std::allocator_traits<std::allocator<std::thread> >::construct<std::thread, main::{lambda()#1} const&>(std::allocator<std::thread>&, std::thread*, main::{lambda()#1} const&) <null> (pdog_user+0x401b80)
    #4 std::thread& std::vector<std::thread, std::allocator<std::thread> >::emplace_back<main::{lambda()#1} const&>(main::{lambda()#1} const&) <null> (pdog_user+0x401a96)
    #5 main <null> (pdog_user+0x40194d)

  Thread T2 (tid=19896, running) created by main thread at:
    #0 pthread_create <null> (libtsan.so.2+0x5f0e6)
    #1 std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) <null> (libstdc++.so.6+0xdbcd8)
    #2 void std::__new_allocator<std::thread>::construct<std::thread, main::{lambda()#1} const&>(std::thread*, main::{lambda()#1} const&) <null> (pdog_user+0x401ea3)
    #3 void std::allocator_traits<std::allocator<std::thread> >::construct<std::thread, main::{lambda()#1} const&>(std::allocator<std::thread>&, std::thread*, main::{lambda()#1} const&) <null> (pdog_user+0x401b80)
    #4 void std::vector<std::thread, std::allocator<std::thread> >::_M_realloc_insert<main::{lambda()#1} const&>(__gnu_cxx::__normal_iterator<std::thread*, std::vector<std::thread, std::allocator<std::thread> > >, main::{lambda()#1} const&) <null> (pdog_user+0x401c88)
    #5 std::thread& std::vector<std::thread, std::allocator<std::thread> >::emplace_back<main::{lambda()#1} const&>(main::{lambda()#1} const&) <null> (pdog_user+0x401afc)
    #6 main <null> (pdog_user+0x40194d)

SUMMARY: ThreadSanitizer: data race (/lib64/libtsan.so.2+0x8782d) in operator delete(void*, unsigned long)
==================
ThreadSanitizer:DEADLYSIGNAL
==19893==ERROR: ThreadSanitizer: SEGV on unknown address 0x7fcb5f506018 (pc 0x7fcb6a2849f0 bp 0x7fcb6a331bc0 sp 0x7fcb62d16290 T19901)
==19893==The signal is caused by a READ memory access.
    #0 __sanitizer::LargeMmapAllocator<__tsan::MapUnmapCallback, __sanitizer::LargeMmapAllocatorPtrArrayDynamic, __sanitizer::LocalAddressSpaceView>::Deallocate(__sanitizer::AllocatorStats*, void*) <null> (libtsan.so.2+0x849f0)
    #1 __tsan::user_free(__tsan::ThreadState*, unsigned long, void*, bool) <null> (libtsan.so.2+0x83a8d)
    #2 operator delete(void*, unsigned long) <null> (libtsan.so.2+0x87832)
    #3 kahypar::Timer::Timing& std::vector<kahypar::Timer::Timing, std::allocator<kahypar::Timer::Timing> >::emplace_back<kahypar::Context const&, kahypar::Timepoint const&, double const&>(kahypar::Context const&, kahypar::Timepoint const&, double const&) [clone .isra.0] <null> (libkahypar.so+0x1e6d1c)
    #4 kahypar::multilevel::partition(kahypar::ds::GenericHypergraph<unsigned int, unsigned int, int, int, int, kahypar::meta::Empty, kahypar::meta::Empty>&, kahypar::ICoarsener&, kahypar::IRefiner&, kahypar::Context const&) <null> (libkahypar.so+0x2ac116)
    #5 kahypar::recursive_bisection::partition(kahypar::ds::GenericHypergraph<unsigned int, unsigned int, int, int, int, kahypar::meta::Empty, kahypar::meta::Empty>&, kahypar::Context const&) <null> (libkahypar.so+0x2a8ac2)
    #6 kahypar::Partitioner::partition(kahypar::ds::GenericHypergraph<unsigned int, unsigned int, int, int, int, kahypar::meta::Empty, kahypar::meta::Empty>&, kahypar::Context&) <null> (libkahypar.so+0x2c0b02)
    #7 kahypar::PartitionerFacade::partition(kahypar::ds::GenericHypergraph<unsigned int, unsigned int, int, int, int, kahypar::meta::Empty, kahypar::meta::Empty>&, kahypar::Context&) [clone .isra.0] <null> (libkahypar.so+0x203144)
    #8 kahypar_partition <null> (libkahypar.so+0x203b7f)
    #9 run_kahypar() <null> (pdog_user+0x401809)
    #10 main::{lambda()#1}::operator()() const <null> (pdog_user+0x4018dd)
    #11 void std::__invoke_impl<void, main::{lambda()#1}>(std::__invoke_other, main::{lambda()#1}&&) <null> (pdog_user+0x402394)
    #12 std::__invoke_result<main::{lambda()#1}>::type std::__invoke<main::{lambda()#1}>(main::{lambda()#1}&&) <null> (pdog_user+0x402301)
    #13 void std::thread::_Invoker<std::tuple<main::{lambda()#1}> >::_M_invoke<0ul>(std::_Index_tuple<0ul>) <null> (pdog_user+0x402266)
    #14 std::thread::_Invoker<std::tuple<main::{lambda()#1}> >::operator()() <null> (pdog_user+0x402210)
    #15 std::thread::_State_impl<std::thread::_Invoker<std::tuple<main::{lambda()#1}> > >::_M_run() <null> (pdog_user+0x4021ca)
    #16 execute_native_thread_routine <null> (libstdc++.so.6+0xdbc02)
    #17 __tsan_thread_start_func <null> (libtsan.so.2+0x393ef)
    #18 start_thread <null> (libc.so.6+0x8b12c)
    #19 __clone3 <null> (libc.so.6+0x10cbbf)

ThreadSanitizer can not provide additional info.
SUMMARY: ThreadSanitizer: SEGV (/lib64/libtsan.so.2+0x849f0) in __sanitizer::LargeMmapAllocator<__tsan::MapUnmapCallback, __sanitizer::LargeMmapAllocatorPtrArrayDynamic, __sanitizer::LocalAddressSpaceView>::Deallocate(__sanitizer::AllocatorStats*, void*)
==19893==ABORTING

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions