Skip to content

Commit

Permalink
Don't make pinToCPU default.
Browse files Browse the repository at this point in the history
Causes issues when multiple nimbus single-threaded instances are launched on the same machine
  • Loading branch information
mratsim committed Sep 21, 2021
1 parent 39b90fa commit 2eb22c6
Showing 1 changed file with 31 additions and 13 deletions.
44 changes: 31 additions & 13 deletions taskpools/taskpools.nim
Original file line number Diff line number Diff line change
Expand Up @@ -345,9 +345,23 @@ proc syncAll*(pool: Taskpool) {.raises: [Exception].} =
# Runtime
# ---------------------------------------------

proc new*(T: type Taskpool, numThreads = countProcessors()): T {.raises: [Exception].} =
proc new*(T: type Taskpool, numThreads = countProcessors(), pinToCPU = false): T {.raises: [Exception].} =
## Initialize a threadpool that manages `numThreads` threads.
## Default to the number of logical processors available.
##
## If pinToCPU is set, threads spawned will be pinned to the core that spawned them.
## This improves performance of memory-intensive workloads by avoiding
## thrashing and reloading core caches when a thread moves around.
##
## pinToCPU option is ignored in:
## - In C++ compilation with Microsoft Visual Studio Compiler
## - MacOS
## - Android
#
# pinToCPU Status:
# - C++ MSVC: implementation missing (need to wrap reinterpret_cast)
# - Android: API missing and unclear benefits due to Big.Little architecture
# - MacOS: API missing

type TpObj = typeof(default(Taskpool)[])
# Event notifier requires an extra 64 bytes for alignment
Expand All @@ -363,22 +377,26 @@ proc new*(T: type Taskpool, numThreads = countProcessors()): T {.raises: [Except
# Setup master thread
workerContext.id = 0
workerContext.taskpool = tp
when not(defined(cpp) and defined(vcc)):
# TODO: Nim casts between Windows Handles but that requires reinterpret cast for C++
pinToCpu(0)

if pinToCPU:
when not(defined(cpp) and defined(vcc)):
# TODO: Nim casts between Windows Handles but that requires reinterpret cast for C++
pinToCpu(0)

# Start worker threads
for i in 1 ..< numThreads:
createThread(tp.workers[i], worker_entry_fn, (tp, WorkerID(i)))
# TODO: we might want to take into account Hyper-Threading (HT)
# and allow spawning tasks and pinning to cores that are not HT-siblings.
# This is important for memory-bound workloads (like copy, addition, ...)
# where both sibling cores will compete for L1 and L2 cache, effectively
# halving the memory bandwidth or worse, flushing what the other put in cache.
# Note that while 2x siblings is common, Xeon Phi has 4x Hyper-Threading.
when not(defined(cpp) and defined(vcc)):
# TODO: Nim casts between Windows Handles but that requires reinterpret cast for C++
pinToCpu(tp.workers[i], i)

if pinToCPU:
# TODO: we might want to take into account Hyper-Threading (HT)
# and allow spawning tasks and pinning to cores that are not HT-siblings.
# This is important for memory-bound workloads (like copy, addition, ...)
# where both sibling cores will compete for L1 and L2 cache, effectively
# halving the memory bandwidth or worse, flushing what the other put in cache.
# Note that while 2x siblings is common, Xeon Phi has 4x Hyper-Threading.
when not(defined(cpp) and defined(vcc)):
# TODO: Nim casts between Windows Handles but that requires reinterpret cast for C++
pinToCpu(tp.workers[i], i)

# Root worker
setupWorker()
Expand Down

0 comments on commit 2eb22c6

Please sign in to comment.