Skip to content

Commit

Permalink
[core optimize] SignalSafe::ReuseEmptyList() avoids many allocations (#…
Browse files Browse the repository at this point in the history
…1509)

We TakePendingSignals() in the main interpreter loop, which shows up on
benchmarks/compute/fib.sh.

This change lets us avoid ~3K of ~150K allocations on that benchmark, or
about 2%.  These allocations also wouldn't fit in the proposed pool
allocator for small objects, so they would result in a malloc() and
free(), which is slow.

Also shows up on benchmarks/gc and benchmarks/osh-runtime.
  • Loading branch information
andychu committed Feb 20, 2023
1 parent b1bfa55 commit ea338d9
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 8 deletions.
3 changes: 2 additions & 1 deletion benchmarks/compute.sh
Expand Up @@ -310,7 +310,7 @@ task-all() {
case $task_name in
(hello|fib)
# Run it DIRECTLY, do not run $0. Because we do NOT want to fork bash
# than dash, because bash uses more memory.
# then dash, because bash uses more memory.
cmd=($runtime benchmarks/compute/$task_name.$(ext $runtime) "$arg1" "$arg2")
;;
(*)
Expand Down Expand Up @@ -382,6 +382,7 @@ measure() {

mkdir -p $BASE_DIR/{tmp,raw,stage1} $out_dir

# set -x
hello-all $provenance $host_job_id $out_dir
fib-all $provenance $host_job_id $out_dir

Expand Down
5 changes: 5 additions & 0 deletions core/pyos.py
Expand Up @@ -364,6 +364,11 @@ def TakePendingSignals(self):
self.pending_signals = new_queue
return ret

def ReuseEmptyList(self, empty_list):
# type: (List[int]) -> None
"""This optimization only happens in C++."""
pass


gSignalSafe = None # type: SignalSafe

Expand Down
22 changes: 18 additions & 4 deletions cpp/core.h
Expand Up @@ -111,6 +111,7 @@ class SignalSafe {
SignalSafe()
: GC_CLASS_FIXED(header_, field_mask(), sizeof(SignalSafe)),
pending_signals_(AllocSignalList()),
empty_list_(AllocSignalList()), // to avoid repeated allocation
last_sig_num_(0),
received_sigint_(false),
received_sigwinch_(false),
Expand Down Expand Up @@ -147,13 +148,24 @@ class SignalSafe {

// Main thread takes signals so it can run traps.
List<int>* TakePendingSignals() {
// TODO: Consider ReuseEmptyList() to avoid allocation
List<int>* new_queue = AllocSignalList();
List<int>* ret = pending_signals_;
pending_signals_ = new_queue;

// Make sure we have a distinct list to reuse.
DCHECK(empty_list_ != pending_signals_);
pending_signals_ = empty_list_;

return ret;
}

// Main thread returns the same list as an optimization to avoid allocation.
void ReuseEmptyList(List<int>* empty_list) {
DCHECK(empty_list != pending_signals_); // must be different
DCHECK(len(empty_list) == 0); // main thread clears
DCHECK(empty_list->capacity_ == kMaxPendingSignals);

empty_list_ = empty_list;
}

// Main thread wants to get the last signal received.
int LastSignal() {
#if LOCK_FREE_ATOMICS
Expand Down Expand Up @@ -185,11 +197,13 @@ class SignalSafe {
}

static constexpr uint16_t field_mask() {
return maskbit(offsetof(SignalSafe, pending_signals_));
return maskbit(offsetof(SignalSafe, pending_signals_)) |
maskbit(offsetof(SignalSafe, empty_list_));
}

GC_OBJ(header_);
List<int>* pending_signals_; // public for testing
List<int>* empty_list_;

private:
// Enforce private state because two different "threads" will use it!
Expand Down
5 changes: 5 additions & 0 deletions cpp/core_test.cc
Expand Up @@ -212,6 +212,7 @@ TEST signal_test() {
List<int>* q = signal_safe->TakePendingSignals();
ASSERT(q != nullptr);
ASSERT_EQ(0, len(q));
signal_safe->ReuseEmptyList(q);
}

pid_t mypid = getpid();
Expand All @@ -231,6 +232,9 @@ TEST signal_test() {
ASSERT_EQ(2, len(q));
ASSERT_EQ(SIGUSR1, q->index_(0));
ASSERT_EQ(SIGUSR2, q->index_(1));

q->clear();
signal_safe->ReuseEmptyList(q);
}

pyos::Sigaction(SIGUSR1, SIG_IGN);
Expand All @@ -239,6 +243,7 @@ TEST signal_test() {
List<int>* q = signal_safe->TakePendingSignals();
ASSERT(q != nullptr);
ASSERT(len(q) == 0);
signal_safe->ReuseEmptyList(q);
}
pyos::Sigaction(SIGUSR2, SIG_IGN);

Expand Down
8 changes: 7 additions & 1 deletion cpp/data_race_test.cc
Expand Up @@ -88,7 +88,13 @@ TEST take_pending_signals_test() {

// Concurrent access in main thread
List<int>* received = signal_safe.TakePendingSignals();
log("received %d signals", len(received));
log("(1) received %d signals", len(received));

received->clear();
signal_safe.ReuseEmptyList(received);

received = signal_safe.TakePendingSignals();
log("(2) received %d signals", len(received));

pthread_join(t, 0);

Expand Down
8 changes: 6 additions & 2 deletions osh/builtin_trap.py
Expand Up @@ -116,14 +116,18 @@ def InitInteractiveShell(self, display, my_pid):
def GetPendingTraps(self):
# type: () -> List[command_t]
"""Transfer ownership of the current queue of pending trap handlers to the caller."""
sig_queue = self.signal_safe.TakePendingSignals()
signals = self.signal_safe.TakePendingSignals()

run_list = [] # type: List[command_t]
for sig_num in sig_queue:
for sig_num in signals:
node = self.traps.get(sig_num, None)
if node is not None:
run_list.append(node)

# Optimization to avoid allocation in the main loop.
del signals[:]
self.signal_safe.ReuseEmptyList(signals)

return run_list


Expand Down

0 comments on commit ea338d9

Please sign in to comment.