From b7001e70798fa473955523e3c1af07e5194d6c49 Mon Sep 17 00:00:00 2001 From: JordanHendersonMusic Date: Sun, 21 Apr 2024 15:32:00 +0100 Subject: [PATCH] Sclang: PyrGC: Insert limit on number of un-scanned objects. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When there are a large number of temporary objects mNumToScan can overflow leading to undefined behaviour (from the overflow of a signed int) and the implementation defined behaviour of right shifting a negative value, creating a GC bug that only appears on some systems, and incorrect, potentially leaking, but non-crashing behaviour on others. This code cause a crash on some systems. ```supercollider b = Pwhite.new(0, 10, inf).asStream; Array.fill(1810000, {b.next});` ``` This commit places an upper limit on the number of objects to scan (5'000'000). Note, SC uses an incremental allocator, meaning this isn't a limit on the amount of memory, but amount of unchecked memory. While all allocations should cause a collection, when PyrParseNode.cpp creates a PyrMethod, sometimes it sets 'needsHeapContext' to false which disables collection. This is (presumably) done for optimization. What this fails to consider is that frames in supercollider are themselves heap allocated, meaning, no matter how small, they increase mNumToScan, leading to the above bug. While a change to PyrParseNode.cpp could be possible, the code is without comments and very complex. Setting a limit on mNumToScan has a potential minuscule impact on performance, as the additional scan happens only rarely, and in some cases, actually improves performance as the GC is allowed to recycle the memory, reducing the number of calls to malloc — this is only noticeable in tight loops with many iterations that create functions. --- lang/LangSource/GC.cpp | 4 ++++ lang/LangSource/GC.h | 15 +++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/lang/LangSource/GC.cpp b/lang/LangSource/GC.cpp index bf2bcfa82d1..cd0a7d9659a 100644 --- a/lang/LangSource/GC.cpp +++ b/lang/LangSource/GC.cpp @@ -664,6 +664,10 @@ HOT void PyrGC::Collect() { if (mNumToScan > 0) { // post("->Collect ns %d ng %d s %d\n", mNumToScan, mNumGrey, mScans); // DumpInfo(); + + // This operation can easily overflow if mNumToScan is large causing undefined behaviour. + // Right shifting a negative is also implementation defined and has led to bugs that only appear on certain + // operating systems and hardware. mNumToScan += mNumToScan >> 3; // post("->Collect2 ns %d ng %d s %d\n", mNumToScan, mNumGrey, mScans); diff --git a/lang/LangSource/GC.h b/lang/LangSource/GC.h index 512917963d8..3842182b5b9 100644 --- a/lang/LangSource/GC.h +++ b/lang/LangSource/GC.h @@ -303,9 +303,20 @@ inline void PyrGC::ToGrey2(PyrObjectHdr* obj) { } inline PyrObject* PyrGC::Allocate(size_t inNumBytes, int32 sizeclass, bool inRunCollection) { - if (inRunCollection && mNumToScan >= kScanThreshold) + // When allocating a large number of objects, mNumToScan (int32) can overflow. + // This usually happens when creating temporary frames, which might not be collected until later. + // This is designed to force collection when there might be a large number of temporary objects. + // The threshold is somewhat arbitrary and could be made larger, but it is better to be safe than fast, + // and sclang is not designed nor optimized to process large amounts of data. + const bool aboveTemporaryObjectLimit = mNumToScan > 5'000'000; + + // All allocations should set inRunCollection to true, but some types of methods do not as a part of an + // optimization. Don't scan an insignificant amount of memory, wait until above kScanThreshold. + const bool shouldCollect = inRunCollection && mNumToScan >= kScanThreshold; + + if (aboveTemporaryObjectLimit || shouldCollect) { Collect(); - else { + } else { if (inRunCollection) mUncollectedAllocations = 0; else