Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
<
/// Fast Memory Manager for FPC x86_64
// - this unit is a part of the Open Source Synopse mORMot framework 2,
// licensed under a MPL/GPL/LGPL three license - see LICENSE.md
unit mormot.core.fpcx64mm;
{
*****************************************************************************
A Multi-thread Friendly Memory Manager for FPC written in x86_64 assembly
- targetting Linux (and Windows) multi-threaded Services
- only for FPC on the x86_64 target - use the RTL MM on Delphi or ARM
- based on proven FastMM4 by Pierre le Riche - with tuning and enhancements
- can report detailed statistics (with threads contention and memory leaks)
- three app modes: default mono-thread friendly, FPCMM_SERVER or FPCMM_BOOST
Usage: include this unit as the very first in your FPC project uses clause
Why another Memory Manager on FPC?
- The built-in heap.inc is well written and cross-platform and cross-CPU,
but its threadvar arena for small blocks tends to consume a lot of memory
on multi-threaded servers, and has suboptimal allocation performance
- C memory managers (glibc, Intel TBB, jemalloc) have a very high RAM
consumption (especially Intel TBB) and do panic/SIGKILL on any GPF
- Pascal alternatives (FastMM4,ScaleMM2,BrainMM) are Windows+Delphi specific
- Our lockess round-robin of tiny blocks and freemem bin list are unique
algorithms among Memory Managers, and match modern CPUs and workloads
- It was so fun diving into SSE2 x86_64 assembly and Pierre's insight
- Resulting code is still easy to understand and maintain
DISCLAMER: seems stable on Linux and Win64 but feedback is welcome!
*****************************************************************************
}
{ ---- Ready-To-Use Scenarios for Memory Manager Tuning }
{
TL;DR:
1. default settings target LCL/console mono-threaded apps;
2. define FPCMM_SERVER for a multi-threaded service/daemon.
}
// target a multi-threaded service on a modern CPU
// - define FPCMM_DEBUG, FPCMM_ASSUMEMULTITHREAD, FPCMM_ERMS, FPCMM_LOCKLESSFREE
// - currently mormot2tests run with no sleep when FPCMM_SERVER is set :)
// - you may try to define FPCMM_BOOST for even more aggressive settings.
{.$define FPCMM_SERVER}
// increase settings for very aggressive multi-threaded process
// - try to enable it if unexpected SmallGetmemSleepCount/SmallFreememSleepCount
// and SleepCount/SleepCycles contentions are reported by CurrentHeapStatus;
// - tiny blocks will be <= 256 bytes (instead of 128 bytes);
// - FPCMM_BOOSTER will use 2x more tiny blocks arenas - likely to be wasteful;
// - will enable FPCMM_SMALLNOTWITHMEDIUM trying to reduce medium sleeps;
// - warning: depending on the workload and hardware, it may actually be slower,
// triggering more Medium arena contention, and consuming more RAM: consider
// FPCMM_SERVER as a fair alternative.
{.$define FPCMM_BOOST}
{.$define FPCMM_BOOSTER}
{ ---- Fine Grained Memory Manager Tuning }
// includes more detailed information to WriteHeapStatus()
{.$define FPCMM_DEBUG}
// on thread contention, don't spin executing "pause" but directly call Sleep()
// - may help on a single core CPU, or for very specific workloads
{.$define FPCMM_NOPAUSE}
// let FPCMM_DEBUG include SleepCycles information from rdtsc
// and FPCMM_PAUSE call rdtsc for its spinnning loop
// - since rdtsc is emulated so unrealiable on VM, it is disabled by default
{.$define FPCMM_SLEEPTSC}
// checks leaks and write them to the console at process shutdown
// - only basic information will be included: more debugging information (e.g.
// call stack) may be gathered using heaptrc or valgrid
{.$define FPCMM_REPORTMEMORYLEAKS}
// won't check the IsMultiThread global, but assume it is true
// - multi-threaded apps (e.g. a Server Daemon instance) will be faster with it
// - mono-threaded (console/LCL) apps are faster without this conditional
{.$define FPCMM_ASSUMEMULTITHREAD}
// let Freemem multi-thread contention use a lockless algorithm
// - on contention, Freemem won't yield the thread using an OS call, but fill
// an internal Bin list which will be released when the lock becomes available
// - beneficial from our tests on high thread contention (HTTP/REST server)
{.$define FPCMM_LOCKLESSFREE}
// won't use mremap but a regular getmem/move/freemem pattern for large blocks
// - depending on the actual system (e.g. on a VM), mremap may be slower
// - will disable Linux mremap() or Windows following block VirtualQuery/Alloc
{.$define FPCMM_NOMREMAP}
// customize mmap() allocation strategy
{.$define FPCMM_MEDIUM32BIT} // enable MAP_32BIT for AllocMedium() on Linux
{.$define FPCMM_LARGEBIGALIGN} // align large chunks to 21-bit=2MB=PMD_SIZE
// force the tiny/small blocks to be in their own arena, not with medium blocks
// - would use a little more memory, but medium pool is less likely to sleep
// - not defined for FPCMM_SERVER because no performance difference was found
{.$define FPCMM_SMALLNOTWITHMEDIUM}
// use "rep movsb/stosd" ERMS for blocks > 256 bytes instead of SSE2 "movaps"
// - ERMS is available since Ivy Bridge, and we use "movaps" for smallest blocks
// (to not slow down older CPUs), so it is safe to enable this on FPCMM_SERVER
{.$define FPCMM_ERMS}
// try "cmp" before "lock cmpxchg" for old processors with huge lock penalty
{.$define FPCMM_CMPBEFORELOCK}
// will export libc-like functions, and not replace the FPC MM
// - e.g. to use this unit as a stand-alone C memory allocator
{.$define FPCMM_STANDALONE}
// this whole unit will compile as void
// - may be defined e.g. when compiled as Design-Time Lazarus package
{.$define FPCMM_DISABLE}
interface
{$ifdef FPC}
// cut-down version of mormot.defines.inc to make this unit standalone
{$mode Delphi}
{$inline on}
{$R-} // disable Range checking
{$S-} // disable Stack checking
{$W-} // disable stack frame generation
{$Q-} // disable overflow checking
{$B-} // expect short circuit boolean
{$ifdef CPUX64}
{$define FPCX64MM} // this unit is for FPC + x86_64 only
{$asmmode Intel}
{$endif CPUX64}
{$ifdef OLDLINUXKERNEL}
{$define FPCMM_NOMREMAP}
{$endif OLDLINUXKERNEL}
{$ifdef FPCMM_BOOSTER}
{$define FPCMM_BOOST}
{$define FPCMM_SMALLNOTWITHMEDIUM}
{$endif FPCMM_BOOSTER}
{$ifdef FPCMM_BOOST}
{$define FPCMM_SERVER}
{$define FPCMM_LARGEBIGALIGN} // bigger blocks implies less reallocation
{$endif FPCMM_BOOST}
{$ifdef FPCMM_SERVER}
{$define FPCMM_DEBUG}
{$define FPCMM_ASSUMEMULTITHREAD}
{$define FPCMM_LOCKLESSFREE}
{$define FPCMM_ERMS}
{$endif FPCMM_SERVER}
{$ifdef FPCMM_BOOSTER}
{$undef FPCMM_DEBUG} // when performance matters more than stats
{$endif FPCMM_BOOSTER}
{$endif FPC}
{$ifdef FPCMM_DISABLE}
{$undef FPCX64MM} // e.g. when compiled as Design-Time Lazarus package
{$endif FPCMM_DISABLE}
{$ifdef FPCX64MM}
// this unit is available only for FPC + X86_64 CPU
// other targets would compile as a void unit
type
/// Arena (middle/large) heap information as returned by CurrentHeapStatus
TMMStatusArena = record
/// how many bytes are currently reserved (mmap) to the Operating System
CurrentBytes: PtrUInt;
/// how many bytes have been reserved (mmap) to the Operating System
CumulativeBytes: PtrUInt;
{$ifdef FPCMM_DEBUG}
/// maximum bytes count reserved (mmap) to the Operating System
PeakBytes: PtrUInt;
/// how many VirtualAlloc/mmap calls to the Operating System did occur
CumulativeAlloc: PtrUInt;
/// how many VirtualFree/munmap calls to the Operating System did occur
CumulativeFree: PtrUInt;
{$endif FPCMM_DEBUG}
/// how many times this Arena did wait from been unlocked by another thread
SleepCount: PtrUInt;
end;
/// heap information as returned by CurrentHeapStatus
TMMStatus = record
/// how many tiny/small memory blocks (<=2600 bytes) are currently allocated
SmallBlocks: PtrUInt;
/// how many bytes of tiny/small memory blocks are currently allocated
// - this size is included in Medium.CurrentBytes value, even if
// FPCMM_SMALLNOTWITHMEDIUM has been defined
SmallBlocksSize: PtrUInt;
/// information about blocks up to 256KB (tiny, small and medium)
// - includes also the memory needed for tiny/small blocks
// - is shared by both small & medium pools even if FPCMM_SMALLNOTWITHMEDIUM
Medium: TMMStatusArena;
/// information about large blocks > 256KB
// - those blocks are directly handled by the Operating System
Large: TMMStatusArena;
{$ifdef FPCMM_DEBUG}
{$ifdef FPCMM_SLEEPTSC}
/// how much rdtsc cycles were spent within SwitchToThread/NanoSleep API
// - we rdtsc since it is an indicative but very fast way of timing on
// direct hardware
// - warning: on virtual machines, the rdtsc opcode is usually emulated so
// these SleepCycles number are non indicative anymore
SleepCycles: PtrUInt;
{$endif FPCMM_SLEEPTSC}
{$ifdef FPCMM_LOCKLESSFREE}
/// how many times Freemem() did spin to acquire its lock-less bin list
SmallFreememLockLessSpin: PtrUInt;
{$endif FPCMM_LOCKLESSFREE}
{$endif FPCMM_DEBUG}
/// how many times the Operating System Sleep/NanoSleep API was called
// - in a perfect world, should be as small as possible: mormot2tests run as
// SleepCount=0 when FPCMM_LOCKLESSFREE is defined (FPCMM_SERVER default)
SleepCount: PtrUInt;
/// how many times Getmem() did block and wait for a tiny/small block
// - see also GetSmallBlockContention() for more detailed information
SmallGetmemSleepCount: PtrUInt;
/// how many times Freemem() did block and wait for a tiny/small block
// - see also GetSmallBlockContention() for more detailed information
SmallFreememSleepCount: PtrUInt;
end;
PMMStatus = ^TMMStatus;
/// allocate a new memory buffer
// - as FPC default heap, _Getmem(0) returns _Getmem(1)
function _GetMem(size: PtrUInt): pointer;
/// allocate a new zeroed memory buffer
function _AllocMem(Size: PtrUInt): pointer;
/// release a memory buffer
// - returns the allocated size of the supplied pointer (as FPC default heap)
function _FreeMem(P: pointer): PtrUInt;
/// change the size of a memory buffer
// - won't move any data if in-place reallocation is possible
// - as FPC default heap, _ReallocMem(P=nil,Size) maps P := _getmem(Size) and
// _ReallocMem(P,0) maps _Freemem(P)
function _ReallocMem(var P: pointer; Size: PtrUInt): pointer;
/// retrieve the maximum size (i.e. the allocated size) of a memory buffer
function _MemSize(P: pointer): PtrUInt; inline;
/// retrieve high-level statistics about the current memory manager state
// - see also GetSmallBlockContention for detailed small blocks information
// - standard GetHeapStatus and GetFPCHeapStatus gives less accurate information
// (only CurrHeapSize and MaxHeapSize are set), since we don't track "free" heap
// bytes: I can't figure how "free" memory is relevant nowadays - on 21th century
// Operating Systems, memory is virtual, and reserved/mapped by the OS but
// physically hosted in the HW RAM chips only when written the first time -
// GetHeapStatus information made sense on MSDOS with fixed 640KB of RAM
// - note that FPC GetHeapStatus and GetFPCHeapStatus is only about the
// current thread (irrelevant for sure) whereas CurrentHeapStatus is global
function CurrentHeapStatus: TMMStatus;
{$ifdef FPCMM_STANDALONE}
/// should be called before using any memory function
procedure InitializeMemoryManager;
/// should be called to finalize this memory manager process and release all RAM
procedure FreeAllMemory;
{$undef FPCMM_DEBUG} // excluded FPC-specific debugging
/// IsMultiThread global variable is not correct outside of the FPC RTL
{$define FPCMM_ASSUMEMULTITHREAD}
/// not supported to reduce dependencies and console writing
{$undef FPCMM_REPORTMEMORYLEAKS}
{$else}
type
/// one GetSmallBlockContention info about unexpected multi-thread waiting
// - a single GetmemBlockSize or FreememBlockSize non 0 field is set
TSmallBlockContention = packed record
/// how many times a small block getmem/freemem has been waiting for unlock
SleepCount: PtrUInt;
/// the small block size on which Getmem() has been blocked - or 0
GetmemBlockSize: PtrUInt;
/// the small block size on which Freemem() has been blocked - or 0
FreememBlockSize: PtrUInt;
end;
/// small blocks detailed information as returned GetSmallBlockContention
TSmallBlockContentionDynArray = array of TSmallBlockContention;
/// one GetSmallBlockStatus information
TSmallBlockStatus = packed record
/// how many times a memory block of this size has been allocated
Total: PtrUInt;
/// how many memory blocks of this size are currently allocated
Current: PtrUInt;
/// the standard size of the small memory block
BlockSize: PtrUInt;
end;
/// small blocks detailed information as returned GetSmallBlockStatus
TSmallBlockStatusDynArray = array of TSmallBlockStatus;
/// sort order of detailed information as returned GetSmallBlockStatus
TSmallBlockOrderBy = (
obTotal,
obCurrent,
obBlockSize);
/// retrieve the use counts of allocated small blocks
// - returns maxcount biggest results, sorted by "orderby" field occurence
function GetSmallBlockStatus(maxcount: integer = 10;
orderby: TSmallBlockOrderBy = obTotal; count: PPtrUInt = nil; bytes: PPtrUInt = nil;
small: PCardinal = nil; tiny: PCardinal = nil): TSmallBlockStatusDynArray;
/// retrieve all small blocks which suffered from blocking during multi-thread
// - returns maxcount biggest results, sorted by SleepCount occurence
function GetSmallBlockContention(
maxcount: integer = 10): TSmallBlockContentionDynArray;
/// convenient debugging function into the console
// - if smallblockcontentioncount > 0, includes GetSmallBlockContention() info
// up to the smallblockcontentioncount biggest occurences
procedure WriteHeapStatus(const context: ShortString = '';
smallblockstatuscount: integer = 8; smallblockcontentioncount: integer = 8;
compilationflags: boolean = false);
/// convenient debugging function into a string
// - if smallblockcontentioncount > 0, includes GetSmallBlockContention() info
// up to the smallblockcontentioncount biggest occurences
// - warning: this function is not thread-safe
function GetHeapStatus(const context: ShortString; smallblockstatuscount,
smallblockcontentioncount: integer; compilationflags, onsameline: boolean): string;
const
/// human readable information about how our MM was built
// - similar to WriteHeapStatus(compilationflags=true) output
FPCMM_FLAGS = ' '
{$ifdef FPCMM_BOOSTER} + 'BOOSTER ' {$else}
{$ifdef FPCMM_BOOST} + 'BOOST ' {$else}
{$ifdef FPCMM_SERVER} + 'SERVER ' {$endif}
{$endif FPCMM_BOOST}
{$endif FPCMM_BOOSTER}
{$ifdef FPCMM_ASSUMEMULTITHREAD} + ' assumulthrd' {$endif}
{$ifdef FPCMM_LOCKLESSFREE} + ' lockless' {$endif}
{$ifdef FPCMM_PAUSE} + ' pause' {$endif}
{$ifdef FPCMM_SLEEPTSC} + ' rdtsc' {$endif}
{$ifndef BSD}
{$ifdef FPCMM_NOMREMAP} + ' nomremap' {$endif}
{$endif BSD}
{$ifdef FPCMM_SMALLNOTWITHMEDIUM}+ ' smallpool' {$endif}
{$ifdef FPCMM_ERMS} + ' erms' {$endif}
{$ifdef FPCMM_DEBUG} + ' debug' {$endif}
{$ifdef FPCMM_REPORTMEMORYLEAKS} + ' repmemleak' {$endif};
{$endif FPCMM_STANDALONE}
{$endif FPCX64MM}
implementation
{
High-level Allocation Strategy Description
--------------------------------------------
The allocator handles the following families of memory blocks:
- TINY <= 128 B (<= 256 B for FPCMM_BOOST)
Round-robin distribution into several arenas, fed from shared tiny/small pool
(fair scaling from multi-threaded calls, with no threadvar nor GC involved)
- SMALL <= 2600 B
One arena per block size, fed from shared tiny/small pool
- MEDIUM <= 256 KB
Separated pool of bitmap-marked chunks, fed from 1MB of OS mmap/virtualalloc
- LARGE > 256 KB
Directly fed from OS mmap/virtualalloc with mremap when growing
The original FastMM4 was enhanced as such, especially in FPCMM_SERVER mode:
- FPC compatibility, even on POSIX/Linux, also for FPC specific API behavior;
- Memory leaks and thread contention tracked without performance impact;
- Detailed per-block statistics with little performance penalty;
- x86_64 code was refactored and tuned in respect to 2020's hardware;
- Inlined SSE2 movaps loop or ERMS are more efficient that subfunction(s);
- New round-robin thread-friendly arenas of tiny blocks;
- Tiny and small blocks can fed from their own pool, not the medium pool;
- Additional bin list to reduce small/tiny Freemem() thread contention;
- Large blocks logic has been rewritten, especially realloc;
- AllocMedium() and AllocLarge() use MAP_POPULATE to reduce page faults;
- On Linux, mremap is used for efficient realloc of large blocks;
- Largest blocks can grow by 2MB=PMD_SIZE chunks for even faster mremap.
About locking:
- Tiny and Small blocks have their own per-size lock;
- Tiny and Small blocks have one giant lock when fedding from their pool;
- Medium and Large blocks have one giant lock over their own pool;
- Medium blocks have a unlocked prefetched memory chunk to reduce contention;
- Large blocks don't lock during mmap/virtualalloc system calls;
- SwitchToThread/FpNanoSleep OS call is done after initial spinning;
- FPCMM_LOCKLESSFREE reduces Freemem() thread contention;
- FPCMM_DEBUG / WriteHeapStatus helps identifying the lock contention(s).
}
{$ifdef FPCX64MM}
// this unit is available only for FPC + X86_64 CPU
{$ifndef FPCMM_NOPAUSE}
// on contention problem, execute "pause" opcode and spin retrying the lock
// - defined by default to follow Intel recommendatations from
// https://software.intel.com/content/www/us/en/develop/articles/benefitting-power-and-performance-sleep-loops.html
// - spinning loop is either using constants or rdtsc (if FPCMM_SLEEPTSC is set)
// - on SkylakeX (Intel 7th gen), "pause" opcode went from 10-20 to 140 cycles
// so our constants below will favor those latest CPUs with a longer pause
{$define FPCMM_PAUSE}
{$endif FPCMM_NOPAUSE}
{ ********* Operating System Specific API Calls }
{$ifdef MSWINDOWS}
// Win64: any assembler function with sub-calls should have a stack frame
// -> nostackframe is defined only on Linux or for functions with no nested call
{$undef NOSFRAME}
const
kernel32 = 'kernel32.dll';
MEM_COMMIT = $1000;
MEM_RESERVE = $2000;
MEM_RELEASE = $8000;
MEM_FREE = $10000;
MEM_TOP_DOWN = $100000;
PAGE_READWRITE = 4;
PAGE_GUARD = $0100;
PAGE_VALID = $00e6; // PAGE_READONLY or PAGE_READWRITE or PAGE_EXECUTE or
// PAGE_EXECUTE_READ or PAGE_EXECUTE_READWRITE or PAGE_EXECUTE_WRITECOPY
type
// VirtualQuery() API result structure
TMemInfo = record
BaseAddress, AllocationBase: pointer;
AllocationProtect: cardinal;
PartitionId: word;
RegionSize: PtrUInt;
State, Protect, MemType: cardinal;
end;
function VirtualAlloc(lpAddress: pointer;
dwSize: PtrUInt; flAllocationType, flProtect: cardinal): pointer;
stdcall; external kernel32 name 'VirtualAlloc';
function VirtualFree(lpAddress: pointer; dwSize: PtrUInt;
dwFreeType: cardinal): LongBool;
stdcall; external kernel32 name 'VirtualFree';
procedure SwitchToThread;
stdcall; external kernel32 name 'SwitchToThread';
function VirtualQuery(lpAddress, lpMemInfo: pointer; dwLength: PtrUInt): PtrUInt;
stdcall; external kernel32 name 'VirtualQuery';
function AllocMedium(Size: PtrInt): pointer; inline;
begin
// bottom-up allocation to reduce fragmentation
result := VirtualAlloc(nil, Size, MEM_COMMIT, PAGE_READWRITE);
end;
function AllocLarge(Size: PtrInt): pointer; inline;
begin
// top-down allocation of large blocks to reduce fragmentation
// (MEM_TOP_DOWN is not available on POSIX, but seems less needed)
result := VirtualAlloc(nil, Size, MEM_COMMIT or MEM_TOP_DOWN, PAGE_READWRITE);
end;
procedure FreeMediumLarge(ptr: pointer; Size: PtrInt); inline;
begin
VirtualFree(ptr, 0, MEM_RELEASE);
end;
{$ifndef FPCMM_NOMREMAP}
function RemapLarge(addr: pointer; old_len, new_len: size_t): pointer;
var
meminfo: TMemInfo;
next: pointer;
nextsize: PtrUInt;
begin
// old_len and new_len have 64KB granularity, so match Windows page size
nextsize := new_len - old_len;
if PtrInt(nextsize) > 0 then
begin
// try to allocate the memory just after the existing one
FillChar(meminfo, SizeOf(meminfo), 0);
next := addr + old_len;
if (VirtualQuery(next, @meminfo, SizeOf(meminfo)) = SizeOf(meminfo)) and
(meminfo.State = MEM_FREE) and
(meminfo.RegionSize >= nextsize) and // enough space?
// reserve the address space in two steps for thread safety
(VirtualAlloc(next, nextsize, MEM_RESERVE, PAGE_READWRITE) <> nil) and
(VirtualAlloc(next, nextsize, MEM_COMMIT, PAGE_READWRITE) <> nil) then
begin
result := addr; // in-place realloc: no need to move memory :)
exit;
end;
end;
// we need to use the slower but safe Alloc/Move/Free pattern
result := AllocLarge(new_len);
if new_len > old_len then
new_len := old_len; // handle size up or down
Move(addr^, result^, new_len); // RTL non-volatile asm or our AVX MoveFast()
FreeMediumLarge(addr, old_len);
end;
{$endif FPCMM_NOMREMAP}
// aligning large chunks > 4MB to 2MB units seems always a good idea
{$define FPCMM_LARGEBIGALIGN}
// experimental VirtualQuery detection of object class - use at your own risk
{$define FPCMM_REPORTMEMORYLEAKS_EXPERIMENTAL}
{$else}
uses
{$ifndef DARWIN}
syscall,
{$endif DARWIN}
BaseUnix;
// in practice, SYSV ABI seems to not require a stack frame, as Win64 does, for
// our use case of nested calls with no local stack storage and direct kernel
// syscalls - but since it is clearly undocumented, we set it on LINUX only
// -> appears to work with no problem from our tests: feedback is welcome!
// -> see FPCMM_NOSFRAME conditional to disable it on LINUX
{$ifdef LINUX}
{$define NOSFRAME}
{$else}
{$define OLDLINUXKERNEL} // no Linuxism on BSD
{$endif LINUX}
// on Linux, mremap() on PMD_SIZE=2MB aligned data can make a huge speedup
// see https://lwn.net/Articles/833208 - so FPCMM_LARGEBIGALIGN is always set
{$ifdef LINUX}
{$define FPCMM_LARGEBIGALIGN} // align large chunks to 21-bit = 2MB = PMD_SIZE
{$endif LINUX}
// we directly call the OS Kernel, so this unit doesn't require any libc
const
{$ifdef OLDLINUXKERNEL}
{$undef FPCMM_MEDIUM32BIT}
MAP_POPULATE = 0;
{$else}
/// put the mapping in first 2 GB of memory (31-bit addresses) - 2.4.20, 2.6
MAP_32BIT = $40;
/// populate (prefault) pagetables to avoid page faults later - 2.5.46
MAP_POPULATE = $08000;
{$endif OLDLINUXKERNEL}
// tiny/small/medium blocks mmap() flags
// - MAP_POPULATE is included to enhance performance on single thread app, and
// also on heavily multi-threaded process (but perhaps not with few threads)
// - FPCMM_MEDIUM32BIT allocates as 31-bit pointers, but may be incompatible
// with TOrmTable for data >256KB so requires NOPOINTEROFFSET conditional,
// therefore is not set by default
MAP_MEDIUM = MAP_PRIVATE or MAP_ANONYMOUS or MAP_POPULATE
{$ifdef FPCMM_MEDIUM32BIT} or MAP_32BIT {$endif};
// large blocks mmap() flags
// - no MAP_32BIT since could use the whole 64-bit address space
// - MAP_POPULATE is included on Linux to avoid page faults, with
// no penalty since mmap/mremap are called outside the large blocks lock
MAP_LARGE = MAP_PRIVATE or MAP_ANONYMOUS or MAP_POPULATE;
{$ifdef FPCMM_MEDIUM32BIT}
var
AllocMediumflags: integer = MAP_MEDIUM;
{$else}
AllocMediumflags = MAP_MEDIUM;
{$endif FPCMM_MEDIUM32BIT}
function AllocMedium(Size: PtrInt): pointer;
begin
result := fpmmap(nil, Size, PROT_READ or PROT_WRITE, AllocMediumflags, -1, 0);
{$ifdef FPCMM_MEDIUM32BIT}
if (result <> nil) or
(AllocMediumflags and MAP_32BIT = 0) then
exit;
// try with no 2GB limit from now on
AllocMediumflags := AllocMediumflags and not MAP_32BIT;
result := AllocMedium(Size); // try with no 2GB limit from now on
{$endif FPCMM_MEDIUM32BIT}
end;
function AllocLarge(Size: PtrInt): pointer; inline;
begin
result := fpmmap(nil, Size, PROT_READ or PROT_WRITE, MAP_LARGE, -1, 0);
end;
procedure FreeMediumLarge(ptr: pointer; Size: PtrInt); inline;
begin
fpmunmap(ptr, Size);
end;
{$ifdef LINUX}
{$ifndef FPCMM_NOMREMAP}
const
syscall_nr_mremap = 25; // valid on x86_64 Linux and Android
MREMAP_MAYMOVE = 1;
function RemapLarge(addr: pointer; old_len, new_len: size_t): pointer; inline;
begin
// let the Linux Kernel mremap() the memory using its TLB magic
result := pointer(do_syscall(syscall_nr_mremap, TSysParam(addr),
TSysParam(old_len), TSysParam(new_len), TSysParam(MREMAP_MAYMOVE)));
end;
{$endif FPCMM_NOMREMAP}
// experimental detection of object class - use at your own risk
{$define FPCMM_REPORTMEMORYLEAKS_EXPERIMENTAL}
// (untested on BSD/DARWIN)
{$else}
{$define FPCMM_NOMREMAP} // mremap is a Linux-specific syscall
{$endif LINUX}
procedure SwitchToThread;
var
t: Ttimespec;
begin
// note: nanosleep() adds a few dozen of microsecs for context switching
t.tv_sec := 0;
t.tv_nsec := 10; // empirically identified on a recent Linux Kernel
fpnanosleep(@t, nil);
end;
{$endif MSWINDOWS}
// fallback to safe and simple Alloc/Move/Free pattern
{$ifdef FPCMM_NOMREMAP}
function RemapLarge(addr: pointer; old_len, new_len: size_t): pointer;
begin
result := AllocLarge(new_len);
if new_len > old_len then
new_len := old_len; // resize down
Move(addr^, result^, new_len); // RTL non-volatile asm or our AVX MoveFast()
FreeMediumLarge(addr, old_len);
end;
{$undef FPCMM_LARGEBIGALIGN} // keep 64KB granularity if no mremap()
{$endif FPCMM_NOMREMAP}
{ ********* Some Assembly Helpers }
// low-level conditional to disable nostackframe code on Linux
{$ifdef FPCMM_NOSFRAME}
{$undef NOSFRAME}
{$endif FPCMM_NOSFRAME}
var
HeapStatus: TMMStatus;
{$ifdef FPCMM_DEBUG}
procedure ReleaseCore;
{$ifdef NOSFRAME} nostackframe; {$endif} assembler;
asm
{$ifdef FPCMM_SLEEPTSC}
rdtsc // returns the TSC in EDX:EAX
shl rdx, 32
or rax, rdx
push rax
call SwitchToThread
pop rcx
rdtsc
shl rdx, 32
or rax, rdx
lea rdx, [rip + HeapStatus]
sub rax, rcx
lock add qword ptr [rdx + TMMStatus.SleepCycles], rax
{$else}
call SwitchToThread
lea rdx, [rip + HeapStatus]
{$endif FPCMM_SLEEPTSC}
lock inc qword ptr [rdx + TMMStatus.SleepCount]
end;
{$else}
procedure ReleaseCore;
begin
SwitchToThread;
inc(HeapStatus.SleepCount); // indicative counter
end;
{$endif FPCMM_DEBUG}
procedure NotifyArenaAlloc(var Arena: TMMStatusArena; Size: PtrUInt);
nostackframe; assembler;
asm
{$ifdef FPCMM_DEBUG}
lock add qword ptr [Arena].TMMStatusArena.CurrentBytes, Size
lock add qword ptr [Arena].TMMStatusArena.CumulativeBytes, Size
lock inc qword ptr [Arena].TMMStatusArena.CumulativeAlloc
mov rax, qword ptr [Arena].TMMStatusArena.CurrentBytes
cmp rax, qword ptr [Arena].TMMStatusArena.PeakBytes
jbe @s
mov qword ptr [Arena].TMMStatusArena.PeakBytes, rax
@s: {$else}
add qword ptr [Arena].TMMStatusArena.CurrentBytes, Size
add qword ptr [Arena].TMMStatusArena.CumulativeBytes, Size
{$endif FPCMM_DEBUG}
end;
procedure NotifyMediumLargeFree(var Arena: TMMStatusArena; Size: PtrUInt);
nostackframe; assembler;
asm
neg Size
{$ifdef FPCMM_DEBUG}
lock add qword ptr [Arena].TMMStatusArena.CurrentBytes, Size
lock inc qword ptr [Arena].TMMStatusArena.CumulativeFree
{$else}
add qword ptr [Arena].TMMStatusArena.CurrentBytes, Size
{$endif FPCMM_DEBUG}
end;
{ ********* Constants and Data Structures Definitions }
// during spinning, there is clearly thread contention: in this case, plain
// "cmp" before "lock cmpxchg" is mandatory to leverage the CPU cores
{$define FPCMM_CMPBEFORELOCK_SPIN}
// prepare a Medium arena chunk in TMediumInfo.Prefetch outside of the lock
{$define FPCMM_MEDIUMPREFETCH}
// on contention, use a bin list to implement medium blocks freeing
// - disabled, since medium locks occur at getmem: freemem bin got MaxCount=0
{.$define FPCMM_LOCKLESSFREEMEDIUM}
const
// (sometimes) the more arenas, the better multi-threadable
{$ifdef FPCMM_BOOSTER}
NumTinyBlockTypesPO2 = 4;
NumTinyBlockArenasPO2 = 4; // will probably end up with Medium lock contention
{$else}
{$ifdef FPCMM_BOOST}
NumTinyBlockTypesPO2 = 4; // tiny are <= 256 bytes
NumTinyBlockArenasPO2 = 3; // 8 arenas
{$else}
// default (or FPCMM_SERVER) settings
NumTinyBlockTypesPO2 = 3; // multiple arenas for tiny blocks <= 128 bytes
NumTinyBlockArenasPO2 = 3; // 8 round-robin arenas (including main) by default
{$endif FPCMM_BOOST}
{$endif FPCMM_BOOSTER}
NumSmallBlockTypes = 46;
MaximumSmallBlockSize = 2608;
SmallBlockSizes: array[0..NumSmallBlockTypes - 1] of word = (
16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256,
272, 288, 304, 320, 352, 384, 416, 448, 480, 528, 576, 624, 672, 736, 800,
880, 960, 1056, 1152, 1264, 1376, 1504, 1648, 1808, 1984, 2176, 2384,
MaximumSmallBlockSize, MaximumSmallBlockSize, MaximumSmallBlockSize);
NumTinyBlockTypes = 1 shl NumTinyBlockTypesPO2;
NumTinyBlockArenas = (1 shl NumTinyBlockArenasPO2) - 1; // -1 = main Small[]
NumSmallInfoBlock = NumSmallBlockTypes + NumTinyBlockArenas * NumTinyBlockTypes;
SmallBlockGranularity = 16;
TargetSmallBlocksPerPool = 48;
MinimumSmallBlocksPerPool = 12;
SmallBlockDownsizeCheckAdder = 64;
SmallBlockUpsizeAdder = 32;
{$ifdef FPCMM_LOCKLESSFREE}
SmallBlockTypePO2 = 8; // SizeOf(TSmallBlockType)=256 with Bin list
SmallBlockBinCount = (((1 shl SmallBlockTypePO2) - 64) div 8) - 1; // =23
{$else}
SmallBlockTypePO2 = 6; // SizeOf(TSmallBlockType)=64
{$endif FPCMM_LOCKLESSFREE}
MediumBlockPoolSizeMem = 20 * 64 * 1024;
MediumBlockPoolSize = MediumBlockPoolSizeMem - 16;
MediumBlockSizeOffset = 48;
MinimumMediumBlockSize = 11 * 256 + MediumBlockSizeOffset;
MediumBlockBinsPerGroup = 32;
MediumBlockBinGroupCount = 32;
MediumBlockBinCount = MediumBlockBinGroupCount * MediumBlockBinsPerGroup;
MediumBlockGranularity = 256;
MaximumMediumBlockSize =
MinimumMediumBlockSize + (MediumBlockBinCount - 1) * MediumBlockGranularity;
OptimalSmallBlockPoolSizeLowerLimit =
29 * 1024 - MediumBlockGranularity + MediumBlockSizeOffset;
OptimalSmallBlockPoolSizeUpperLimit =
64 * 1024 - MediumBlockGranularity + MediumBlockSizeOffset;
MaximumSmallBlockPoolSize =
OptimalSmallBlockPoolSizeUpperLimit + MinimumMediumBlockSize;
MediumInPlaceDownsizeLimit = MinimumMediumBlockSize div 4;
IsFreeBlockFlag = 1;
IsMediumBlockFlag = 2;
IsSmallBlockPoolInUseFlag = 4;
IsLargeBlockFlag = 4;
PreviousMediumBlockIsFreeFlag = 8;
LargeBlockIsSegmented = 8;
DropSmallFlagsMask = -8;
ExtractSmallFlagsMask = 7;
DropMediumAndLargeFlagsMask = -16;
ExtractMediumAndLargeFlagsMask = 15;
{$ifdef FPCMM_SLEEPTSC}
// pause using rdtsc (30 cycles latency on hardware but emulated on VM)
SpinMediumLockTSC = 10000;
SpinLargeLockTSC = 10000;
{$ifdef FPCMM_PAUSE}
SpinSmallGetmemLockTSC = 1000;
SpinSmallFreememLockTSC = 1000; // _freemem has more collisions
{$ifdef FPCMM_LOCKLESSFREE}
SpinSmallFreememBinTSC = 2000;
{$endif FPCMM_LOCKLESSFREE}
{$endif FPCMM_PAUSE}
{$else}
// pause with constant spinning counts (empirical values from fastmm4-avx)
SpinMediumLockCount = 2500;
SpinLargeLockCount = 5000;
{$ifdef FPCMM_PAUSE}
SpinSmallGetmemLockCount = 500;
SpinSmallFreememLockCount = 500;
{$ifdef FPCMM_LOCKLESSFREE}
SpinFreememBinCount = 500;
{$endif FPCMM_LOCKLESSFREE}
{$endif FPCMM_PAUSE}
{$endif FPCMM_SLEEPTSC}
{$ifdef FPCMM_ERMS}
// pre-ERMS expects at least 256 bytes, IvyBridge+ with ERMS is good from 64
// (copy_user_enhanced_fast_string() in recent Linux kernel uses 64)
// see https://stackoverflow.com/a/43837564/458259 for explanations and timing
// -> "movaps" loop is used up to 256 bytes of data: good on all CPUs
// -> "movnt" Move/MoveFast is used for large blocks: always faster than ERMS
ErmsMinSize = 256;
{$endif FPCMM_ERMS}
type
PSmallBlockPoolHeader = ^TSmallBlockPoolHeader;
// information for each small block size - 64/256 bytes long >= CPU cache line
TSmallBlockType = record
Locked: boolean;
AllowedGroupsForBlockPoolBitmap: byte;
BlockSize: Word;
MinimumBlockPoolSize: Word;
OptimalBlockPoolSize: Word;
NextPartiallyFreePool: PSmallBlockPoolHeader;
PreviousPartiallyFreePool: PSmallBlockPoolHeader;
NextSequentialFeedBlockAddress: pointer;
MaxSequentialFeedBlockAddress: pointer;
CurrentSequentialFeedPool: PSmallBlockPoolHeader;
GetmemCount: cardinal;
FreememCount: cardinal;
GetmemSleepCount: cardinal;
FreememSleepCount: cardinal;
{$ifdef FPCMM_LOCKLESSFREE} // 192 optional bytes for FreeMem Bin (= 13KB)
BinLocked: boolean; // dedicated lock for less contention
BinCount: byte;
BinSpinCount: cardinal;
BinInstance: array[0.. SmallBlockBinCount - 1] of pointer;
{$endif FPCMM_LOCKLESSFREE}
end;
PSmallBlockType = ^TSmallBlockType;
TSmallBlockTypes = array[0..NumSmallBlockTypes - 1] of TSmallBlockType;
TTinyBlockTypes = array[0..NumTinyBlockTypes - 1] of TSmallBlockType;
TSmallBlockInfo = record
Small: TSmallBlockTypes;
Tiny: array[0..NumTinyBlockArenas - 1] of TTinyBlockTypes;
GetmemLookup: array[0..
(MaximumSmallBlockSize - 1) div SmallBlockGranularity] of byte;
{$ifndef FPCMM_ASSUMEMULTITHREAD}
IsMultiThreadPtr: PBoolean; // safe access to IsMultiThread global variable
{$endif FPCMM_ASSUMEMULTITHREAD}
TinyCurrentArena: integer;
end;
TSmallBlockPoolHeader = record
BlockType: PSmallBlockType;
{$ifdef CPU32}
Padding32Bits: cardinal; // for 8*4=32 bytes alignment
{$endif CPU32}
NextPartiallyFreePool: PSmallBlockPoolHeader;
PreviousPartiallyFreePool: PSmallBlockPoolHeader;
FirstFreeBlock: pointer;
BlocksInUse: cardinal;
SmallBlockPoolSignature: cardinal;
FirstBlockPoolPointerAndFlags: PtrUInt;
end;
PMediumBlockPoolHeader = ^TMediumBlockPoolHeader;
TMediumBlockPoolHeader = record
PreviousMediumBlockPoolHeader: PMediumBlockPoolHeader;
NextMediumBlockPoolHeader: PMediumBlockPoolHeader;
Reserved1: PtrUInt;
FirstMediumBlockSizeAndFlags: PtrUInt;
end;
PMediumFreeBlock = ^TMediumFreeBlock;
TMediumFreeBlock = record
PreviousFreeBlock: PMediumFreeBlock;
NextFreeBlock: PMediumFreeBlock;
end;
{$ifdef FPCMM_LOCKLESSFREEMEDIUM}
const
MediumBlockLocklessBinCount = 255;
type
// used by TMediumBlockInfo to reduce thread pressure
TMediumLocklessBin = record
Locked: boolean; // dedicated lock for less contention
Count: byte;
MaxCount: byte;
Instance: array[0 .. MediumBlockLocklessBinCount - 1] of pointer;
end;
{$endif FPCMM_LOCKLESSFREEMEDIUM}
TMediumBlockInfo = record
Locked: boolean;
{$ifdef FPCMM_MEDIUMPREFETCH}
PrefetchLocked: boolean;
{$endif FPCMM_MEDIUMPREFETCH}
PoolsCircularList: TMediumBlockPoolHeader;
LastSequentiallyFed: pointer;
SequentialFeedBytesLeft: cardinal;
BinGroupBitmap: cardinal;
{$ifdef FPCMM_MEDIUMPREFETCH}
Prefetch: pointer;
{$endif FPCMM_MEDIUMPREFETCH}
{$ifndef FPCMM_ASSUMEMULTITHREAD}
IsMultiThreadPtr: PBoolean; // safe access to IsMultiThread global variable
{$endif FPCMM_ASSUMEMULTITHREAD}
BinBitmaps: array[0..MediumBlockBinGroupCount - 1] of cardinal;
Bins: array[0..MediumBlockBinCount - 1] of TMediumFreeBlock;
{$ifdef FPCMM_LOCKLESSFREEMEDIUM}
LocklessBin: TMediumLocklessBin;
{$endif FPCMM_LOCKLESSFREEMEDIUM}
end;
PLargeBlockHeader = ^TLargeBlockHeader;
TLargeBlockHeader = record
PreviousLargeBlockHeader: PLargeBlockHeader;
NextLargeBlockHeader: PLargeBlockHeader;
Reserved: PtrUInt;
BlockSizeAndFlags: PtrUInt;
end;
const
BlockHeaderSize = SizeOf(pointer);
SmallBlockPoolHeaderSize = SizeOf(TSmallBlockPoolHeader);
MediumBlockPoolHeaderSize = SizeOf(TMediumBlockPoolHeader);
LargeBlockHeaderSize = SizeOf(TLargeBlockHeader);
LargeBlockGranularity = 1 shl 16; // 64KB for (smallest) large blocks
{$ifdef FPCMM_LARGEBIGALIGN}
LargeBlockGranularity2 = 1 shl 21; // PMD_SIZE=2MB granularity
LargeBlockGranularity2Size = 2 shl 21; // for size >= 4MB
// on Linux, mremap() on PMD_SIZE=2MB aligned data can make a huge speedup
{$endif FPCMM_LARGEBIGALIGN}
var
SmallBlockInfo: TSmallBlockInfo;
MediumBlockInfo: TMediumBlockInfo;
SmallMediumBlockInfo: TMediumBlockInfo
{$ifndef FPCMM_SMALLNOTWITHMEDIUM}
absolute MediumBlockInfo
{$endif FPCMM_SMALLNOTWITHMEDIUM} ;
LargeBlocksLocked: boolean;
LargeBlocksCircularList: TLargeBlockHeader;
{ ********* Shared Routines }
procedure LockMediumBlocks(dummy: cardinal);
{$ifdef NOSFRAME} nostackframe; {$endif} assembler;
// on input/output: r10=TMediumBlockInfo
asm
{$ifdef FPCMM_MEDIUMPREFETCH}
// since we are waiting for the lock, prefetch one medium memory chunk
mov rcx, r10
xor edx, edx
cmp qword ptr [rcx].TMediumBlockInfo.Prefetch, rdx
jnz @s
{$ifdef FPCMM_CMPBEFORELOCK_SPIN}
cmp byte ptr [rcx].TMediumBlockInfo.PrefetchLocked, dl
jnz @s
{$endif FPCMM_CMPBEFORELOCK_SPIN}
mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.PrefetchLocked, ah
jne @s
cmp qword ptr [rcx].TMediumBlockInfo.Prefetch, rdx
jnz @s2
push rsi
push rdi
push r10
push r11
mov dummy, MediumBlockPoolSizeMem
call AllocMedium
pop r11
pop r10
pop rdi
pop rsi
mov qword ptr [r10].TMediumBlockInfo.Prefetch, rax
@s2: mov byte ptr [r10].TMediumBlockInfo.PrefetchLocked, false
{$endif FPCMM_MEDIUMPREFETCH}
// spin and acquire the medium arena lock
{$ifdef FPCMM_SLEEPTSC}
@s: rdtsc // tsc in edx:eax
shl rdx, 32
lea r9, [rax + rdx + SpinMediumLockTSC] // r9 = endtsc
@sp: pause
rdtsc
shl rdx, 32
or rax, rdx
cmp rax, r9
ja @rc // timeout
{$else}
@s: mov edx, SpinMediumLockCount
@sp: pause
dec edx
jz @rc //timeout
{$endif FPCMM_SLEEPTSC}
mov rcx, r10
mov eax, $100
{$ifdef FPCMM_CMPBEFORELOCK_SPIN}
cmp byte ptr [r10].TMediumBlockInfo.Locked, true
je @sp
{$endif FPCMM_CMPBEFORELOCK_SPIN}
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah
je @ok
jmp @sp
@rc: push rsi // preserve POSIX and Win64 ABI registers
push rdi
push r10
push r11
call ReleaseCore
pop r11
pop r10
pop rdi
pop rsi
lea rax, [rip + HeapStatus]
{$ifdef FPCMM_DEBUG} lock {$endif}
inc qword ptr [rax].TMMStatus.Medium.SleepCount
jmp @s
@ok:
end;
procedure InsertMediumBlockIntoBin; nostackframe; assembler;
// rcx=P edx=blocksize r10=TMediumBlockInfo - even on POSIX
asm
mov rax, rcx
// Get the bin number for this block size
sub edx, MinimumMediumBlockSize
shr edx, 8
// Validate the bin number
sub edx, MediumBlockBinCount - 1
sbb ecx, ecx
and edx, ecx
add edx, MediumBlockBinCount - 1
mov r9, rdx
// Get the bin address in rcx
shl edx, 4
lea rcx, [r10 + rdx + TMediumBlockInfo.Bins]
// Bins are LIFO, se we insert this block as the first free block in the bin
mov rdx, TMediumFreeBlock[rcx].NextFreeBlock
mov TMediumFreeBlock[rax].PreviousFreeBlock, rcx
mov TMediumFreeBlock[rax].NextFreeBlock, rdx
mov TMediumFreeBlock[rdx].PreviousFreeBlock, rax
mov TMediumFreeBlock[rcx].NextFreeBlock, rax
// Was this bin empty?
cmp rdx, rcx
jne @Done
// Get ecx=bin number, edx=group number
mov rcx, r9
mov rdx, r9
shr edx, 5
// Flag this bin as not empty
mov eax, 1
shl eax, cl
or dword ptr [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4], eax
// Flag the group as not empty
mov eax, 1
mov ecx, edx
shl eax, cl
or [r10 + TMediumBlockInfo.BinGroupBitmap], eax
@Done:
end;
procedure RemoveMediumFreeBlock; nostackframe; assembler;
asm
// rcx=MediumFreeBlock r10=TMediumBlockInfo - even on POSIX
// Get the current previous and next blocks
mov rdx, TMediumFreeBlock[rcx].PreviousFreeBlock
mov rcx, TMediumFreeBlock[rcx].NextFreeBlock
// Remove this block from the linked list
mov TMediumFreeBlock[rcx].PreviousFreeBlock, rdx
mov TMediumFreeBlock[rdx].NextFreeBlock, rcx
// Is this bin now empty? If the previous and next free block pointers are
// equal, they must point to the bin
cmp rcx, rdx
jne @Done
// Get ecx=bin number, edx=group number
lea r8, [r10 + TMediumBlockInfo.Bins]
sub rcx, r8
mov edx, ecx
shr ecx, 4
shr edx, 9
// Flag this bin as empty
mov eax, -2
rol eax, cl
and dword ptr [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4], eax
jnz @Done
// Flag this group as empty
mov eax, -2
mov ecx, edx
rol eax, cl
and [r10 + TMediumBlockInfo.BinGroupBitmap], eax
@Done:
end;
procedure BinMediumSequentialFeedRemainder(
var Info: TMediumBlockInfo); nostackframe; assembler;
asm
mov r10, Info
mov eax, [Info + TMediumBlockInfo.SequentialFeedBytesLeft]
test eax, eax
jz @Done
// Is the last fed sequentially block free?
mov rax, [Info + TMediumBlockInfo.LastSequentiallyFed]
test byte ptr [rax - BlockHeaderSize], IsFreeBlockFlag
jnz @LastBlockFedIsFree
// Set the "previous block is free" flag in the last block fed
or qword ptr [rax - BlockHeaderSize], PreviousMediumBlockIsFreeFlag
// Get edx=remainder size, rax=remainder start
mov edx, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft]
sub rax, rdx
@BinTheRemainder:
// Store the size of the block as well as the flags
lea rcx, [rdx + IsMediumBlockFlag + IsFreeBlockFlag]
mov [rax - BlockHeaderSize], rcx
// Store the trailing size marker
mov [rax + rdx - 16], rdx
// Bin this medium block
cmp edx, MinimumMediumBlockSize
jb @Done
mov rcx, rax
jmp InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize
@Done: ret
@LastBlockFedIsFree:
// Drop the flags
mov rdx, DropMediumAndLargeFlagsMask
and rdx, [rax - BlockHeaderSize]
// Free the last block fed
cmp edx, MinimumMediumBlockSize
jb @DontRemoveLastFed
// Last fed block is free - remove it from its size bin
mov rcx, rax
call RemoveMediumFreeBlock // rcx = APMediumFreeBlock
// Re-read rax and rdx
mov rax, [r10 + TMediumBlockInfo.LastSequentiallyFed]
mov rdx, DropMediumAndLargeFlagsMask
and rdx, [rax - BlockHeaderSize]
@DontRemoveLastFed:
// Get the number of bytes left in ecx
mov ecx, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft]
// rax = remainder start, rdx = remainder size
sub rax, rcx
add edx, ecx
jmp @BinTheRemainder
end;
procedure LockLargeBlocks;
{$ifdef NOSFRAME} nostackframe; {$endif} assembler;
asm
@s: mov eax, $100
lea rcx, [rip + LargeBlocksLocked]
lock cmpxchg byte ptr [rcx], ah
je @ok
{$ifdef FPCMM_SLEEPTSC}
rdtsc
shl rdx, 32
lea r9, [rax + rdx + SpinLargeLockTSC] // r9 = endtsc
@sp: pause
rdtsc
shl rdx, 32
or rax, rdx
cmp rax, r9
ja @rc // timeout
{$else}
mov edx, SpinLargeLockCount
@sp: pause
dec edx
jz @rc // timeout
{$endif FPCMM_SLEEPTSC}
mov eax, $100
{$ifdef FPCMM_CMPBEFORELOCK_SPIN}
cmp byte ptr [rcx], true
je @sp
{$endif FPCMM_CMPBEFORELOCK_SPIN}
lock cmpxchg byte ptr [rcx], ah
je @ok
jmp @sp
@rc: call ReleaseCore
lea rax, [rip + HeapStatus]
{$ifdef FPCMM_DEBUG} lock {$endif}
inc qword ptr [rax].TMMStatus.Large.SleepCount
jmp @s
@ok: // reset the stack frame before ret
end;
procedure FreeMedium(ptr: PMediumBlockPoolHeader);
begin
FreeMediumLarge(ptr, MediumBlockPoolSizeMem);
NotifyMediumLargeFree(HeapStatus.Medium, MediumBlockPoolSizeMem);
end;
{$ifdef FPCMM_MEDIUMPREFETCH}
function TryAllocMediumPrefetch(var Info: TMediumBlockInfo): pointer;
nostackframe; assembler;
asm
xor eax, eax
mov rcx, Info
cmp qword ptr [rcx].TMediumBlockInfo.Prefetch, rax
jz @s // is there a prefetched memory chunk available?
xor edx, edx
mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.PrefetchLocked, ah
jne @s
// just get the memory chunk - no need to call mmap/VirtualAlloc
mov rax, qword ptr [rcx].TMediumBlockInfo.Prefetch
mov qword ptr [rcx].TMediumBlockInfo.Prefetch, rdx
mov byte ptr [rcx].TMediumBlockInfo.PrefetchLocked, dl
@s:
end;
{$endif FPCMM_MEDIUMPREFETCH}
function AllocNewSequentialFeedMediumPool(BlockSize: cardinal;
var Info: TMediumBlockInfo): pointer;
var
old: PMediumBlockPoolHeader;
new: pointer;
begin
BinMediumSequentialFeedRemainder(Info);
{$ifdef FPCMM_MEDIUMPREFETCH}
new := TryAllocMediumPrefetch(Info);
if new = nil then
{$endif FPCMM_MEDIUMPREFETCH}
new := AllocMedium(MediumBlockPoolSizeMem);
if new <> nil then
begin
old := Info.PoolsCircularList.NextMediumBlockPoolHeader;
PMediumBlockPoolHeader(new).PreviousMediumBlockPoolHeader := @Info.PoolsCircularList;
Info.PoolsCircularList.NextMediumBlockPoolHeader := new;
PMediumBlockPoolHeader(new).NextMediumBlockPoolHeader := old;
old.PreviousMediumBlockPoolHeader := new;
PPtrUInt(PByte(new) + MediumBlockPoolSize - BlockHeaderSize)^ := IsMediumBlockFlag;
Info.SequentialFeedBytesLeft :=
(MediumBlockPoolSize - MediumBlockPoolHeaderSize) - BlockSize;
result := pointer(PByte(new) + MediumBlockPoolSize - BlockSize);
Info.LastSequentiallyFed := result;
PPtrUInt(PByte(result) - BlockHeaderSize)^ := BlockSize or IsMediumBlockFlag;
NotifyArenaAlloc(HeapStatus.Medium, MediumBlockPoolSizeMem);
end
else
begin
Info.SequentialFeedBytesLeft := 0; // system is unstable for sure
result := nil;
end;
end;
function ComputeLargeBlockSize(size: PtrUInt): PtrUInt; inline;
begin
inc(size, LargeBlockHeaderSize - 1 + BlockHeaderSize);
{$ifdef FPCMM_LARGEBIGALIGN}
// on Linux, mremap() on PMD_SIZE=2MB aligned data make a huge speedup
if size >= LargeBlockGranularity2Size then // trigger if size>=4MB
result := (size + LargeBlockGranularity2) and -LargeBlockGranularity2
else
{$endif FPCMM_LARGEBIGALIGN}
// use default 64KB granularity
result := (size + LargeBlockGranularity) and -LargeBlockGranularity;
end;
function AllocateLargeBlockFrom(existing: pointer;
oldblocksize, newblocksize: PtrUInt): pointer;
var
new, old: PLargeBlockHeader;
begin
if existing = nil then
new := AllocLarge(newblocksize)
else
new := RemapLarge(existing, oldblocksize, newblocksize);
if new <> nil then
begin
NotifyArenaAlloc(HeapStatus.Large, newblocksize);
if existing <> nil then
NotifyMediumLargeFree(HeapStatus.Large, oldblocksize);
new.BlockSizeAndFlags := newblocksize or IsLargeBlockFlag;
LockLargeBlocks;
old := LargeBlocksCircularList.NextLargeBlockHeader;
new.PreviousLargeBlockHeader := @LargeBlocksCircularList;
LargeBlocksCircularList.NextLargeBlockHeader := new;
new.NextLargeBlockHeader := old;
old.PreviousLargeBlockHeader := new;
LargeBlocksLocked := False;
inc(new);
end;
result := new;
end;
function AllocateLargeBlock(size: PtrUInt): pointer;
begin
result := AllocateLargeBlockFrom(nil, 0, ComputeLargeBlockSize(size));
end;
procedure FreeLarge(ptr: PLargeBlockHeader; size: PtrUInt);
begin
NotifyMediumLargeFree(HeapStatus.Large, size);
FreeMediumLarge(ptr, size);
end;
function FreeLargeBlock(p: pointer): PtrInt;
var
header, prev, next: PLargeBlockHeader;
begin
header := pointer(PByte(p) - LargeBlockHeaderSize);
if header.BlockSizeAndFlags and IsFreeBlockFlag <> 0 then
begin
// try to release the same pointer twice
result := 0;
exit;
end;
LockLargeBlocks;
prev := header.PreviousLargeBlockHeader;
next := header.NextLargeBlockHeader;
next.PreviousLargeBlockHeader := prev;
prev.NextLargeBlockHeader := next;
LargeBlocksLocked := False;
result := DropMediumAndLargeFlagsMask and header.BlockSizeAndFlags;
FreeLarge(header, result);
end;
function ReallocateLargeBlock(p: pointer; size: PtrUInt): pointer;
var
oldavail, minup, new, old: PtrUInt;
prev, next, header: PLargeBlockHeader;
begin
header := pointer(PByte(p) - LargeBlockHeaderSize);
oldavail := (DropMediumAndLargeFlagsMask and header^.BlockSizeAndFlags) -
(LargeBlockHeaderSize + BlockHeaderSize);
new := size;
if size > oldavail then
begin
// size-up with 1/8 or 1/4 overhead for any future growing realloc
if oldavail > 128 shl 20 then
minup := oldavail + oldavail shr 3
else
minup := oldavail + oldavail shr 2;
if size < minup then
new := minup;
end
else
begin
result := p;
oldavail := oldavail shr 1;
if size >= oldavail then
// small size-up within current buffer -> no reallocate
exit
else
// size-down and move just the trailing data
oldavail := size;
end;
if new < MaximumMediumBlockSize then
begin
// size was reduced to a small/medium block: use GetMem/Move/FreeMem
result := _GetMem(new);
if result <> nil then
Move(p^, result^, oldavail); // RTL non-volatile asm or our AVX MoveFast()
_FreeMem(p);
end
else
begin
old := DropMediumAndLargeFlagsMask and header^.BlockSizeAndFlags;
size := ComputeLargeBlockSize(new);
if size = old then
// no need to realloc anything (paranoid check: should be handled above)
result := p
else
begin
// remove previous large block from current chain list
LockLargeBlocks;
prev := header^.PreviousLargeBlockHeader;
next := header^.NextLargeBlockHeader;
next.PreviousLargeBlockHeader := prev;
prev.NextLargeBlockHeader := next;
LargeBlocksLocked := False;
// on Linux, call Kernel mremap() and its TLB magic
// on Windows, try to reserve the memory block just after the existing
// otherwise, use Alloc/Move/Free pattern, with asm/AVX move
result := AllocateLargeBlockFrom(header, old, size);
end;
end;
end;
{ ********* Main Memory Manager Functions }
function _GetMem(size: PtrUInt): pointer;
{$ifdef NOSFRAME} nostackframe; {$endif} assembler;
asm
{$ifndef MSWINDOWS}
mov rcx, size
{$else}
push rsi
push rdi
{$endif MSWINDOWS}
push rbx
// Since most allocations are for small blocks, determine small block type
lea rbx, [rip + SmallBlockInfo]
@VoidSizeToSomething:
lea rdx, [size + BlockHeaderSize - 1]
shr rdx, 4 // div SmallBlockGranularity
// Is it a tiny/small block?
cmp size, (MaximumSmallBlockSize - BlockHeaderSize)
ja @NotTinySmallBlock
test size, size
jz @VoidSize
{$ifndef FPCMM_ASSUMEMULTITHREAD}
mov rax, qword ptr [rbx].TSmallBlockInfo.IsMultiThreadPtr
{$endif FPCMM_ASSUMEMULTITHREAD}
// Get the tiny/small TSmallBlockType[] offset in rcx
movzx ecx, byte ptr [rbx + rdx].TSmallBlockInfo.GetmemLookup
mov r8, rbx
shl ecx, SmallBlockTypePO2
// ---------- Acquire block type lock ----------
{$ifndef FPCMM_ASSUMEMULTITHREAD}
cmp byte ptr [rax], false
je @GotLockOnSmallBlock // no lock if IsMultiThread=false
{$endif FPCMM_ASSUMEMULTITHREAD}
// Can use one of the several arenas reserved for tiny blocks?
cmp ecx, SizeOf(TTinyBlockTypes)
jae @NotTinyBlockType
// ---------- TINY (size<=128B) block lock ----------
@LockTinyBlockTypeLoop:
// Round-Robin attempt to lock next SmallBlockInfo.Tiny[]
// -> fair distribution among calls to reduce thread contention
mov dl, NumTinyBlockArenas + 1 // 8/16 arenas (including Small[])
@TinyBlockArenaLoop:
mov eax, SizeOf(TTinyBlockTypes)
// note: "lock xadd" decreases the loop iterations but is slower
xadd dword ptr [r8 + TSmallBlockInfo.TinyCurrentArena], eax
lea rbx, [r8 + rcx]
and eax, ((NumTinyBlockArenas + 1) * SizeOf(TTinyBlockTypes)) - 1
jz @TinySmall // Arena 0 = TSmallBlockInfo.Small[]
lea rbx, [rax + rbx + TSmallBlockInfo.Tiny - SizeOf(TTinyBlockTypes)]
@TinySmall:
mov eax, $100
{$ifdef FPCMM_CMPBEFORELOCK}
cmp byte ptr [rbx].TSmallBlockType.Locked, false
jnz @NextTinyBlockArena1
{$endif FPCMM_CMPBEFORELOCK}
lock cmpxchg byte ptr [rbx].TSmallBlockType.Locked, ah
je @GotLockOnSmallBlockType
@NextTinyBlockArena1:
dec dl
jnz @TinyBlockArenaLoop
// Fallback to SmallBlockInfo.Small[] next 2 small sizes - never occurs
lea rbx, [r8 + rcx + TSmallBlockInfo.Small + SizeOf(TSmallBlockType)]
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.Locked, ah
je @GotLockOnSmallBlockType
add rbx, SizeOf(TSmallBlockType) // next two small sizes
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.Locked, ah
je @GotLockOnSmallBlockType
// Thread Contention (_Freemem is more likely)
{$ifdef FPCMM_DEBUG} lock {$endif}
inc dword ptr [rbx].TSmallBlockType.GetmemSleepCount
push r8
push rcx
call ReleaseCore
pop rcx
pop r8
jmp @LockTinyBlockTypeLoop
// ---------- SMALL (size<2600) block lock ----------
@NotTinyBlockType:
lea rbx, [r8 + rcx].TSmallBlockInfo.Small
@LockBlockTypeLoopRetry:
{$ifdef FPCMM_PAUSE}
{$ifdef FPCMM_SLEEPTSC}
rdtsc
shl rdx, 32
lea r9, [rax + rdx + SpinSmallGetmemLockTSC] // r9 = endtsc
{$else}
mov edx, SpinSmallGetmemLockCount
{$endif FPCMM_SLEEPTSC}
{$endif FPCMM_PAUSE}
@LockBlockTypeLoop:
// Grab the default block type
mov eax, $100
{$ifdef FPCMM_CMPBEFORELOCK}
cmp byte ptr [rbx].TSmallBlockType.Locked, false
jnz @NextLockBlockType1
{$endif FPCMM_CMPBEFORELOCK}
lock cmpxchg byte ptr [rbx].TSmallBlockType.Locked, ah
je @GotLockOnSmallBlockType
// Try up to two next sizes
mov eax, $100
@NextLockBlockType1:
add rbx, SizeOf(TSmallBlockType)
{$ifdef FPCMM_CMPBEFORELOCK}
cmp byte ptr [rbx].TSmallBlockType.Locked, al
jnz @NextLockBlockType2
{$endif FPCMM_CMPBEFORELOCK}
lock cmpxchg byte ptr [rbx].TSmallBlockType.Locked, ah
je @GotLockOnSmallBlockType
mov eax, $100
@NextLockBlockType2:
add rbx, SizeOf(TSmallBlockType)
pause
{$ifdef FPCMM_CMPBEFORELOCK}
cmp byte ptr [rbx].TSmallBlockType.Locked, al
jnz @NextLockBlockType3
{$endif FPCMM_CMPBEFORELOCK}
lock cmpxchg byte ptr [rbx].TSmallBlockType.Locked, ah
je @GotLockOnSmallBlockType
@NextLockBlockType3:
sub rbx, 2 * SizeOf(TSmallBlockType)
{$ifdef FPCMM_PAUSE}
pause
{$ifdef FPCMM_SLEEPTSC}
rdtsc
shl rdx, 32
or rax, rdx
cmp rax, r9
jb @LockBlockTypeLoop // continue spinning until timeout
{$else}
dec edx
jnz @LockBlockTypeLoop // continue until spin count reached
{$endif FPCMM_SLEEPTSC}
{$endif FPCMM_PAUSE}
// Block type and two sizes larger are all locked - give up and sleep
{$ifdef FPCMM_DEBUG} lock {$endif}
inc dword ptr [rbx].TSmallBlockType.GetmemSleepCount
call ReleaseCore
jmp @LockBlockTypeLoopRetry
// ---------- TINY/SMALL block registration ----------
{$ifndef FPCMM_ASSUMEMULTITHREAD}
@GotLockOnSmallBlock:
add rbx, rcx
{$endif FPCMM_ASSUMEMULTITHREAD}
@GotLockOnSmallBlockType:
// set rdx=NextPartiallyFreePool rax=FirstFreeBlock rcx=DropSmallFlagsMask
mov rdx, [rbx].TSmallBlockType.NextPartiallyFreePool
add [rbx].TSmallBlockType.GetmemCount, 1
mov rax, [rdx].TSmallBlockPoolHeader.FirstFreeBlock
mov rcx, DropSmallFlagsMask
// Is there a pool with free blocks?
cmp rdx, rbx
je @TrySmallSequentialFeed
add [rdx].TSmallBlockPoolHeader.BlocksInUse, 1
// Set the new first free block and the block header
and rcx, [rax - BlockHeaderSize]
mov [rdx].TSmallBlockPoolHeader.FirstFreeBlock, rcx
mov [rax - BlockHeaderSize], rdx
// Is the chunk now full?
jz @RemoveSmallPool
// Unlock the block type and leave
mov byte ptr [rbx].TSmallBlockType.Locked, false
{$ifdef NOSFRAME}
pop rbx
ret
{$else}
jmp @Done // on Win64, a stack frame is required
{$endif NOSFRAME}
@VoidSize:
inc size // "we always need to allocate something" (see RTL heap.inc)
jmp @VoidSizeToSomething
@TrySmallSequentialFeed:
// Feed a small block sequentially
movzx ecx, [rbx].TSmallBlockType.BlockSize
mov rdx, [rbx].TSmallBlockType.CurrentSequentialFeedPool
add rcx, rax
// Can another block fit?
cmp rax, [rbx].TSmallBlockType.MaxSequentialFeedBlockAddress
ja @AllocateSmallBlockPool
// Adjust number of used blocks and sequential feed pool
mov [rbx].TSmallBlockType.NextSequentialFeedBlockAddress, rcx
add [rdx].TSmallBlockPoolHeader.BlocksInUse, 1
// Unlock the block type, set the block header and leave
mov byte ptr [rbx].TSmallBlockType.Locked, false
mov [rax - BlockHeaderSize], rdx
{$ifdef NOSFRAME}
pop rbx
ret
{$else}
jmp @Done // on Win64, a stack frame is required
{$endif NOSFRAME}
@RemoveSmallPool:
// Pool is full - remove it from the partially free list
mov rcx, [rdx].TSmallBlockPoolHeader.NextPartiallyFreePool
mov [rcx].TSmallBlockPoolHeader.PreviousPartiallyFreePool, rbx
mov [rbx].TSmallBlockType.NextPartiallyFreePool, rcx
// Unlock the block type and leave
mov byte ptr [rbx].TSmallBlockType.Locked, false
{$ifdef NOSFRAME}
pop rbx
ret
{$else}
jmp @Done // on Win64, a stack frame is required
{$endif NOSFRAME}
@AllocateSmallBlockPool:
// Access shared information about Medium blocks storage
lea rcx, [rip + SmallMediumBlockInfo]
mov r10, rcx
{$ifndef FPCMM_ASSUMEMULTITHREAD}
mov rax, [rcx + TMediumBlockinfo.IsMultiThreadPtr]
cmp byte ptr [rax], false
je @MediumLocked1 // no lock if IsMultiThread=false
{$endif FPCMM_ASSUMEMULTITHREAD}
mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah
je @MediumLocked1
call LockMediumBlocks
@MediumLocked1:
// Are there any available blocks of a suitable size?
movsx esi, [rbx].TSmallBlockType.AllowedGroupsForBlockPoolBitmap
and esi, [r10 + TMediumBlockInfo.BinGroupBitmap]
jz @NoSuitableMediumBlocks
// Compute rax = bin group number with free blocks, rcx = bin number
bsf eax, esi
lea r9, [rax * 4]
mov ecx, [r10 + TMediumBlockInfo.BinBitmaps + r9]
bsf ecx, ecx
lea rcx, [rcx + r9 * 8]
// Set rdi = @bin, rsi = free block
lea rsi, [rcx * 8] // SizeOf(TMediumBlockBin) = 16
lea rdi, [r10 + TMediumBlockInfo.Bins + rsi * 2]
mov rsi, TMediumFreeBlock[rdi].NextFreeBlock
// Remove the first block from the linked list (LIFO)
mov rdx, TMediumFreeBlock[rsi].NextFreeBlock
mov TMediumFreeBlock[rdi].NextFreeBlock, rdx
mov TMediumFreeBlock[rdx].PreviousFreeBlock, rdi
// Is this bin now empty?
cmp rdi, rdx
jne @MediumBinNotEmpty
// rbx = block type, rax = bin group number,
// r9 = bin group number * 4, rcx = bin number, rdi = @bin, rsi = free block
// Flag this bin (and the group if needed) as empty
mov edx, - 2
mov r11d, [r10 + TMediumBlockInfo.BinGroupBitmap]
rol edx, cl
btr r11d, eax // btr reg,reg is faster than btr [mem],reg
and [r10 + TMediumBlockInfo.BinBitmaps + r9], edx
jnz @MediumBinNotEmpty
mov [r10 + TMediumBlockInfo.BinGroupBitmap], r11d
@MediumBinNotEmpty:
// rsi = free block, rbx = block type
// Get the size of the available medium block in edi
mov rdi, DropMediumAndLargeFlagsMask
and rdi, [rsi - BlockHeaderSize]
cmp edi, MaximumSmallBlockPoolSize
jb @UseWholeBlock
// Split the block: new block size is the optimal size
mov edx, edi
movzx edi, [rbx].TSmallBlockType.OptimalBlockPoolSize
sub edx, edi
lea rcx, [rsi + rdi]
lea rax, [rdx + IsMediumBlockFlag + IsFreeBlockFlag]
mov [rcx - BlockHeaderSize], rax
// Store the size of the second split as the second last pointer
mov [rcx + rdx - 16], rdx
// Put the remainder in a bin (it will be big enough)
call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize
jmp @GotMediumBlock
@NoSuitableMediumBlocks:
// Check the sequential feed medium block pool for space
movzx ecx, [rbx].TSmallBlockType.MinimumBlockPoolSize
mov edi, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft]
cmp edi, ecx
jb @AllocateNewSequentialFeed
// Get the address of the last block that was fed
mov rsi, [r10 + TMediumBlockInfo.LastSequentiallyFed]
// Enough sequential feed space: Will the remainder be usable?
movzx ecx, [rbx].TSmallBlockType.OptimalBlockPoolSize
lea rdx, [rcx + MinimumMediumBlockSize]
cmp edi, edx
cmovae edi, ecx
sub rsi, rdi
// Update the sequential feed parameters
sub [r10 + TMediumBlockInfo.SequentialFeedBytesLeft], edi
mov [r10 + TMediumBlockInfo.LastSequentiallyFed], rsi
jmp @GotMediumBlock
@AllocateNewSequentialFeed:
// Use the optimal size for allocating this small block pool
{$ifdef MSWINDOWS}
movzx ecx, word ptr [rbx].TSmallBlockType.OptimalBlockPoolSize
lea rdx, [rip + SmallMediumBlockInfo]
push rcx
push rdx
{$else}
movzx edi, word ptr [rbx].TSmallBlockType.OptimalBlockPoolSize
lea rsi, [rip + SmallMediumBlockInfo]
push rdi
push rsi
{$endif MSWINDOWS}
// on input: ecx/edi=BlockSize, rdx/rsi=Info
call AllocNewSequentialFeedMediumPool
pop r10
pop rdi // restore edi=blocksize and r10=TMediumBlockInfo
mov rsi, rax
test rax, rax
jnz @GotMediumBlock // rsi=freeblock rbx=blocktype edi=blocksize
mov [r10 + TMediumBlockInfo.Locked], al
mov [rbx].TSmallBlockType.Locked, al
{$ifdef NOSFRAME}
pop rbx
ret
{$else}
jmp @Done // on Win64, a stack frame is required
{$endif NOSFRAME}
@UseWholeBlock:
// rsi = free block, rbx = block type, edi = block size
// Mark this block as used in the block following it
and byte ptr [rsi + rdi - BlockHeaderSize], NOT PreviousMediumBlockIsFreeFlag
@GotMediumBlock:
// rsi = free block, rbx = block type, edi = block size
// Set the size and flags for this block
lea rcx, [rdi + IsMediumBlockFlag + IsSmallBlockPoolInUseFlag]
mov [rsi - BlockHeaderSize], rcx
// Unlock medium blocks and setup the block pool
xor eax, eax
mov [r10 + TMediumBlockInfo.Locked], al
mov TSmallBlockPoolHeader[rsi].BlockType, rbx
mov TSmallBlockPoolHeader[rsi].FirstFreeBlock, rax
mov TSmallBlockPoolHeader[rsi].BlocksInUse, 1
mov [rbx].TSmallBlockType.CurrentSequentialFeedPool, rsi
// Return the pointer to the first block, compute next/last block addresses
lea rax, [rsi + SmallBlockPoolHeaderSize]
movzx ecx, [rbx].TSmallBlockType.BlockSize
lea rdx, [rax + rcx]
mov [rbx].TSmallBlockType.NextSequentialFeedBlockAddress, rdx
add rdi, rsi
sub rdi, rcx
mov [rbx].TSmallBlockType.MaxSequentialFeedBlockAddress, rdi
// Unlock the small block type, set header and leave
mov byte ptr [rbx].TSmallBlockType.Locked, false
mov [rax - BlockHeaderSize], rsi
{$ifdef NOSFRAME}
pop rbx
ret
{$else}
jmp @Done // on Win64, a stack frame is required
{$endif NOSFRAME}
// ---------- MEDIUM block allocation ----------
@NotTinySmallBlock:
// Do we need a Large block?
lea r10, [rip + MediumBlockInfo]
cmp rcx, MaximumMediumBlockSize - BlockHeaderSize
ja @IsALargeBlockRequest
// Get the bin size for this block size (rounded up to the next bin size)
lea rbx, [rcx + MediumBlockGranularity - 1 + BlockHeaderSize - MediumBlockSizeOffset]
mov rcx, r10
and ebx, - MediumBlockGranularity
add ebx, MediumBlockSizeOffset
{$ifndef FPCMM_ASSUMEMULTITHREAD}
mov rax, [r10 + TMediumBlockinfo.IsMultiThreadPtr]
cmp byte ptr [rax], false
je @MediumLocked2 // no lock if IsMultiThread=false
{$endif FPCMM_ASSUMEMULTITHREAD}
mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah
je @MediumLocked2
call LockMediumBlocks
@MediumLocked2:
// Compute ecx = bin number in ecx and edx = group number
lea rdx, [rbx - MinimumMediumBlockSize]
mov ecx, edx
shr edx, 8 + 5
shr ecx, 8
mov eax, -1
shl eax, cl
and eax, [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4]
jz @GroupIsEmpty
and ecx, - 32
bsf eax, eax
or ecx, eax
jmp @GotBinAndGroup
@GroupIsEmpty:
// Try all groups greater than this group
mov eax, - 2
mov ecx, edx
shl eax, cl
and eax, [r10 + TMediumBlockInfo.BinGroupBitmap]
jz @TrySequentialFeedMedium
// There is a suitable group with enough space
bsf edx, eax
mov eax, [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4]
bsf ecx, eax
mov eax, edx
shl eax, 5
or ecx, eax
jmp @GotBinAndGroup
@TrySequentialFeedMedium:
mov ecx, [r10 + TMediumBlockInfo.SequentialFeedBytesLeft]
// Can block be fed sequentially?
sub ecx, ebx
jc @AllocateNewSequentialFeedForMedium
// Get the block address, store remaining bytes, set the flags and unlock
mov rax, [r10 + TMediumBlockInfo.LastSequentiallyFed]
sub rax, rbx
mov [r10 + TMediumBlockInfo.LastSequentiallyFed], rax
mov [r10 + TMediumBlockInfo.SequentialFeedBytesLeft], ecx
or rbx, IsMediumBlockFlag
mov [rax - BlockHeaderSize], rbx
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
{$ifdef NOSFRAME}
pop rbx
ret
{$else}
jmp @Done // on Win64, a stack frame is required
{$endif NOSFRAME}
@AllocateNewSequentialFeedForMedium:
{$ifdef MSWINDOWS}
mov ecx, ebx
lea rdx, [rip + MediumBlockInfo]
{$else}
mov edi, ebx
lea rsi, [rip + MediumBlockInfo]
{$endif MSWINDOWS}
// on input: ecx/edi=BlockSize, rdx/rsi=Info
call AllocNewSequentialFeedMediumPool
mov byte ptr [rip + MediumBlockInfo.Locked], false
{$ifdef NOSFRAME}
pop rbx
ret
{$else}
jmp @Done // on Win64, a stack frame is required
{$endif NOSFRAME}
@GotBinAndGroup:
// ebx = block size, ecx = bin number, edx = group number
// Compute rdi = @bin, rsi = free block
lea rax, [rcx + rcx]
lea rdi, [r10 + TMediumBlockInfo.Bins + rax * 8]
mov rsi, TMediumFreeBlock[rdi].NextFreeBlock
// Remove the first block from the linked list (LIFO)
mov rax, TMediumFreeBlock[rsi].NextFreeBlock
mov TMediumFreeBlock[rdi].NextFreeBlock, rax
mov TMediumFreeBlock[rax].PreviousFreeBlock, rdi
// Is this bin now empty?
cmp rdi, rax
jne @MediumBinNotEmptyForMedium
// edx=bingroupnumber, ecx=binnumber, rdi=@bin, rsi=freeblock, ebx=blocksize
// Flag this bin and group as empty
mov eax, - 2
mov r11d, [r10 + TMediumBlockInfo.BinGroupBitmap]
rol eax, cl
btr r11d, edx // btr reg,reg is faster than btr [mem],reg
and [r10 + TMediumBlockInfo.BinBitmaps + rdx * 4], eax
jnz @MediumBinNotEmptyForMedium
mov [r10 + TMediumBlockInfo.BinGroupBitmap], r11d
@MediumBinNotEmptyForMedium:
// rsi = free block, ebx = block size
// Get rdi = size of the available medium block, rdx = second split size
mov rdi, DropMediumAndLargeFlagsMask
and rdi, [rsi - BlockHeaderSize]
mov edx, edi
sub edx, ebx
jz @UseWholeBlockForMedium
// Split the block in two
lea rcx, [rsi + rbx]
lea rax, [rdx + IsMediumBlockFlag + IsFreeBlockFlag]
mov [rcx - BlockHeaderSize], rax
// Store the size of the second split as the second last pointer
mov [rcx + rdx - 16], rdx
// Put the remainder in a bin
cmp edx, MinimumMediumBlockSize
jb @GotMediumBlockForMedium
call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize
jmp @GotMediumBlockForMedium
@UseWholeBlockForMedium:
// Mark this block as used in the block following it
and byte ptr [rsi + rdi - BlockHeaderSize], NOT PreviousMediumBlockIsFreeFlag
@GotMediumBlockForMedium:
// Set the size and flags for this block
lea rcx, [rbx + IsMediumBlockFlag]
mov [rsi - BlockHeaderSize], rcx
// Unlock medium blocks and leave
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
mov rax, rsi
{$ifdef NOSFRAME}
pop rbx
ret
{$else}
jmp @Done // on Win64, a stack frame is required
{$endif NOSFRAME}
// ---------- LARGE block allocation ----------
@IsALargeBlockRequest:
xor rax, rax
test rcx, rcx
js @Done
// Note: size is still in the rcx/rdi first param register
call AllocateLargeBlock
@Done: // restore registers and the stack frame before ret
pop rbx
{$ifdef MSWINDOWS}
pop rdi
pop rsi
{$endif MSWINDOWS}
end;
function FreeMediumBlock(arg1: pointer): PtrUInt;
{$ifdef NOSFRAME} nostackframe; {$endif} assembler;
// rcx=P rdx=[P-BlockHeaderSize] r10=TMediumBlockInfo
// (arg1 is used only for proper call of pascal functions below on all ABI)
asm
// Drop the flags, and set r11=P rbx=blocksize
and rdx, DropMediumAndLargeFlagsMask
push rbx
push rdx // save blocksize
mov rbx, rdx
mov r11, rcx
// Lock the Medium blocks
mov rcx, r10
{$ifndef FPCMM_ASSUMEMULTITHREAD}
mov rax, [r10 + TMediumBlockinfo.IsMultiThreadPtr]
cmp byte ptr [rax], false
je @MediumBlocksLocked // no lock if IsMultiThread=false
{$endif FPCMM_ASSUMEMULTITHREAD}
mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah
je @MediumBlocksLocked
{$ifdef FPCMM_LOCKLESSFREEMEDIUM}
// locked: try to put r11=P in TMediumBlockInfo.LocklessBin.Instance[]
lea rcx, [rcx].TMediumBlockInfo.LocklessBin
cmp byte ptr [rcx].TMediumLocklessBin.Count, MediumBlockLocklessBinCount
je @DoLock // all slots are filled
mov r9d, SpinFreememBinCount
@BinSp: mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumLocklessBin.Locked, ah
je @BinOk
@BinNo: pause
dec r9d
jnz @BinSp
jmp @DoLock
@BinOk: // we acquired TMediumLocklessBin.Locked
movzx eax, byte ptr [rcx].TMediumLocklessBin.Count
cmp al, MediumBlockLocklessBinCount
je @DoLoc2
add byte ptr [rcx].TMediumLocklessBin.Count, 1
cmp al, byte ptr [rcx].TMediumLocklessBin.MaxCount
jb @max
mov byte ptr [rcx].TMediumLocklessBin.MaxCount, al
@max:
mov [rcx + TMediumLocklessBin.Instance + rax * 8], r11
mov byte ptr [rcx].TMediumLocklessBin.Locked, false
jmp @Quit
@DoLoc2:mov byte ptr [rcx].TMediumLocklessBin.Locked, false
@DoLock:{$endif FPCMM_LOCKLESSFREEMEDIUM}
call LockMediumBlocks
@MediumBlocksLocked:
// Get rcx = next block size and flags
mov rcx, [r11 + rbx - BlockHeaderSize]
// Can we combine this block with the next free block?
test qword ptr [r11 + rbx - BlockHeaderSize], IsFreeBlockFlag
jnz @NextBlockIsFree
// Set the "PreviousIsFree" flag in the next block
or rcx, PreviousMediumBlockIsFreeFlag
mov [r11 + rbx - BlockHeaderSize], rcx
@NextBlockChecked:
// Re-read the flags and try to combine with previous free block
test byte ptr [r11 - BlockHeaderSize], PreviousMediumBlockIsFreeFlag
jnz @PreviousBlockIsFree
@PreviousBlockChecked:
// Check if entire medium block pool is free
cmp ebx, (MediumBlockPoolSize - MediumBlockPoolHeaderSize)
je @EntireMediumPoolFree
@Bin: // Store size of the block, flags and trailing size marker and insert into bin
lea rax, [rbx + IsMediumBlockFlag + IsFreeBlockFlag]
mov [r11 - BlockHeaderSize], rax
mov [r11 + rbx - 16], rbx
mov rcx, r11
mov rdx, rbx
call InsertMediumBlockIntoBin // rcx=P, edx=blocksize
{$ifdef FPCMM_LOCKLESSFREEMEDIUM}
// recycle any pending TMediumLocklessBin.Instance[] pointer
lea rcx, [r10].TMediumBlockInfo.LocklessBin
cmp byte ptr [rcx].TMediumLocklessBin.Count, 0
je @Done
mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumLocklessBin.Locked, ah // just try once
jne @Done
// compute r11=P and rbx=blocksize from pending pointer
movzx eax, byte ptr [rcx].TMediumLocklessBin.Count
dec byte ptr [rcx].TMediumLocklessBin.Count
mov r11, [rcx + TMediumLocklessBin.Instance - 8 + rax * 8]
mov byte ptr [rcx].TMediumLocklessBin.Locked, false
mov rbx, qword ptr [r11 - BlockHeaderSize]
and rbx, DropMediumAndLargeFlagsMask
jmp @MediumBlocksLocked
@Done: {$endif FPCMM_LOCKLESSFREEMEDIUM}
// Unlock medium blocks and leave
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
jmp @Quit
@NextBlockIsFree:
// Get rax = next block address, rbx = end of the block
lea rax, [r11 + rbx]
and rcx, DropMediumAndLargeFlagsMask
add rbx, rcx
// Was the block binned?
cmp rcx, MinimumMediumBlockSize
jb @NextBlockChecked
mov rcx, rax
call RemoveMediumFreeBlock // rcx = APMediumFreeBlock
jmp @NextBlockChecked
@PreviousBlockIsFree:
// Get rcx = size/point of the previous free block, rbx = new block end
mov rcx, [r11 - 16]
sub r11, rcx
add rbx, rcx
// Remove the previous block from the linked list
cmp ecx, MinimumMediumBlockSize
jb @PreviousBlockChecked
mov rcx, r11
call RemoveMediumFreeBlock // rcx = APMediumFreeBlock
jmp @PreviousBlockChecked
@EntireMediumPoolFree:
// Ensure current sequential feed pool is free
cmp dword ptr [r10 + TMediumBlockInfo.SequentialFeedBytesLeft], MediumBlockPoolSize - MediumBlockPoolHeaderSize
jne @MakeEmptyMediumPoolSequentialFeed
// Remove this medium block pool from the linked list stored in its header
sub r11, MediumBlockPoolHeaderSize
mov rax, TMediumBlockPoolHeader[r11].PreviousMediumBlockPoolHeader
mov rdx, TMediumBlockPoolHeader[r11].NextMediumBlockPoolHeader
mov TMediumBlockPoolHeader[rax].NextMediumBlockPoolHeader, rdx
mov TMediumBlockPoolHeader[rdx].PreviousMediumBlockPoolHeader, rax
// Unlock medium blocks and free the block pool
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
mov arg1, r11
call FreeMedium
jmp @Quit
@MakeEmptyMediumPoolSequentialFeed:
// Get rbx = end-marker block, and recycle the current sequential feed pool
lea rbx, [r11 + MediumBlockPoolSize - MediumBlockPoolHeaderSize]
mov arg1, r10
call BinMediumSequentialFeedRemainder
// Set this medium pool up as the new sequential feed pool, unlock and leave
mov qword ptr [rbx - BlockHeaderSize], IsMediumBlockFlag
mov dword ptr [r10 + TMediumBlockInfo.SequentialFeedBytesLeft], MediumBlockPoolSize - MediumBlockPoolHeaderSize
mov [r10 + TMediumBlockInfo.LastSequentiallyFed], rbx
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
@Quit: // restore registers and the stack frame
pop rax // medium block size
pop rbx
end;
{$ifdef FPCMM_REPORTMEMORYLEAKS}
const
/// mark freed blocks with 00000000 BLODLESS marker to track incorrect usage
REPORTMEMORYLEAK_FREEDHEXSPEAK = $B10D1E55;
{$endif FPCMM_REPORTMEMORYLEAKS}
function _FreeMem(P: pointer): PtrUInt;
{$ifdef NOSFRAME} nostackframe; {$endif} assembler;
asm
{$ifdef FPCMM_REPORTMEMORYLEAKS}
mov eax, REPORTMEMORYLEAK_FREEDHEXSPEAK // 00000000 BLODLESS marker
{$endif FPCMM_REPORTMEMORYLEAKS}
{$ifndef MSWINDOWS}
mov rcx, P
{$endif MSWINDOWS}
test P, P
jz @Quit // void pointer
{$ifdef FPCMM_REPORTMEMORYLEAKS}
mov qword ptr [P], rax // over TObject VMT or string/dynarray header
{$endif FPCMM_REPORTMEMORYLEAKS}
mov rdx, qword ptr [P - BlockHeaderSize]
{$ifdef FPCMM_ASSUMEMULTITHREAD}
mov eax, $100
{$else}
mov rax, qword ptr [rip + SmallBlockInfo].TSmallBlockInfo.IsMultiThreadPtr
{$endif FPCMM_ASSUMEMULTITHREAD}
// Is it a small block in use?
test dl, IsFreeBlockFlag + IsMediumBlockFlag + IsLargeBlockFlag
jnz @NotSmallBlockInUse
// Get the small block type in rbx and try to grab it
push rbx
mov rbx, [rdx].TSmallBlockPoolHeader.BlockType
{$ifdef FPCMM_ASSUMEMULTITHREAD}
lock cmpxchg byte ptr [rbx].TSmallBlockType.Locked, ah
jne @CheckTinySmallLock
{$else}
cmp byte ptr [rax], false
jne @TinySmallLockLoop // lock if IsMultiThread=true
{$endif FPCMM_ASSUMEMULTITHREAD}
@FreeAndUnlock:
// rbx=TSmallBlockType rcx=P rdx=TSmallBlockPoolHeader
// Adjust number of blocks in use, set rax = old first free block
add [rbx].TSmallBlockType.FreememCount, 1
mov rax, [rdx].TSmallBlockPoolHeader.FirstFreeBlock
sub [rdx].TSmallBlockPoolHeader.BlocksInUse, 1
jz @PoolIsNowEmpty
// Store this as the new first free block
mov [rdx].TSmallBlockPoolHeader.FirstFreeBlock, rcx
// Store the previous first free block as the block header
lea r9, [rax + IsFreeBlockFlag]
mov [rcx - BlockHeaderSize], r9
// Was the pool full?
test rax, rax
jnz @SmallPoolWasNotFull
// Insert the pool back into the linked list if it was full
mov rcx, [rbx].TSmallBlockType.NextPartiallyFreePool
mov [rdx].TSmallBlockPoolHeader.PreviousPartiallyFreePool, rbx
mov [rdx].TSmallBlockPoolHeader.NextPartiallyFreePool, rcx
mov [rcx].TSmallBlockPoolHeader.PreviousPartiallyFreePool, rdx
mov [rbx].TSmallBlockType.NextPartiallyFreePool, rdx
@SmallPoolWasNotFull:
{$ifdef FPCMM_LOCKLESSFREE}
// Try to release all pending bin from this block while we have the lock
cmp byte ptr [rbx].TSmallBlockType.BinCount, 0
jne @ProcessPendingBin
{$endif FPCMM_LOCKLESSFREE}
mov byte ptr [rbx].TSmallBlockType.Locked, false
movzx eax, word ptr [rbx].TSmallBlockType.BlockSize
{$ifdef NOSFRAME}
pop rbx
ret
{$else}
jmp @Done // on Win64, a stack frame is required
{$endif NOSFRAME}
@PoolIsNowEmpty:
// FirstFreeBlock=nil means it is the sequential feed pool with a single block
test rax, rax
jz @IsSequentialFeedPool
// Pool is now empty: Remove it from the linked list and free it
mov rax, [rdx].TSmallBlockPoolHeader.PreviousPartiallyFreePool
mov rcx, [rdx].TSmallBlockPoolHeader.NextPartiallyFreePool
mov TSmallBlockPoolHeader[rax].NextPartiallyFreePool, rcx
mov [rcx].TSmallBlockPoolHeader.PreviousPartiallyFreePool, rax
// Is this the sequential feed pool? If so, stop sequential feeding
xor eax, eax
cmp [rbx].TSmallBlockType.CurrentSequentialFeedPool, rdx
jne @NotSequentialFeedPool
@IsSequentialFeedPool:
mov [rbx].TSmallBlockType.MaxSequentialFeedBlockAddress, rax
@NotSequentialFeedPool:
// Unlock blocktype and release this pool
mov byte ptr [rbx].TSmallBlockType.Locked, false
mov rcx, rdx
mov rdx, qword ptr [rdx - BlockHeaderSize]
lea r10, [rip + SmallMediumBlockInfo]
call FreeMediumBlock // no call nor BinLocked to avoid race condition
movzx eax, word ptr [rbx].TSmallBlockType.BlockSize
{$ifdef NOSFRAME}
pop rbx
ret
{$else}
jmp @Done // on Win64, a stack frame is required
{$endif NOSFRAME}
{$ifdef FPCMM_LOCKLESSFREE}
@ProcessPendingBin:
// Try once to acquire BinLocked (spinning may induce race condition)
{$ifdef FPCMM_CMPBEFORELOCK}
cmp byte ptr [rbx].TSmallBlockType.BinLocked, true
je @BinAlreadyLocked
{$endif FPCMM_CMPBEFORELOCK}
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BinLocked, ah
jne @BinAlreadyLocked
movzx eax, byte ptr [rbx].TSmallBlockType.BinCount
test al, al
jz @NoBin
// free last pointer in TSmallBlockType.BinInstance[]
mov rcx, qword ptr [rbx + TSmallBlockType.BinInstance - 8 + rax * 8]
dec byte ptr [rbx].TSmallBlockType.BinCount
mov byte ptr [rbx].TSmallBlockType.BinLocked, false
mov rdx, [rcx - BlockHeaderSize]
jmp @FreeAndUnlock // loop until BinCount=0 or BinLocked
@NoBin: mov byte ptr [rbx].TSmallBlockType.BinLocked, false
@BinAlreadyLocked:
mov byte ptr [rbx].TSmallBlockType.Locked, false
{$endif FPCMM_LOCKLESSFREE}
movzx eax, word ptr [rbx].TSmallBlockType.BlockSize
{$ifdef NOSFRAME}
pop rbx
ret
{$else}
jmp @Done // on Win64, a stack frame is required
{$endif NOSFRAME}
@NotSmallBlockInUse:
lea r10, [rip + MediumBlockInfo]
test dl, IsFreeBlockFlag + IsLargeBlockFlag
jz @DoFreeMedium
call FreeLargeBlock // P is still in rcx/rdi first param register
jmp @Quit
@DoFreeMedium:
call FreeMediumBlock
jmp @Quit
@TinySmallLockLoop:
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.Locked, ah
je @FreeAndUnlock
@CheckTinySmallLock:
{$ifdef FPCMM_LOCKLESSFREE}
// Try to put rcx=P in TSmallBlockType.BinInstance[]
cmp byte ptr [rbx].TSmallBlockType.BinCount, SmallBlockBinCount
je @LockBlockTypeSleep // wait if all slots are filled
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BinLocked, ah
je @BinLocked
{$ifdef FPCMM_PAUSE}
{$ifdef FPCMM_SLEEPTSC}
push rdx
rdtsc
shl rdx, 32
lea r9, [rax + rdx + SpinSmallFreememBinTSC] // r9 = endtsc
{$else}
mov r9d, SpinFreememBinCount
{$endif FPCMM_SLEEPTSC}
@SpinBinLock:
pause
{$ifdef FPCMM_SLEEPTSC}
rdtsc
shl rdx, 32
or rax, rdx
cmp rax, r9
ja @SpinTimeout
{$else}
dec r9
jz @SpinTimeout
{$endif FPCMM_SLEEPTSC}
{$ifdef FPCMM_CMPBEFORELOCK_SPIN}
cmp byte ptr [rbx].TSmallBlockType.BinLocked, true
je @SpinBinLock
{$endif FPCMM_CMPBEFORELOCK_SPIN}
mov eax, $100
lock cmpxchg byte ptr [rbx].TSmallBlockType.BinLocked, ah
jne @SpinBinLock
{$ifdef FPCMM_SLEEPTSC}
pop rdx
{$endif FPCMM_SLEEPTSC}
jmp @BinLocked
@SpinTimeout:
{$ifdef FPCMM_SLEEPTSC}
pop rdx
{$endif FPCMM_SLEEPTSC}
{$endif FPCMM_PAUSE}
{$ifdef FPCMM_DEBUG} // no lock (informative only)
inc dword ptr [rbx].TSmallBlockType.BinSpinCount
{$endif FPCMM_DEBUG}
jmp @LockBlockTypeSleep
@BinLocked:
movzx eax, byte ptr [rbx].TSmallBlockType.BinCount
cmp al, SmallBlockBinCount
je @LockBlockType
add byte ptr [rbx].TSmallBlockType.BinCount, 1
mov [rbx + TSmallBlockType.BinInstance + rax * 8], rcx
mov byte ptr [rbx].TSmallBlockType.BinLocked, false
movzx eax, word ptr [rbx].TSmallBlockType.BlockSize
{$ifdef NOSFRAME}
pop rbx
ret
{$else}
jmp @Done // on Win64, a stack frame is required
{$endif NOSFRAME}
@LockBlockType:
// Fallback to main block lock if TSmallBlockType.BinInstance[] is full
mov byte ptr [rbx].TSmallBlockType.BinLocked, false
@LockBlockTypeSleep:
{$endif FPCMM_LOCKLESSFREE}
{$ifdef FPCMM_PAUSE}
// Spin to grab the block type (don't try too long due to contention)
{$ifdef FPCMM_SLEEPTSC}
push rdx
rdtsc
shl rdx, 32
lea r9, [rax + rdx + SpinSmallFreememLockTSC] // r9 = endtsc
@SpinLockBlockType:
pause
rdtsc
shl rdx, 32
or rax, rdx
cmp rax, r9
ja @LockBlockTypeReleaseCore
{$else}
mov r8d, SpinSmallFreememLockCount
@SpinLockBlockType:
pause
dec r8d
jz @LockBlockTypeReleaseCore
{$endif FPCMM_SLEEPTSC}
mov eax, $100
{$ifdef FPCMM_CMPBEFORELOCK_SPIN}
cmp byte ptr [rbx].TSmallBlockType.Locked, true
je @SpinLockBlockType
{$endif FPCMM_CMPBEFORELOCK_SPIN}
lock cmpxchg byte ptr [rbx].TSmallBlockType.Locked, ah
jne @SpinLockBlockType
{$ifdef FPCMM_SLEEPTSC}
pop rdx
{$endif FPCMM_SLEEPTSC}
jmp @FreeAndUnlock
@LockBlockTypeReleaseCore:
{$ifdef FPCMM_SLEEPTSC}
pop rdx
{$endif FPCMM_SLEEPTSC}
{$endif FPCMM_PAUSE}
// Couldn't grab the block type - sleep and try again
{$ifdef FPCMM_DEBUG} lock {$endif}
inc dword ptr [rbx].TSmallBlockType.FreeMemSleepCount
push rdx
push rcx
call ReleaseCore
pop rcx
pop rdx
jmp @TinySmallLockLoop
@Done: // restore rbx and the stack frame before ret
pop rbx
@Quit:
end;
// warning: FPC signature is not the same than Delphi: requires "var P"
function _ReallocMem(var P: pointer; Size: PtrUInt): pointer;
{$ifdef NOSFRAME} nostackframe; {$endif} assembler;
asm
{$ifdef MSWINDOWS}
push rdi
push rsi
{$else}
mov rdx, Size
{$endif MSWINDOWS}
push rbx
push r14
push P // for assignement in @Done
mov r14, qword ptr [P]
test rdx, rdx
jz @VoidSize // ReallocMem(P,0)=FreeMem(P)
test r14, r14
jz @GetMemMoveFreeMem // ReallocMem(nil,Size)=GetMem(Size)
mov rcx, [r14 - BlockHeaderSize]
test cl, IsFreeBlockFlag + IsMediumBlockFlag + IsLargeBlockFlag
jnz @NotASmallBlock
// -------------- TINY/SMALL block -------------
// Get rbx=blocktype, rcx=available size, rax=inplaceresize
mov rbx, [rcx].TSmallBlockPoolHeader.BlockType
lea rax, [rdx * 4 + SmallBlockDownsizeCheckAdder]
movzx ecx, [rbx].TSmallBlockType.BlockSize
sub ecx, BlockHeaderSize
cmp rcx, rdx
jb @SmallUpsize
// Downsize or small growup with enough space: reallocate only if need
cmp eax, ecx
jb @GetMemMoveFreeMem // r14=P rdx=size
@NoResize:
// branchless execution if current block is good enough for this size
mov rax, r14 // keep original pointer
pop rcx
{$ifdef NOSFRAME}
pop r14
pop rbx
ret
{$else}
jmp @Quit // on Win64, a stack frame is required
{$endif NOSFRAME}
@VoidSize:
push rdx // to set P=nil
jmp @DoFree // ReallocMem(P,0)=FreeMem(P)
@SmallUpsize:
// State: r14=pointer, rdx=NewSize, rcx=CurrentBlockSize, rbx=CurrentBlockType
// Small blocks always grow with at least 100% + SmallBlockUpsizeAdder bytes
lea P, qword ptr [rcx * 2 + SmallBlockUpsizeAdder]
movzx ebx, [rbx].TSmallBlockType.BlockSize
sub ebx, BlockHeaderSize + 8
// r14=pointer, P=NextUpBlockSize, rdx=NewSize, rbx=OldSize-8
@AdjustGetMemMoveFreeMem:
// New allocated size is max(requestedsize, minimumupsize)
cmp rdx, P
cmova P, rdx
push rdx
call _GetMem
pop rdx
test rax, rax
jz @Done
jmp @MoveFreeMem // rax=New r14=P rbx=size-8
@GetMemMoveFreeMem:
// reallocate copy and free: r14=P rdx=size
mov rbx, rdx
mov P, rdx // P is the proper first argument register
call _GetMem
test rax, rax
jz @Done
test r14, r14 // ReallocMem(nil,Size)=GetMem(Size)
jz @Done
sub rbx, 8
@MoveFreeMem:
// copy and free: rax=New r14=P rbx=size-8
push rax
{$ifdef FPCMM_ERMS}
cmp rbx, ErmsMinSize // startup cost of 0..255 bytes
jae @erms
{$endif FPCMM_ERMS}
lea rcx, [r14 + rbx]
lea rdx, [rax + rbx]
neg rbx
jns @Last8
align 16
@By16: movaps xmm0, oword ptr [rcx + rbx]
movaps oword ptr [rdx + rbx], xmm0
add rbx, 16
js @By16
@Last8: mov rax, qword ptr [rcx + rbx]
mov qword ptr [rdx + rbx], rax
@DoFree:mov P, r14
call _FreeMem
pop rax
jmp @Done
{$ifdef FPCMM_ERMS}
@erms: cld
mov rsi, r14
mov rdi, rax
lea rcx, [rbx + 8]
rep movsb
jmp @DoFree
{$endif FPCMM_ERMS}
@NotASmallBlock:
// Is this a medium block or a large block?
test cl, IsFreeBlockFlag + IsLargeBlockFlag
jnz @PossibleLargeBlock
// -------------- MEDIUM block -------------
// rcx=CurrentSize+Flags, r14=P, rdx=RequestedSize, r10=TMediumBlockInfo
lea rsi, [rdx + rdx]
lea r10, [rip + MediumBlockInfo]
mov rbx, rcx
and ecx, DropMediumAndLargeFlagsMask
lea rdi, [r14 + rcx]
sub ecx, BlockHeaderSize
and ebx, ExtractMediumAndLargeFlagsMask
// Is it an upsize or a downsize?
cmp rdx, rcx
ja @MediumBlockUpsize
// rcx=CurrentBlockSize-BlockHeaderSize, rbx=CurrentBlockFlags,
// rdi=@NextBlock, r14=P, rdx=RequestedSize
// Downsize reallocate and move data only if less than half the current size
cmp rsi, rcx
jae @NoResize
// In-place downsize? Ensure not smaller than MinimumMediumBlockSize
cmp edx, MinimumMediumBlockSize - BlockHeaderSize
jae @MediumBlockInPlaceDownsize
// Need to move to another Medium block pool, or into a Small block?
cmp edx, MediumInPlaceDownsizeLimit
jb @GetMemMoveFreeMem
// No need to realloc: resize in-place (if not already at the minimum size)
mov edx, MinimumMediumBlockSize - BlockHeaderSize
cmp ecx, MinimumMediumBlockSize - BlockHeaderSize
jna @NoResize
@MediumBlockInPlaceDownsize:
// Round up to the next medium block size
lea rsi, [rdx + BlockHeaderSize + MediumBlockGranularity - 1 - MediumBlockSizeOffset]
and rsi, - MediumBlockGranularity
add rsi, MediumBlockSizeOffset
// Get the size of the second split
add ecx, BlockHeaderSize
sub ecx, esi
mov ebx, ecx
// Lock the medium blocks
mov rcx, r10
{$ifndef FPCMM_ASSUMEMULTITHREAD}
mov rax, [r10 + TMediumBlockinfo.IsMultiThreadPtr]
cmp byte ptr [rax], false
je @MediumBlocksLocked1 // no lock if IsMultiThread=false
{$endif FPCMM_ASSUMEMULTITHREAD}
mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah
je @MediumBlocksLocked1
call LockMediumBlocks
@MediumBlocksLocked1:
mov ecx, ebx
// Reread the flags - may have changed before medium blocks could be locked
mov rbx, ExtractMediumAndLargeFlagsMask
and rbx, [r14 - BlockHeaderSize]
@DoMediumInPlaceDownsize:
// Set the new size in header, and get rbx = second split size
or rbx, rsi
mov [r14 - BlockHeaderSize], rbx
mov ebx, ecx
// If the next block is used, flag its previous block as free
mov rdx, [rdi - BlockHeaderSize]
test dl, IsFreeBlockFlag
jnz @MediumDownsizeNextBlockFree
or rdx, PreviousMediumBlockIsFreeFlag
mov [rdi - BlockHeaderSize], rdx
jmp @MediumDownsizeDoSplit
@MediumDownsizeNextBlockFree:
// If the next block is free, combine both
mov rcx, rdi
and rdx, DropMediumAndLargeFlagsMask
add rbx, rdx
add rdi, rdx
cmp edx, MinimumMediumBlockSize
jb @MediumDownsizeDoSplit
call RemoveMediumFreeBlock // rcx=APMediumFreeBlock
@MediumDownsizeDoSplit:
// Store the trailing size field and free part header
mov [rdi - 16], rbx
lea rcx, [rbx + IsMediumBlockFlag + IsFreeBlockFlag];
mov [r14 + rsi - BlockHeaderSize], rcx
// Bin this free block (if worth it)
cmp rbx, MinimumMediumBlockSize
jb @MediumBlockDownsizeDone
lea rcx, [r14 + rsi]
mov rdx, rbx
call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize
@MediumBlockDownsizeDone:
// Unlock the medium blocks, and leave with the new pointer
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
mov rax, r14
jmp @Done
@MediumBlockUpsize:
// ecx = Current Block Size - BlockHeaderSize, bl = Current Block Flags,
// rdi = @Next Block, r14 = P, rdx = Requested Size
// Try to make in-place upsize
mov rax, [rdi - BlockHeaderSize]
test al, IsFreeBlockFlag
jz @CannotUpsizeMediumBlockInPlace
// Get rax = available size, rsi = available size with the next block
and rax, DropMediumAndLargeFlagsMask
lea rsi, [rax + rcx]
cmp rdx, rsi
ja @CannotUpsizeMediumBlockInPlace
// Grow into the next block
mov rbx, rcx
mov rcx, r10
{$ifndef FPCMM_ASSUMEMULTITHREAD}
mov rax, [r10 + TMediumBlockinfo.IsMultiThreadPtr]
cmp byte ptr [rax], false
je @MediumBlocksLocked2 // no lock if IsMultiThread=false
{$endif FPCMM_ASSUMEMULTITHREAD}
mov eax, $100
lock cmpxchg byte ptr [rcx].TMediumBlockInfo.Locked, ah
je @MediumBlocksLocked2
mov rsi, rdx
call LockMediumBlocks
mov rdx, rsi
@MediumBlocksLocked2:
// Re-read info once locked, and ensure next block is still free
mov rcx, rbx
mov rbx, ExtractMediumAndLargeFlagsMask
and rbx, [r14 - BlockHeaderSize]
mov rax, [rdi - BlockHeaderSize]
test al, IsFreeBlockFlag
jz @NextMediumBlockChanged
and eax, DropMediumAndLargeFlagsMask
lea rsi, [rax + rcx]
cmp rdx, rsi
ja @NextMediumBlockChanged
@DoMediumInPlaceUpsize:
// Bin next free block (if worth it)
cmp eax, MinimumMediumBlockSize
jb @MediumInPlaceNoNextRemove
push rcx
push rdx
mov rcx, rdi
call RemoveMediumFreeBlock // rcx=APMediumFreeBlock
pop rdx
pop rcx
@MediumInPlaceNoNextRemove:
// Medium blocks grow a minimum of 25% in in-place upsizes
mov eax, ecx
shr eax, 2
add eax, ecx
// Get the maximum of the requested size and the minimum growth size
xor edi, edi
sub eax, edx
adc edi, -1
and eax, edi
// Round up to the nearest block size granularity
lea rax, [rax + rdx + BlockHeaderSize + MediumBlockGranularity - 1 - MediumBlockSizeOffset]
and eax, -MediumBlockGranularity
add eax, MediumBlockSizeOffset
// Calculate the size of the second split and check if it fits
lea rdx, [rsi + BlockHeaderSize]
sub edx, eax
ja @MediumInPlaceUpsizeSplit
// Grab the whole block: Mark it as used in the next block, and adjust size
and qword ptr [r14 + rsi], NOT PreviousMediumBlockIsFreeFlag
add rsi, BlockHeaderSize
jmp @MediumUpsizeInPlaceDone
@MediumInPlaceUpsizeSplit:
// Store the size of the second split as the second last pointer
mov [r14 + rsi - BlockHeaderSize], rdx
// Set the second split header
lea rdi, [rdx + IsMediumBlockFlag + IsFreeBlockFlag]
mov [r14 + rax - BlockHeaderSize], rdi
mov rsi, rax
cmp edx, MinimumMediumBlockSize
jb @MediumUpsizeInPlaceDone
lea rcx, [r14 + rax]
call InsertMediumBlockIntoBin // rcx=APMediumFreeBlock, edx=AMediumBlockSize
@MediumUpsizeInPlaceDone:
// No need to move data at upsize: set the size and flags for this block
or rsi, rbx
mov [r14 - BlockHeaderSize], rsi
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
mov rax, r14
jmp @Done
@NextMediumBlockChanged:
// The next block changed during lock: reallocate and move data
mov byte ptr [r10 + TMediumBlockInfo.Locked], false
@CannotUpsizeMediumBlockInPlace:
// rcx=OldSize-8, rdx=NewSize
mov rbx, rcx
mov eax, ecx
shr eax, 2
lea P, qword ptr [rcx + rax] // NextUpBlockSize = OldSize+25%
jmp @AdjustGetMemMoveFreeMem // P=BlockSize, rdx=NewSize, rbx=OldSize-8
@PossibleLargeBlock:
// -------------- LARGE block -------------
test cl, IsFreeBlockFlag + IsMediumBlockFlag
jnz @Error
{$ifdef MSWINDOWS}
mov rcx, r14
{$else}
mov rdi, r14
mov rsi, rdx
{$endif MSWINDOWS}
call ReallocateLargeBlock // with restored proper registers
jmp @Done
@Error: xor eax, eax
@Done: // restore registers and the stack frame before ret
pop rcx
mov qword ptr [rcx], rax // store new pointer in var P
@Quit: pop r14
pop rbx
{$ifdef MSWINDOWS}
pop rsi
pop rdi
{$endif MSWINDOWS}
end;
function _AllocMem(Size: PtrUInt): pointer;
{$ifdef NOSFRAME} nostackframe; {$endif} assembler;
asm
push rbx
// Compute rbx = size rounded down to the last pointer
lea rbx, [Size - 1]
and rbx, - 8
// Perform the memory allocation
call _GetMem
// Could a block be allocated? rcx = 0 if yes, -1 if no
cmp rax, 1
sbb rcx, rcx
// Point rdx to the last pointer
lea rdx, [rax + rbx]
// Compute Size (1..8 doesn't need to enter the SSE2 loop)
or rbx, rcx
jz @LastQ
// Large blocks from mmap/VirtualAlloc are already zero filled
cmp rbx, MaximumMediumBlockSize - BlockHeaderSize
jae @Done
{$ifdef FPCMM_ERMS}
cmp rbx, ErmsMinSize // startup cost of 0..255 bytes
jae @erms
{$endif FPCMM_ERMS}
neg rbx
pxor xmm0, xmm0
align 16
@FillLoop: // non-temporal movntdq not needed with small/medium size
movaps oword ptr [rdx + rbx], xmm0
add rbx, 16
js @FillLoop
// fill the last pointer
@LastQ: xor rcx, rcx
mov qword ptr [rdx], rcx
{$ifdef FPCMM_ERMS}
{$ifdef NOSFRAME}
pop rbx
ret
{$else}
jmp @Done // on Win64, a stack frame is required
{$endif NOSFRAME}
// ERMS has a startup cost, but "rep stosd" is fast enough on all CPUs
@erms: mov rcx, rbx
push rax
{$ifdef MSWINDOWS}
push rdi
{$endif MSWINDOWS}
cld
mov rdi, rdx
xor eax, eax
sub rdi, rbx
shr ecx, 2
mov qword ptr [rdx], rax
rep stosd
{$ifdef MSWINDOWS}
pop rdi
{$endif MSWINDOWS}
pop rax
{$endif FPCMM_ERMS}
@Done: // restore rbx register and the stack frame before ret
pop rbx
end;
function _MemSize(P: pointer): PtrUInt;
begin
// AFAIK used only by fpc_AnsiStr_SetLength() in FPC RTL
// also used by our static SQLite3 for its xSize() callback
P := PPointer(PByte(P) - BlockHeaderSize)^;
if (PtrUInt(P) and (IsMediumBlockFlag or IsLargeBlockFlag)) = 0 then
result := PSmallBlockPoolHeader(PtrUInt(P) and DropSmallFlagsMask).
BlockType.BlockSize - BlockHeaderSize
else
begin
result := (PtrUInt(P) and DropMediumAndLargeFlagsMask) - BlockHeaderSize;
if (PtrUInt(P) and IsMediumBlockFlag) = 0 then
dec(result, LargeBlockHeaderSize);
end;
end;
function _FreeMemSize(P: pointer; size: PtrUInt): PtrInt;
begin
// should return the chunk size - only used by heaptrc AFAIK
if (P <> nil) and
(size <> 0) then
result := _FreeMem(P)
else
result := 0;
end;
{ ********* Information Gathering }
{$ifdef FPCMM_STANDALONE}
procedure Assert(flag: boolean);
begin
end;
{$else}
function _GetFPCHeapStatus: TFPCHeapStatus;
var
mm: PMMStatus;
begin
mm := @HeapStatus;
{$ifdef FPCMM_DEBUG}
result.MaxHeapSize := mm^.Medium.PeakBytes + mm^.Large.PeakBytes;
{$else}
result.MaxHeapSize := 0;
{$endif FPCMM_DEBUG}
result.MaxHeapUsed := result.MaxHeapSize;
result.CurrHeapSize := mm^.Medium.CurrentBytes + mm^.Large.CurrentBytes;
result.CurrHeapUsed := result.CurrHeapSize;
result.CurrHeapFree := 0;
end;
function _GetHeapStatus: THeapStatus;
begin
FillChar(result, sizeof(result), 0);
with HeapStatus do
result.TotalAllocated := Medium.CurrentBytes + Large.CurrentBytes;
result.TotalAddrSpace := result.TotalAllocated;
end;
type
// match both TSmallBlockStatus and TSmallBlockContention
TRes = array[0..2] of PtrUInt;
// details are allocated on the stack, not the heap
TResArray = array[0..(NumSmallInfoBlock * 2) - 1] of TRes;
procedure QuickSortRes(var Res: TResArray; L, R, Level: PtrInt);
var
I, J, P: PtrInt;
pivot: PtrUInt;
tmp: TRes;
begin
if L < R then
repeat
I := L;
J := R;
P := (L + R) shr 1;
repeat
pivot := Res[P, Level];
while Res[I, Level] > pivot do
inc(I);
while Res[J, Level] < pivot do
dec(J);
if I <= J then
begin
tmp := Res[J];
Res[J] := Res[I];
Res[I] := tmp;
if P = I then
P := J
else if P = J then
P := I;
inc(I);
dec(J);
end;
until I > J;
if J - L < R - I then
begin
// use recursion only for smaller range
if L < J then
QuickSortRes(Res, L, J, Level);
L := I;
end
else
begin
if I < R then
QuickSortRes(Res, I, R, Level);
R := J;
end;
until L >= R;
end;
procedure SetSmallBlockStatus(var res: TResArray; out small, tiny: cardinal);
var
i, a: integer;
p: PSmallBlockType;
d: ^TSmallBlockStatus;
begin
small := 0;
tiny := 0;
d := @res;
p := @SmallBlockInfo;
for i := 1 to NumSmallBlockTypes do
begin
inc(small, ord(p^.GetmemCount <> 0));
d^.Total := p^.GetmemCount;
d^.Current := p^.GetmemCount - p^.FreememCount;
d^.BlockSize := p^.BlockSize;
inc(d);
inc(p);
end;
for a := 1 to NumTinyBlockArenas do
begin
d := @res; // aggregate counters
for i := 1 to NumTinyBlockTypes do
begin
inc(tiny, ord(p^.GetmemCount <> 0));
inc(d^.Total, p^.GetmemCount);
inc(d^.Current, p^.GetmemCount - p^.FreememCount);
inc(d);
inc(p);
end;
end;
assert(p = @SmallBlockInfo.GetmemLookup);
end;
function SortSmallBlockStatus(var res: TResArray; maxcount, orderby: PtrInt;
count, bytes: PPtrUInt): PtrInt;
var
i: PtrInt;
begin
QuickSortRes(res, 0, NumSmallBlockTypes - 1, orderby);
if count <> nil then
begin
count^ := 0;
for i := 0 to NumSmallBlockTypes - 1 do
inc(count^, res[i, orderby]);
end;
if bytes <> nil then
begin
bytes^ := 0;
for i := 0 to NumSmallBlockTypes - 1 do
inc(bytes^, res[i, orderby] * res[i, ord(obBlockSize)]);
end;
result := maxcount;
if result > NumSmallBlockTypes then
result := NumSmallBlockTypes;
while (result > 0) and
(res[result - 1, orderby] = 0) do
dec(result);
end;
function SetSmallBlockContention(var res: TResArray; maxcount: integer): integer;
var
i: integer;
p: PSmallBlockType;
d: ^TSmallBlockContention;
begin
result := 0;
d := @res;
p := @SmallBlockInfo;
for i := 1 to NumSmallInfoBlock do
begin
if p^.GetmemSleepCount <> 0 then
begin
d^.SleepCount := p^.GetmemSleepCount;
d^.GetmemBlockSize := p^.BlockSize;
d^.FreememBlockSize := 0;
inc(d);
inc(result);
end;
if p^.FreememSleepCount <> 0 then
begin
d^.SleepCount := p^.FreememSleepCount;
d^.GetmemBlockSize := 0;
d^.FreememBlockSize := p^.BlockSize;
inc(d);
inc(result);
end;
inc(p);
end;
if result = 0 then
exit;
QuickSortRes(res, 0, result - 1, 0);
if result > maxcount then
result := maxcount;
end;
const
K_: array[0..4] of string[1] = (
'P', 'T', 'G', 'M', 'K');
function K(i: PtrUInt): ShortString;
var
j, n: PtrUInt;
tmp: PShortString;
begin
tmp := nil;
n := 1 shl 50;
for j := 0 to high(K_) do
if i >= n then
begin
i := i div n;
tmp := @K_[j];
break;
end
else
n := n shr 10;
str(i, result);
if tmp <> nil then
result := result + tmp^;
end;
function S(i: PtrUInt): ShortString;
begin
str(i, result);
end;
type
// allow to write into a temp string or the console
TGetHeapStatusWrite =
procedure(const V: array of ShortString; CRLF: boolean = true);
procedure WriteHeapStatusDetail(const arena: TMMStatusArena;
const name: ShortString; Wr: TGetHeapStatusWrite);
begin
Wr([name, K(arena.CurrentBytes),
'B/', K(arena.CumulativeBytes), 'B '], {crlf=}false);
{$ifdef FPCMM_DEBUG}
Wr([' peak=', K(arena.PeakBytes),
'B current=', K(arena.CumulativeAlloc - arena.CumulativeFree),
' alloc=', K(arena.CumulativeAlloc),
' free=', K(arena.CumulativeFree)], false);
{$endif FPCMM_DEBUG}
Wr([' sleep=', K(arena.SleepCount)]);
end;
procedure ComputeHeapStatus(const context: ShortString; smallblockstatuscount,
smallblockcontentioncount: integer; compilationflags: boolean;
Wr: TGetHeapStatusWrite);
var
res: TResArray; // no heap allocation involved
i, n, smallcount: PtrInt;
t, b: PtrUInt;
small, tiny: cardinal;
begin
if context[0] <> #0 then
Wr([context]);
if compilationflags then
Wr([' Flags:' + FPCMM_FLAGS]);
with CurrentHeapStatus do
begin
Wr([' Small: blocks=', K(SmallBlocks),
' size=', K(SmallBlocksSize), 'B (part of Medium arena)']);
WriteHeapStatusDetail(Medium, ' Medium: ', Wr);
WriteHeapStatusDetail(Large, ' Large: ', Wr);
if SleepCount <> 0 then
Wr([' Total Sleep: count=', K(SleepCount)
{$ifdef FPCMM_SLEEPTSC} , ' rdtsc=', K(SleepCycles) {$endif}]);
smallcount := SmallGetmemSleepCount + SmallFreememSleepCount
{$ifdef FPCMM_LOCKLESSFREE} {$ifdef FPCMM_DEBUG}
+ SmallFreememLockLessSpin {$endif} {$endif};
if smallcount <> 0 then
Wr([' Small Sleep: getmem=', K(SmallGetmemSleepCount),
' freemem=', K(SmallFreememSleepCount)
{$ifdef FPCMM_LOCKLESSFREE} {$ifdef FPCMM_DEBUG} ,
' locklessspin=', K(SmallFreememLockLessSpin) {$endif} {$endif}]);
end;
if (smallblockcontentioncount > 0) and
(smallcount <> 0) then
begin
n := SetSmallBlockContention(res, smallblockcontentioncount);
for i := 0 to n - 1 do
with TSmallBlockContention(res[i]) do
begin
if GetmemBlockSize <> 0 then
Wr([' getmem(', S(GetmemBlockSize)], {crlf=}false)
else
Wr([' freemem(', S(FreememBlockSize)], false);
Wr([')=' , K(SleepCount)], false);
if (i and 3 = 3) or
(i = n - 1) then
Wr([]);
end;
end;
if smallblockstatuscount > 0 then
begin
SetSmallBlockStatus(res, small, tiny);
n := SortSmallBlockStatus(res, smallblockstatuscount, ord(obTotal), @t, @b) - 1;
Wr([' Small Blocks since beginning: ', K(t), '/', K(b),
'B (as small=', K(small), '/', S(NumSmallBlockTypes),
' tiny=', K(tiny), '/', S(NumTinyBlockArenas * NumTinyBlockTypes), ')']);
for i := 0 to n do
with TSmallBlockStatus(res[i]) do
begin
Wr([' ', S(BlockSize), '=', K(Total)], false);
if (i and 7 = 7) or
(i = n) then
Wr([]);
end;
n := SortSmallBlockStatus(res, smallblockstatuscount, ord(obCurrent), @t, @b) - 1;
Wr([' Small Blocks current: ', K(t), '/', K(b), 'B']);
for i := 0 to n do
with TSmallBlockStatus(res[i]) do
begin
Wr([' ', S(BlockSize), '=', K(Current)], false);
if (i and 7 = 7) or
(i = n) then
Wr([]);
end;
end;
end;
var
WrStrTemp: string; // we don't require thread safety here
WrStrOnSameLine: boolean;
procedure WrStr(const V: array of ShortString; CRLF: boolean);
var
i: PtrInt;
begin // we don't have format() nor formatutf8() -> this is good enough
for i := 0 to high(V) do
WrStrTemp := WrStrTemp + string(V[i]); // fast enough
if CRLF and
not WrStrOnSameLine then
WrStrTemp := WrStrTemp + #13#10;
end;
function GetHeapStatus(const context: ShortString; smallblockstatuscount,
smallblockcontentioncount: integer; compilationflags, onsameline: boolean): string;
begin
WrStrOnSameLine := onsameline;
ComputeHeapStatus(context, smallblockstatuscount, smallblockcontentioncount,
compilationflags, WrStr);
result := WrStrTemp;
WrStrTemp := '';
end;
procedure WrConsole(const V: array of ShortString; CRLF: boolean);
var
i: PtrInt;
begin // direct write to the console with no memory heap allocation
{$I-}
for i := 0 to high(V) do
write(V[i]);
if CRLF then
writeln;
ioresult;
{$I+}
end;
procedure WriteHeapStatus(const context: ShortString; smallblockstatuscount,
smallblockcontentioncount: integer; compilationflags: boolean);