@@ -23,6 +23,11 @@ typedef struct _gc_runtime_state GCState;
 # define GC_DEBUG
 #endif
 
+// Each thread buffers the count of allocated objects in a thread-local
+// variable up to +/- this amount to reduce the overhead of updating
+// the global count.
+#define LOCAL_ALLOC_COUNT_THRESHOLD 512
+
 // Automatically choose the generation that needs collecting.
 #define GENERATION_AUTO (-1)
 
@@ -959,6 +964,41 @@ gc_should_collect(GCState *gcstate)
             gcstate->generations[1].threshold == 0);
 }
 
+static void
+record_allocation(PyThreadState *tstate)
+{
+    struct _gc_thread_state *gc = &((_PyThreadStateImpl *)tstate)->gc;
+
+    // We buffer the allocation count to avoid the overhead of atomic
+    // operations for every allocation.
+    gc->alloc_count++;
+    if (gc->alloc_count >= LOCAL_ALLOC_COUNT_THRESHOLD) {
+        // TODO: Use Py_ssize_t for the generation count.
+        GCState *gcstate = &tstate->interp->gc;
+        _Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
+        gc->alloc_count = 0;
+
+        if (gc_should_collect(gcstate) &&
+            !_Py_atomic_load_int_relaxed(&gcstate->collecting))
+        {
+            _Py_ScheduleGC(tstate->interp);
+        }
+    }
+}
+
+static void
+record_deallocation(PyThreadState *tstate)
+{
+    struct _gc_thread_state *gc = &((_PyThreadStateImpl *)tstate)->gc;
+
+    gc->alloc_count--;
+    if (gc->alloc_count <= -LOCAL_ALLOC_COUNT_THRESHOLD) {
+        GCState *gcstate = &tstate->interp->gc;
+        _Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
+        gc->alloc_count = 0;
+    }
+}
+
 static void
 gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
 {
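These two helpers are the heart of the change: each thread accumulates allocations and deallocations in its own `gc->alloc_count` and only folds the buffered delta into the shared `generations[0].count` once it reaches +/- `LOCAL_ALLOC_COUNT_THRESHOLD`. Below is a minimal standalone sketch of the same buffered-counter pattern, using plain C11 atomics in place of CPython's `_Py_atomic_*` wrappers; all identifiers (`counter_add`, `global_count`, `FLUSH_THRESHOLD`, etc.) are illustrative, not CPython API.

```c
#include <stdatomic.h>
#include <stdio.h>

#define FLUSH_THRESHOLD 512            /* mirrors LOCAL_ALLOC_COUNT_THRESHOLD */

static atomic_int global_count;        /* shared counter, touched rarely */
static _Thread_local int local_count;  /* per-thread buffer, touched often */

/* Record +1 for an allocation or -1 for a deallocation. The shared
 * counter is only updated once the buffered delta reaches the
 * threshold, so most calls are a plain non-atomic increment. */
static void
counter_add(int delta)
{
    local_count += delta;
    if (local_count >= FLUSH_THRESHOLD || local_count <= -FLUSH_THRESHOLD) {
        atomic_fetch_add_explicit(&global_count, local_count,
                                  memory_order_relaxed);
        local_count = 0;
    }
}

int
main(void)
{
    for (int i = 0; i < 1000; i++) {
        counter_add(+1);               /* e.g. 1000 objects allocated */
    }
    /* 512 was flushed to the shared counter; 488 is still buffered. */
    printf("global=%d local=%d\n",
           atomic_load(&global_count), local_count);
    return 0;
}
```

The trade-off is bounded staleness rather than lost updates: with N threads, the shared count can lag the true count by at most N * (threshold - 1) in either direction, which can only delay the decision to schedule a collection, never corrupt the count. Note also that `record_deallocation` omits the `gc_should_collect` check: flushing a negative delta only moves the count further from the collection threshold.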
@@ -981,6 +1021,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
         }
     }
 
+    // Record the number of live GC objects
+    interp->gc.long_lived_total = state->long_lived_total;
+
     // Clear weakrefs and enqueue callbacks (but do not call them).
     clear_weakrefs(state);
     _PyEval_StartTheWorld(interp);
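The `long_lived_total` bookkeeping moves here from `gc_collect_main` (the next hunk removes it there), so the live-object total is now recorded before `_PyEval_StartTheWorld`, i.e. while all other threads are still paused, presumably so the value is already consistent by the time resumed threads consult it in `gc_should_collect`.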
@@ -1090,7 +1133,6 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
 
     m = state.collected;
     n = state.uncollectable;
-    gcstate->long_lived_total = state.long_lived_total;
 
     if (gcstate->debug & _PyGC_DEBUG_STATS) {
         double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
@@ -1530,15 +1572,7 @@ _Py_ScheduleGC(PyInterpreterState *interp)
 void
 _PyObject_GC_Link(PyObject *op)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
-    GCState *gcstate = &tstate->interp->gc;
-    gcstate->generations[0].count++;
-
-    if (gc_should_collect(gcstate) &&
-        !_Py_atomic_load_int_relaxed(&gcstate->collecting))
-    {
-        _Py_ScheduleGC(tstate->interp);
-    }
+    record_allocation(_PyThreadState_GET());
 }
 
 void
@@ -1564,7 +1598,7 @@ gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
         ((PyObject **)mem)[1] = NULL;
     }
     PyObject *op = (PyObject *)(mem + presize);
-    _PyObject_GC_Link(op);
+    record_allocation(tstate);
    return op;
 }
 
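Note that `gc_alloc` now calls `record_allocation(tstate)` directly rather than going through `_PyObject_GC_Link(op)`: the new helper needs the thread state rather than the object, and `gc_alloc` already has `tstate` in hand, so it can skip the extra `_PyThreadState_GET()` lookup that the wrapper would incur.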
@@ -1646,10 +1680,9 @@ PyObject_GC_Del(void *op)
         PyErr_SetRaisedException(exc);
 #endif
     }
-    GCState *gcstate = get_gc_state();
-    if (gcstate->generations[0].count > 0) {
-        gcstate->generations[0].count--;
-    }
+
+    record_deallocation(_PyThreadState_GET());
+
     PyObject_Free(((char *)op) - presize);
 }
 
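With both paths routed through the per-thread buffer, a thread that allocates and frees GC objects in roughly equal measure keeps `alloc_count` hovering near zero and may never touch the shared counter at all. The replaced code performed an unsynchronized read-modify-write of `gcstate->generations[0].count` on every deallocation, which would race between threads in the free-threaded build, presumably part of the motivation for this change.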