From 0e4a73074f62fd6c75b177301e01d4c7f4703a39 Mon Sep 17 00:00:00 2001 From: ruema Date: Thu, 25 Jul 2024 21:10:47 +0000 Subject: [PATCH 1/8] Speed up float pack and unpack --- Include/internal/pycore_floatobject.h | 5 - Include/internal/pycore_runtime.h | 1 - Include/internal/pycore_runtime_init.h | 4 - Objects/floatobject.c | 758 ++++++++++++------------- Tools/c-analyzer/TODO | 4 - 5 files changed, 374 insertions(+), 398 deletions(-) diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h index be1c6cc97720d2..b9be2348345162 100644 --- a/Include/internal/pycore_floatobject.h +++ b/Include/internal/pycore_floatobject.h @@ -25,11 +25,6 @@ enum _py_float_format_type { _py_float_format_ieee_little_endian, }; -struct _Py_float_runtime_state { - enum _py_float_format_type float_format; - enum _py_float_format_type double_format; -}; - diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index d4ffd977940a02..db66a291f2be58 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -276,7 +276,6 @@ typedef struct pyruntimestate { } audit_hooks; struct _py_object_runtime_state object_state; - struct _Py_float_runtime_state float_state; struct _Py_unicode_runtime_state unicode_state; struct _types_runtime_state types; diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index da2b8d5570de62..a4dd7224d9588e 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -152,10 +152,6 @@ extern PyTypeObject _PyExc_MemoryError; .stoptheworld = { \ .is_global = 1, \ }, \ - .float_state = { \ - .float_format = _py_float_format_unknown, \ - .double_format = _py_float_format_unknown, \ - }, \ .types = { \ .next_version_tag = 1, \ }, \ diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 82f39de421f245..497122d5641e30 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -18,6 +18,7 @@ 
#include // DBL_MAX #include // strtol() +#include // uint64_t /*[clinic input] class float "PyObject *" "&PyFloat_Type" @@ -1683,8 +1684,81 @@ typedef enum _py_float_format_type float_format_type; #define ieee_big_endian_format _py_float_format_ieee_big_endian #define ieee_little_endian_format _py_float_format_ieee_little_endian -#define float_format (_PyRuntime.float_state.float_format) -#define double_format (_PyRuntime.float_state.double_format) +static inline uint32_t byte_swap_uint32(uint32_t x) { + return ( + ((x >> 24) & 0xff) + | ((x >> 8) & 0xff00) + | ((x & 0xff00) << 8) + | ((x & 0xff) << 24) + ); +} + +static inline uint64_t byte_swap_uint64(uint64_t x) { + return ( + ((x >> 56) & 0xff) + | ((x >> 40) & 0xff00) + | ((x >> 24) & 0xff0000) + | ((x >> 8) & 0xff000000) + | ((x & 0xff000000) << 8) + | ((x & 0xff0000) << 24) + | ((x & 0xff00) << 40) + | ((x & 0xff) << 56) + ); +} + +/* We attempt to determine if this machine is using IEEE + floating point formats by peering at the bits of some + carefully chosen values. If it looks like we are on an + IEEE platform, the float packing/unpacking routines can + just copy bits, if not they resort to arithmetic & shifts + and masks. The shifts & masks approach works on all finite + values, but what happens to infinities, NaNs and signed + zeroes on packing is an accident, and attempting to unpack + a NaN or an infinity will raise an exception. + + Note that if we're on some whacked-out platform which uses + IEEE formats but isn't strictly little-endian or big- + endian, we will fall back to the portable shifts & masks + method. + + These functions are reduced to a no-op via compiler + optimizations. 
*/ + +static inline float_format_type get_float_format(void) { + if (sizeof(float) == 4) { + float y = 16711938.0; + uint32_t z; + memcpy(&z, &y, 4); + if (z == 0x4b7f0102) + { + if (memcmp(&y, "\x4b\x7f\x01\x02", 4) == 0) { + return ieee_big_endian_format; + } else + if (memcmp(&y, "\x02\x01\x7f\x4b", 4) == 0) { + return ieee_little_endian_format; + } + } + } + return unknown_format; +} + +static inline float_format_type get_double_format(void) { + if (sizeof(float) == 4) { + double y = 9006104071832581.0; + uint64_t z; + memcpy(&z, &y, 8); + if (z == 0x433fff0102030405) + { + if (memcmp(&y, "\x43\x3f\xff\x01\x02\x03\x04\x05", 8) == 0) { + return ieee_big_endian_format; + } else + if (memcmp(&y, "\x05\x04\x03\x02\x01\xff\x3f\x43", 8) == 0) { + return ieee_little_endian_format; + } + } + } + return unknown_format; +} /*[clinic input] @@ -1711,10 +1785,10 @@ float___getformat___impl(PyTypeObject *type, const char *typestr) float_format_type r; if (strcmp(typestr, "double") == 0) { - r = double_format; + r = get_double_format(); } else if (strcmp(typestr, "float") == 0) { - r = float_format; + r = get_float_format(); } else { PyErr_SetString(PyExc_ValueError, @@ -1887,57 +1961,6 @@ PyTypeObject PyFloat_Type = { .tp_vectorcall = (vectorcallfunc)float_vectorcall, }; -static void -_init_global_state(void) -{ - float_format_type detected_double_format, detected_float_format; - - /* We attempt to determine if this machine is using IEEE - floating-point formats by peering at the bits of some - carefully chosen values. If it looks like we are on an - IEEE platform, the float packing/unpacking routines can - just copy bits, if not they resort to arithmetic & shifts - and masks. The shifts & masks approach works on all finite - values, but what happens to infinities, NaNs and signed - zeroes on packing is an accident, and attempting to unpack - a NaN or an infinity will raise an exception. 
- - Note that if we're on some whacked-out platform which uses - IEEE formats but isn't strictly little-endian or big- - endian, we will fall back to the portable shifts & masks - method. */ - -#if SIZEOF_DOUBLE == 8 - { - double x = 9006104071832581.0; - if (memcmp(&x, "\x43\x3f\xff\x01\x02\x03\x04\x05", 8) == 0) - detected_double_format = ieee_big_endian_format; - else if (memcmp(&x, "\x05\x04\x03\x02\x01\xff\x3f\x43", 8) == 0) - detected_double_format = ieee_little_endian_format; - else - detected_double_format = unknown_format; - } -#else - detected_double_format = unknown_format; -#endif - -#if SIZEOF_FLOAT == 4 - { - float y = 16711938.0; - if (memcmp(&y, "\x4b\x7f\x01\x02", 4) == 0) - detected_float_format = ieee_big_endian_format; - else if (memcmp(&y, "\x02\x01\x7f\x4b", 4) == 0) - detected_float_format = ieee_little_endian_format; - else - detected_float_format = unknown_format; - } -#else - detected_float_format = unknown_format; -#endif - - double_format = detected_double_format; - float_format = detected_float_format; -} void _PyFloat_InitState(PyInterpreterState *interp) @@ -1945,7 +1968,6 @@ _PyFloat_InitState(PyInterpreterState *interp) if (!_Py_IsMainInterpreter(interp)) { return; } - _init_global_state(); } PyStatus @@ -2099,105 +2121,98 @@ PyFloat_Pack2(double x, char *data, int le) int PyFloat_Pack4(double x, char *data, int le) { - unsigned char *p = (unsigned char *)data; - if (float_format == unknown_format) { - unsigned char sign; - int e; - double f; - unsigned int fbits; - int incr = 1; - - if (le) { - p += 3; - incr = -1; - } - - if (x < 0) { - sign = 1; - x = -x; - } - else - sign = 0; - - f = frexp(x, &e); - - /* Normalize f to be in the range [1.0, 2.0) */ - if (0.5 <= f && f < 1.0) { - f *= 2.0; - e--; - } - else if (f == 0.0) - e = 0; - else { - PyErr_SetString(PyExc_SystemError, - "frexp() result out of range"); - return -1; - } - - if (e >= 128) + float_format_type format = get_float_format(); + if (format != unknown_format) { + 
float z = (float)x; + if (isinf(z) && ! isinf(x)) goto Overflow; - else if (e < -126) { - /* Gradual underflow */ - f = ldexp(f, 126 + e); - e = 0; - } - else if (!(e == 0 && f == 0.0)) { - e += 127; - f -= 1.0; /* Get rid of leading 1 */ - } - - f *= 8388608.0; /* 2**23 */ - fbits = (unsigned int)(f + 0.5); /* Round */ - assert(fbits <= 8388608); - if (fbits >> 23) { - /* The carry propagated out of a string of 23 1 bits. */ - fbits = 0; - ++e; - if (e >= 255) - goto Overflow; + /* data may be unaligned, so store the bytes with memcpy() */ + uint32_t s; + memcpy(&s, &z, 4); + if ((format == ieee_big_endian_format && le) + || (format == ieee_little_endian_format && !le)) { + s = byte_swap_uint32(s); } + memcpy(data, &s, 4); + return 0; + } - /* First byte */ - *p = (sign << 7) | (e >> 1); - p += incr; - /* Second byte */ - *p = (char) (((e & 1) << 7) | (fbits >> 16)); - p += incr; + unsigned char *p = (unsigned char *)data; + unsigned char sign; + int e; + double f; + unsigned int fbits; + int incr = 1; - /* Third byte */ - *p = (fbits >> 8) & 0xFF; - p += incr; + if (le) { + p += 3; + incr = -1; + } - /* Fourth byte */ - *p = fbits & 0xFF; + if (x < 0) { + sign = 1; + x = -x; + } + else + sign = 0; - /* Done */ - return 0; + f = frexp(x, &e); + /* Normalize f to be in the range [1.0, 2.0) */ + if (0.5 <= f && f < 1.0) { + f *= 2.0; + e--; } + else if (f == 0.0) + e = 0; else { - float y = (float)x; - int i, incr = 1; + PyErr_SetString(PyExc_SystemError, + "frexp() result out of range"); + return -1; + } + + if (e >= 128) + goto Overflow; + else if (e < -126) { + /* Gradual underflow */ + f = ldexp(f, 126 + e); + e = 0; + } + else if (!(e == 0 && f == 0.0)) { + e += 127; + f -= 1.0; /* Get rid of leading 1 */ + } - if (isinf(y) && !isinf(x)) + f *= 8388608.0; /* 2**23 */ + fbits = (unsigned int)(f + 0.5); /* Round */ + assert(fbits <= 8388608); + if (fbits >> 23) { + /* The carry propagated out of a string of 23 1 bits. 
*/ + fbits = 0; + ++e; + if (e >= 255) goto Overflow; + } - unsigned char s[sizeof(float)]; - memcpy(s, &y, sizeof(float)); + /* First byte */ + *p = (sign << 7) | (e >> 1); + p += incr; - if ((float_format == ieee_little_endian_format && !le) - || (float_format == ieee_big_endian_format && le)) { - p += 3; - incr = -1; - } + /* Second byte */ + *p = (char) (((e & 1) << 7) | (fbits >> 16)); + p += incr; + + /* Third byte */ + *p = (fbits >> 8) & 0xFF; + p += incr; + + /* Fourth byte */ + *p = fbits & 0xFF; + + /* Done */ + return 0; - for (i = 0; i < 4; i++) { - *p = s[i]; - p += incr; - } - return 0; - } Overflow: PyErr_SetString(PyExc_OverflowError, "float too large to pack with f format"); @@ -2207,131 +2222,126 @@ PyFloat_Pack4(double x, char *data, int le) int PyFloat_Pack8(double x, char *data, int le) { - unsigned char *p = (unsigned char *)data; - if (double_format == unknown_format) { - unsigned char sign; - int e; - double f; - unsigned int fhi, flo; - int incr = 1; - - if (le) { - p += 7; - incr = -1; - } - - if (x < 0) { - sign = 1; - x = -x; + float_format_type format = get_double_format(); + if (format != unknown_format) { + /* data may be unaligned, so store the bytes with memcpy() */ + uint64_t s; + memcpy(&s, &x, 8); + if ((format == ieee_big_endian_format && le) + || (format == ieee_little_endian_format && !le)) { + s = byte_swap_uint64(s); } - else - sign = 0; + memcpy(data, &s, 8); + return 0; + } - f = frexp(x, &e); + unsigned char *p = (unsigned char *)data; + unsigned char sign; + int e; + double f; + unsigned int fhi, flo; + int incr = 1; - /* Normalize f to be in the range [1.0, 2.0) */ - if (0.5 <= f && f < 1.0) { - f *= 2.0; - e--; - } - else if (f == 0.0) - e = 0; - else { - PyErr_SetString(PyExc_SystemError, - "frexp() result out of range"); - return -1; - } + if (le) { + p += 7; + incr = -1; + } - if (e >= 1024) - goto Overflow; - else if (e < -1022) { - /* Gradual underflow */ - f = ldexp(f, 1022 + e); - e = 0; - } - else if (!(e == 0 && f == 0.0)) { - e += 1023; - f -= 1.0; 
/* Get rid of leading 1 */ - } + if (x < 0) { + sign = 1; + x = -x; + } + else + sign = 0; - /* fhi receives the high 28 bits; flo the low 24 bits (== 52 bits) */ - f *= 268435456.0; /* 2**28 */ - fhi = (unsigned int)f; /* Truncate */ - assert(fhi < 268435456); - - f -= (double)fhi; - f *= 16777216.0; /* 2**24 */ - flo = (unsigned int)(f + 0.5); /* Round */ - assert(flo <= 16777216); - if (flo >> 24) { - /* The carry propagated out of a string of 24 1 bits. */ - flo = 0; - ++fhi; - if (fhi >> 28) { - /* And it also propagated out of the next 28 bits. */ - fhi = 0; - ++e; - if (e >= 2047) - goto Overflow; - } - } + f = frexp(x, &e); - /* First byte */ - *p = (sign << 7) | (e >> 4); - p += incr; + /* Normalize f to be in the range [1.0, 2.0) */ + if (0.5 <= f && f < 1.0) { + f *= 2.0; + e--; + } + else if (f == 0.0) + e = 0; + else { + PyErr_SetString(PyExc_SystemError, + "frexp() result out of range"); + return -1; + } - /* Second byte */ - *p = (unsigned char) (((e & 0xF) << 4) | (fhi >> 24)); - p += incr; + if (e >= 1024) + goto Overflow; + else if (e < -1022) { + /* Gradual underflow */ + f = ldexp(f, 1022 + e); + e = 0; + } + else if (!(e == 0 && f == 0.0)) { + e += 1023; + f -= 1.0; /* Get rid of leading 1 */ + } + + /* fhi receives the high 28 bits; flo the low 24 bits (== 52 bits) */ + f *= 268435456.0; /* 2**28 */ + fhi = (unsigned int)f; /* Truncate */ + assert(fhi < 268435456); + + f -= (double)fhi; + f *= 16777216.0; /* 2**24 */ + flo = (unsigned int)(f + 0.5); /* Round */ + assert(flo <= 16777216); + if (flo >> 24) { + /* The carry propagated out of a string of 24 1 bits. */ + flo = 0; + ++fhi; + if (fhi >> 28) { + /* And it also propagated out of the next 28 bits. 
*/ + fhi = 0; + ++e; + if (e >= 2047) + goto Overflow; + } + } - /* Third byte */ - *p = (fhi >> 16) & 0xFF; - p += incr; + /* First byte */ + *p = (sign << 7) | (e >> 4); + p += incr; - /* Fourth byte */ - *p = (fhi >> 8) & 0xFF; - p += incr; + /* Second byte */ + *p = (unsigned char) (((e & 0xF) << 4) | (fhi >> 24)); + p += incr; - /* Fifth byte */ - *p = fhi & 0xFF; - p += incr; + /* Third byte */ + *p = (fhi >> 16) & 0xFF; + p += incr; - /* Sixth byte */ - *p = (flo >> 16) & 0xFF; - p += incr; + /* Fourth byte */ + *p = (fhi >> 8) & 0xFF; + p += incr; - /* Seventh byte */ - *p = (flo >> 8) & 0xFF; - p += incr; + /* Fifth byte */ + *p = fhi & 0xFF; + p += incr; - /* Eighth byte */ - *p = flo & 0xFF; - /* p += incr; */ + /* Sixth byte */ + *p = (flo >> 16) & 0xFF; + p += incr; - /* Done */ - return 0; + /* Seventh byte */ + *p = (flo >> 8) & 0xFF; + p += incr; - Overflow: - PyErr_SetString(PyExc_OverflowError, - "float too large to pack with d format"); - return -1; - } - else { - const unsigned char *s = (unsigned char*)&x; - int i, incr = 1; + /* Eighth byte */ + *p = flo & 0xFF; + /* p += incr; */ - if ((double_format == ieee_little_endian_format && !le) - || (double_format == ieee_big_endian_format && le)) { - p += 7; - incr = -1; - } + /* Done */ + return 0; - for (i = 0; i < 8; i++) { - *p = *s++; - p += incr; - } - return 0; - } + Overflow: + PyErr_SetString(PyExc_OverflowError, + "float too large to pack with d format"); + return -1; } double @@ -2389,174 +2399,154 @@ PyFloat_Unpack2(const char *data, int le) double PyFloat_Unpack4(const char *data, int le) { - unsigned char *p = (unsigned char *)data; - if (float_format == unknown_format) { - unsigned char sign; - int e; - unsigned int f; - double x; - int incr = 1; - - if (le) { - p += 3; - incr = -1; - } - - /* First byte */ - sign = (*p >> 7) & 1; - e = (*p & 0x7F) << 1; - p += incr; - - /* Second byte */ - e |= (*p >> 7) & 1; - f = (*p & 0x7F) << 16; - p += incr; - - if (e == 255) { - 
PyErr_SetString( - PyExc_ValueError, - "can't unpack IEEE 754 special value " - "on non-IEEE platform"); - return -1; + float_format_type format = get_float_format(); + if (format != unknown_format) { + union { uint32_t bits; float value; } u; + memcpy(&u.bits, data, 4); /* data may be unaligned */ + if ((format == ieee_big_endian_format && le) + || (format == ieee_little_endian_format && !le)) { + u.bits = byte_swap_uint32(u.bits); } + /* read the same four bytes back as a float (union type punning) */ + return u.value; + } - /* Third byte */ - f |= *p << 8; - p += incr; - - /* Fourth byte */ - f |= *p; + unsigned char *p = (unsigned char *)data; + unsigned char sign; + int e; + unsigned int f; + double x; + int incr = 1; - x = (double)f / 8388608.0; + if (le) { + p += 3; + incr = -1; + } - /* XXX This sadly ignores Inf/NaN issues */ - if (e == 0) - e = -126; - else { - x += 1.0; - e -= 127; - } - x = ldexp(x, e); + /* First byte */ + sign = (*p >> 7) & 1; + e = (*p & 0x7F) << 1; + p += incr; - if (sign) - x = -x; + /* Second byte */ + e |= (*p >> 7) & 1; + f = (*p & 0x7F) << 16; + p += incr; - return x; + if (e == 255) { + PyErr_SetString( + PyExc_ValueError, + "can't unpack IEEE 754 special value " + "on non-IEEE platform"); + return -1; } - else { - float x; - if ((float_format == ieee_little_endian_format && !le) - || (float_format == ieee_big_endian_format && le)) { - char buf[4]; - char *d = &buf[3]; - int i; + /* Third byte */ + f |= *p << 8; + p += incr; - for (i = 0; i < 4; i++) { - *d-- = *p++; - } - memcpy(&x, buf, 4); - } - else { - memcpy(&x, p, 4); - } + /* Fourth byte */ + f |= *p; + + x = (double)f / 8388608.0; - return x; + /* XXX This sadly ignores Inf/NaN issues */ + if (e == 0) + e = -126; + else { + x += 1.0; + e -= 127; } + x = ldexp(x, e); + + if (sign) + x = -x; + + return x; } double PyFloat_Unpack8(const char *data, int le) { - unsigned char *p = (unsigned char *)data; - if (double_format == unknown_format) { - unsigned char sign; - int e; - unsigned int fhi, flo; - double x; - int incr = 1; - - if (le) { - p += 7; - incr = -1; + float_format_type format 
= get_double_format(); + if (format != unknown_format) { + union { uint64_t bits; double value; } u; + memcpy(&u.bits, data, 8); /* data may be unaligned */ + if ((format == ieee_big_endian_format && le) + || (format == ieee_little_endian_format && !le)) { + u.bits = byte_swap_uint64(u.bits); } + /* read the same eight bytes back as a double (union type punning) */ + return u.value; + } - /* First byte */ - sign = (*p >> 7) & 1; - e = (*p & 0x7F) << 4; + unsigned char *p = (unsigned char *)data; + unsigned char sign; + int e; + unsigned int fhi, flo; + double x; + int incr = 1; - p += incr; + if (le) { + p += 7; + incr = -1; + } - /* Second byte */ - e |= (*p >> 4) & 0xF; - fhi = (*p & 0xF) << 24; - p += incr; + /* First byte */ + sign = (*p >> 7) & 1; + e = (*p & 0x7F) << 4; - if (e == 2047) { - PyErr_SetString( - PyExc_ValueError, - "can't unpack IEEE 754 special value " - "on non-IEEE platform"); - return -1.0; - } + p += incr; - /* Third byte */ - fhi |= *p << 16; - p += incr; + /* Second byte */ + e |= (*p >> 4) & 0xF; + fhi = (*p & 0xF) << 24; + p += incr; - /* Fourth byte */ - fhi |= *p << 8; - p += incr; + if (e == 2047) { + PyErr_SetString( + PyExc_ValueError, + "can't unpack IEEE 754 special value " + "on non-IEEE platform"); + return -1.0; + } - /* Fifth byte */ - fhi |= *p; - p += incr; + /* Third byte */ + fhi |= *p << 16; + p += incr; - /* Sixth byte */ - flo = *p << 16; - p += incr; + /* Fourth byte */ + fhi |= *p << 8; + p += incr; - /* Seventh byte */ - flo |= *p << 8; - p += incr; + /* Fifth byte */ + fhi |= *p; + p += incr; - /* Eighth byte */ - flo |= *p; + /* Sixth byte */ + flo = *p << 16; + p += incr; - x = (double)fhi + (double)flo / 16777216.0; /* 2**24 */ - x /= 268435456.0; /* 2**28 */ + /* Seventh byte */ + flo |= *p << 8; + p += incr; - if (e == 0) - e = -1022; - else { - x += 1.0; - e -= 1023; - } - x = ldexp(x, e); + /* Eighth byte */ + flo |= *p; - if (sign) - x = -x; + x = (double)fhi + (double)flo / 16777216.0; /* 2**24 */ + x /= 268435456.0; /* 2**28 */ - return x; - } + if (e == 0) + e = -1022; else { - double x; - - if ((double_format == 
ieee_little_endian_format && !le) - || (double_format == ieee_big_endian_format && le)) { - char buf[8]; - char *d = &buf[7]; - int i; + x += 1.0; + e -= 1023; + } + x = ldexp(x, e); - for (i = 0; i < 8; i++) { - *d-- = *p++; - } - memcpy(&x, buf, 8); - } - else { - memcpy(&x, p, 8); - } + if (sign) + x = -x; - return x; - } + return x; } diff --git a/Tools/c-analyzer/TODO b/Tools/c-analyzer/TODO index 3d599538510bd9..63e38342f30386 100644 --- a/Tools/c-analyzer/TODO +++ b/Tools/c-analyzer/TODO @@ -56,10 +56,6 @@ Modules/posixmodule.c:initialized static int init Modules/signalmodule.c:initialized static int initialized Modules/timemodule.c:initialized static int initialized Objects/dictobject.c:pydict_global_version static uint64_t pydict_global_version -Objects/floatobject.c:detected_double_format static float_format_type detected_double_format -Objects/floatobject.c:detected_float_format static float_format_type detected_float_format -Objects/floatobject.c:double_format static float_format_type double_format -Objects/floatobject.c:float_format static float_format_type Objects.longobject.c:_Py_quick_int_allocs Py_ssize_t _Py_quick_int_allocs Objects.longobject.c:_Py_quick_neg_int_allocs Py_ssize_t _Py_quick_neg_int_allocs Objects/moduleobject.c:max_module_number static Py_ssize_t max_module_number From 7207b516267dcd5122bd213a4e1648e4156909ee Mon Sep 17 00:00:00 2001 From: ruema Date: Wed, 31 Jul 2024 20:48:35 +0000 Subject: [PATCH 2/8] add always_inline to help gcc optimizing the code --- Objects/floatobject.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 497122d5641e30..dac4165e213ed4 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -1684,7 +1684,11 @@ typedef enum _py_float_format_type float_format_type; #define ieee_big_endian_format _py_float_format_ieee_big_endian #define ieee_little_endian_format _py_float_format_ieee_little_endian -static 
inline uint32_t byte_swap_uint32(uint32_t x) { +#ifdef __GNUC__ +__attribute__((always_inline)) +#endif +static inline uint32_t +byte_swap_uint32(uint32_t x) { return ( ((x >> 24) & 0xff) | ((x >> 8) & 0xff00) @@ -1693,7 +1697,11 @@ static inline uint32_t byte_swap_uint32(uint32_t x) { ); } -static inline uint64_t byte_swap_uint64(uint64_t x) { +#ifdef __GNUC__ +__attribute__((always_inline)) +#endif +static inline uint64_t +byte_swap_uint64(uint64_t x) { return ( ((x >> 56) & 0xff) | ((x >> 40) & 0xff00) @@ -1724,7 +1732,11 @@ static inline uint64_t byte_swap_uint64(uint64_t x) { These functions are reduced to a no-op via compiler optimizations. */ -static inline float_format_type get_float_format(void) { +#ifdef __GNUC__ +__attribute__((always_inline)) +#endif +static inline float_format_type +get_float_format(void) { if (sizeof(float) == 4) { float y = 16711938.0; uint32_t z; @@ -1742,7 +1754,11 @@ static inline float_format_type get_float_format(void) { return unknown_format; } -static inline float_format_type get_double_format(void) { - if (sizeof(float) == 4) { +#ifdef __GNUC__ +__attribute__((always_inline)) +#endif +static inline float_format_type +get_double_format(void) { + if (sizeof(double) == 8) { double y = 9006104071832581.0; uint64_t z; memcpy(&z, &y, 8); From da52114153c9c6dfcaf28610536267c8a52a0165 Mon Sep 17 00:00:00 2001 From: ruema Date: Wed, 31 Jul 2024 20:58:09 +0000 Subject: [PATCH 3/8] blurb --- .../next/C_API/2024-07-26-21-45-44.gh-issue-324234.Mu5JSt.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-324234.Mu5JSt.rst diff --git a/Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-324234.Mu5JSt.rst b/Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-324234.Mu5JSt.rst new file mode 100644 index 00000000000000..9b4eb2565409d7 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-324234.Mu5JSt.rst @@ -0,0 +1 @@ +Improve performance of `PyFloat_Pack4`, `PyFloat_Pack8`, `PyFloat_Unpack4` and `PyFloat_Unpack8`. 
From 7deb4da8a11f98dffd0047553efe79bea8f9a70d Mon Sep 17 00:00:00 2001 From: ruema Date: Wed, 31 Jul 2024 21:07:35 +0000 Subject: [PATCH 4/8] remove yet unused _PyFloat_InitState --- Include/internal/pycore_floatobject.h | 1 - Objects/floatobject.c | 8 -------- Python/pylifecycle.c | 2 -- 3 files changed, 11 deletions(-) diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h index b9be2348345162..a9aee5d599eb0a 100644 --- a/Include/internal/pycore_floatobject.h +++ b/Include/internal/pycore_floatobject.h @@ -12,7 +12,6 @@ extern "C" { /* runtime lifecycle */ -extern void _PyFloat_InitState(PyInterpreterState *); extern PyStatus _PyFloat_InitTypes(PyInterpreterState *); extern void _PyFloat_FiniType(PyInterpreterState *); diff --git a/Objects/floatobject.c b/Objects/floatobject.c index dac4165e213ed4..d850616d4d3e25 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -1978,14 +1978,6 @@ PyTypeObject PyFloat_Type = { }; -void -_PyFloat_InitState(PyInterpreterState *interp) -{ - if (!_Py_IsMainInterpreter(interp)) { - return; - } -} - PyStatus _PyFloat_InitTypes(PyInterpreterState *interp) { diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 6b641c0775f533..38d4f8bed0f8a9 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -697,8 +697,6 @@ pycore_init_global_objects(PyInterpreterState *interp) { PyStatus status; - _PyFloat_InitState(interp); - status = _PyUnicode_InitGlobalObjects(interp); if (_PyStatus_EXCEPTION(status)) { return status; From 4e8ef916de557f2bbc6c29388c2bc1b0cb331c69 Mon Sep 17 00:00:00 2001 From: ruema Date: Wed, 31 Jul 2024 21:44:13 +0000 Subject: [PATCH 5/8] update blurb --- .../next/C_API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst diff --git a/Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst 
b/Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst new file mode 100644 index 00000000000000..f05070641d5706 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst @@ -0,0 +1,2 @@ +`Improve performance of ``PyFloat_Pack4``, ``PyFloat_Pack8``, ``PyFloat_Unpack4`` and ``PyFloat_Unpack8``. +` From 83c1fa06001566d51779a9faf5cf38fc2b5c8fd6 Mon Sep 17 00:00:00 2001 From: ruema Date: Wed, 31 Jul 2024 21:44:33 +0000 Subject: [PATCH 6/8] update blurb --- .../next/C_API/2024-07-26-21-45-44.gh-issue-324234.Mu5JSt.rst | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-324234.Mu5JSt.rst diff --git a/Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-324234.Mu5JSt.rst b/Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-324234.Mu5JSt.rst deleted file mode 100644 index 9b4eb2565409d7..00000000000000 --- a/Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-324234.Mu5JSt.rst +++ /dev/null @@ -1 +0,0 @@ -Improve performance of `PyFloat_Pack4`, `PyFloat_Pack8`, `PyFloat_Unpack4` and `PyFloat_Unpack8`. 
From 64101d48c8c4797b02aa7e0fc9433a2c1108deef Mon Sep 17 00:00:00 2001 From: ruema Date: Wed, 31 Jul 2024 21:57:46 +0000 Subject: [PATCH 7/8] update blurb --- .../2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/{C_API => C API}/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst (100%) diff --git a/Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst b/Misc/NEWS.d/next/C API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst similarity index 100% rename from Misc/NEWS.d/next/C_API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst rename to Misc/NEWS.d/next/C API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst From c450f74748ccfa481de757b70f4d47d720dc62d0 Mon Sep 17 00:00:00 2001 From: ruema Date: Wed, 31 Jul 2024 21:58:15 +0000 Subject: [PATCH 8/8] update blurb --- .../next/C API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/C API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst b/Misc/NEWS.d/next/C API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst index f05070641d5706..66be85fb80b33b 100644 --- a/Misc/NEWS.d/next/C API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst +++ b/Misc/NEWS.d/next/C API/2024-07-26-21-45-44.gh-issue-122534.Mu5JSt.rst @@ -1,2 +1,2 @@ -`Improve performance of ``PyFloat_Pack4``, ``PyFloat_Pack8``, ``PyFloat_Unpack4`` and ``PyFloat_Unpack8``. -` +Improve performance of :c:func:`PyFloat_Pack4`, :c:func:`PyFloat_Pack8`, :c:func:`PyFloat_Unpack4` and :c:func:`PyFloat_Unpack8`. +