-
-
Notifications
You must be signed in to change notification settings - Fork 3k
Optimize serialization format for 2 bytes ints #20120
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e0e945c
d0d5291
45cfe34
e7fc76f
d3cf915
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,15 +8,23 @@ | |
| #include "librt_internal.h" | ||
|
|
||
| #define START_SIZE 512 | ||
| #define MAX_SHORT_INT_TAGGED (255 << 1) | ||
|
|
||
| #define MAX_SHORT_LEN 127 | ||
| #define LONG_STR_TAG 1 | ||
| // See comment in read_int_internal() on motivation for these values. | ||
| #define MIN_ONE_BYTE_INT -10 | ||
| #define MAX_ONE_BYTE_INT 117 // 2 ** 7 - 1 - 10 | ||
| #define MIN_TWO_BYTES_INT -100 | ||
| #define MAX_TWO_BYTES_INT 16283 // 2 ** (8 + 6) - 1 - 100 | ||
| #define MIN_FOUR_BYTES_INT -10000 | ||
| #define MAX_FOUR_BYTES_INT 536860911 // 2 ** (3 * 8 + 5) - 1 - 10000 | ||
|
|
||
| #define MIN_SHORT_INT -10 | ||
| #define MAX_SHORT_INT 117 | ||
| #define MEDIUM_INT_TAG 1 | ||
| #define LONG_INT_TAG 3 | ||
| #define TWO_BYTES_INT_BIT 1 | ||
| #define FOUR_BYTES_INT_BIT 2 | ||
| #define LONG_INT_BIT 4 | ||
|
|
||
| #define FOUR_BYTES_INT_TRAILER 3 | ||
| // We add one reserved bit here so that we can potentially support | ||
| // 8 bytes format in the future. | ||
| #define LONG_INT_TRAILER 15 | ||
|
|
||
| #define CPY_BOOL_ERROR 2 | ||
| #define CPY_NONE_ERROR 2 | ||
|
|
@@ -35,13 +43,22 @@ | |
| #define _WRITE(data, type, v) *(type *)(((BufferObject *)data)->buf + ((BufferObject *)data)->pos) = v; \ | ||
| ((BufferObject *)data)->pos += sizeof(type); | ||
|
|
||
| #if PY_BIG_ENDIAN | ||
| uint16_t reverse_16(uint16_t number) { | ||
| return (number << 8) | (number >> 8); | ||
| } | ||
|
|
||
| uint32_t reverse_32(uint32_t number) { | ||
| return ((number & 0xFF) << 24) | ((number & 0xFF00) << 8) | ((number & 0xFF0000) >> 8) | (number >> 24); | ||
| } | ||
| #endif | ||
|
|
||
| typedef struct { | ||
| PyObject_HEAD | ||
| Py_ssize_t pos; | ||
| Py_ssize_t end; | ||
| Py_ssize_t size; | ||
| char *buf; | ||
| PyObject *source; | ||
| } BufferObject; | ||
|
|
||
| static PyTypeObject BufferType; | ||
|
|
@@ -259,26 +276,50 @@ write_bool(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwname | |
| } | ||
|
|
||
| /* | ||
| str format: size followed by UTF-8 bytes | ||
| short strings (len <= 127): single byte for size as `(uint8_t)size << 1` | ||
| long strings: \x01 followed by size as Py_ssize_t | ||
| str format: size as int (see below) followed by UTF-8 bytes | ||
| */ | ||
|
|
||
| static inline CPyTagged | ||
| _read_short_int(PyObject *data, uint8_t first) { | ||
| uint8_t second; | ||
| uint16_t two_more; | ||
| if ((first & TWO_BYTES_INT_BIT) == 0) { | ||
| // Note we use tagged ints since this function can return an error. | ||
| return ((Py_ssize_t)(first >> 1) + MIN_ONE_BYTE_INT) << 1; | ||
| } | ||
| if ((first & FOUR_BYTES_INT_BIT) == 0) { | ||
| _CHECK_READ(data, 1, CPY_INT_TAG) | ||
| second = _READ(data, uint8_t) | ||
| return ((((Py_ssize_t)second) << 6) + (Py_ssize_t)(first >> 2) + MIN_TWO_BYTES_INT) << 1; | ||
| } | ||
| // The caller is responsible to verify this is called only for short ints. | ||
| _CHECK_READ(data, 3, CPY_INT_TAG) | ||
| // TODO: check if compilers emit optimal code for these two reads, and tweak if needed. | ||
| second = _READ(data, uint8_t) | ||
| two_more = _READ(data, uint16_t) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since this code path will be quite rare, we could also read one byte at a time without any real performance impact. This would make this work the same on little and big endian systems.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will not help much supporting big-endian platforms as we always write short integers (i.e. 1, 2, or 4 bytes) as a single value. And changing this for e.g. 2 bytes will probably have some performance impact. I think we can safely postpone this until people will actually ask for big-endian support. And even then I would prefer to use some
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Btw this makes me think that right now this can produce some garbage on big-endian platforms without failing, which is not good. Also I found that there is
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After all I decided to add the big-endian support, it is not that hard. Essentially the idea is that I simply conditionally add byte reverse whenever I read/write anything longer than 1 byte. I know this is not optimal for big-endian but it is conceptually easy to reason about. I will also make a separate PR to handle floats more robustly. |
||
| #if PY_BIG_ENDIAN | ||
| two_more = reverse_16(two_more); | ||
| #endif | ||
| Py_ssize_t higher = (((Py_ssize_t)two_more) << 13) + (((Py_ssize_t)second) << 5); | ||
| return (higher + (Py_ssize_t)(first >> 3) + MIN_FOUR_BYTES_INT) << 1; | ||
| } | ||
|
|
||
| static PyObject* | ||
| read_str_internal(PyObject *data) { | ||
| _CHECK_BUFFER(data, NULL) | ||
|
|
||
| // Read string length. | ||
| Py_ssize_t size; | ||
| _CHECK_READ(data, 1, NULL) | ||
| uint8_t first = _READ(data, uint8_t) | ||
| if (likely(first != LONG_STR_TAG)) { | ||
| // Common case: short string (len <= 127). | ||
| size = (Py_ssize_t)(first >> 1); | ||
| } else { | ||
| _CHECK_READ(data, sizeof(CPyTagged), NULL) | ||
| size = _READ(data, Py_ssize_t) | ||
| if (unlikely(first == LONG_INT_TRAILER)) { | ||
| // Fail fast for invalid/tampered data. | ||
| PyErr_SetString(PyExc_ValueError, "invalid str size"); | ||
| return NULL; | ||
| } | ||
| CPyTagged tagged_size = _read_short_int(data, first); | ||
| if (tagged_size == CPY_INT_TAG) | ||
| return NULL; | ||
| Py_ssize_t size = tagged_size >> 1; | ||
| // Read string content. | ||
| char *buf = ((BufferObject *)data)->buf; | ||
| _CHECK_READ(data, size, NULL) | ||
|
|
@@ -302,6 +343,35 @@ read_str(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames) | |
| return read_str_internal(data); | ||
| } | ||
|
|
||
| // The caller *must* check that real_value is within allowed range (29 bits). | ||
| static inline char | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add comment explaining that this assumes that |
||
| _write_short_int(PyObject *data, Py_ssize_t real_value) { | ||
| if (real_value >= MIN_ONE_BYTE_INT && real_value <= MAX_ONE_BYTE_INT) { | ||
| _CHECK_SIZE(data, 1) | ||
| _WRITE(data, uint8_t, (uint8_t)(real_value - MIN_ONE_BYTE_INT) << 1) | ||
| ((BufferObject *)data)->end += 1; | ||
| } else if (real_value >= MIN_TWO_BYTES_INT && real_value <= MAX_TWO_BYTES_INT) { | ||
| _CHECK_SIZE(data, 2) | ||
| #if PY_BIG_ENDIAN | ||
| uint16_t to_write = ((uint16_t)(real_value - MIN_TWO_BYTES_INT) << 2) | TWO_BYTES_INT_BIT; | ||
| _WRITE(data, uint16_t, reverse_16(to_write)) | ||
| #else | ||
| _WRITE(data, uint16_t, ((uint16_t)(real_value - MIN_TWO_BYTES_INT) << 2) | TWO_BYTES_INT_BIT) | ||
| #endif | ||
| ((BufferObject *)data)->end += 2; | ||
| } else { | ||
| _CHECK_SIZE(data, 4) | ||
| #if PY_BIG_ENDIAN | ||
| uint32_t to_write = ((uint32_t)(real_value - MIN_FOUR_BYTES_INT) << 3) | FOUR_BYTES_INT_TRAILER; | ||
| _WRITE(data, uint32_t, reverse_32(to_write)) | ||
| #else | ||
| _WRITE(data, uint32_t, ((uint32_t)(real_value - MIN_FOUR_BYTES_INT) << 3) | FOUR_BYTES_INT_TRAILER) | ||
| #endif | ||
| ((BufferObject *)data)->end += 4; | ||
| } | ||
| return CPY_NONE; | ||
| } | ||
|
|
||
| static char | ||
| write_str_internal(PyObject *data, PyObject *value) { | ||
| _CHECK_BUFFER(data, CPY_NONE_ERROR) | ||
|
|
@@ -311,24 +381,20 @@ write_str_internal(PyObject *data, PyObject *value) { | |
| if (unlikely(chunk == NULL)) | ||
| return CPY_NONE_ERROR; | ||
|
|
||
| Py_ssize_t need; | ||
| // Write string length. | ||
| if (likely(size <= MAX_SHORT_LEN)) { | ||
| // Common case: short string (len <= 127) store as single byte. | ||
| need = size + 1; | ||
| _CHECK_SIZE(data, need) | ||
| _WRITE(data, uint8_t, (uint8_t)size << 1) | ||
| if (likely(size >= MIN_FOUR_BYTES_INT && size <= MAX_FOUR_BYTES_INT)) { | ||
| if (_write_short_int(data, size) == CPY_NONE_ERROR) | ||
| return CPY_NONE_ERROR; | ||
| } else { | ||
| need = size + sizeof(Py_ssize_t) + 1; | ||
| _CHECK_SIZE(data, need) | ||
| _WRITE(data, uint8_t, LONG_STR_TAG) | ||
| _WRITE(data, Py_ssize_t, size) | ||
| PyErr_SetString(PyExc_ValueError, "str too long to serialize"); | ||
| return CPY_NONE_ERROR; | ||
| } | ||
| // Write string content. | ||
| _CHECK_SIZE(data, size) | ||
| char *buf = ((BufferObject *)data)->buf; | ||
| memcpy(buf + ((BufferObject *)data)->pos, chunk, size); | ||
| ((BufferObject *)data)->pos += size; | ||
| ((BufferObject *)data)->end += need; | ||
| ((BufferObject *)data)->end += size; | ||
| return CPY_NONE; | ||
| } | ||
|
|
||
|
|
@@ -353,26 +419,25 @@ write_str(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames | |
| } | ||
|
|
||
| /* | ||
| bytes format: size followed by bytes | ||
| short bytes (len <= 127): single byte for size as `(uint8_t)size << 1` | ||
| long bytes: \x01 followed by size as Py_ssize_t | ||
| bytes format: size as int (see below) followed by bytes | ||
| */ | ||
|
|
||
| static PyObject* | ||
| read_bytes_internal(PyObject *data) { | ||
| _CHECK_BUFFER(data, NULL) | ||
|
|
||
| // Read length. | ||
| Py_ssize_t size; | ||
| _CHECK_READ(data, 1, NULL) | ||
| uint8_t first = _READ(data, uint8_t) | ||
| if (likely(first != LONG_STR_TAG)) { | ||
| // Common case: short bytes (len <= 127). | ||
| size = (Py_ssize_t)(first >> 1); | ||
| } else { | ||
| _CHECK_READ(data, sizeof(CPyTagged), NULL) | ||
| size = _READ(data, Py_ssize_t) | ||
| if (unlikely(first == LONG_INT_TRAILER)) { | ||
| // Fail fast for invalid/tampered data. | ||
| PyErr_SetString(PyExc_ValueError, "invalid bytes size"); | ||
| return NULL; | ||
| } | ||
| CPyTagged tagged_size = _read_short_int(data, first); | ||
| if (tagged_size == CPY_INT_TAG) | ||
| return NULL; | ||
| Py_ssize_t size = tagged_size >> 1; | ||
| // Read bytes content. | ||
| char *buf = ((BufferObject *)data)->buf; | ||
| _CHECK_READ(data, size, NULL) | ||
|
|
@@ -405,24 +470,20 @@ write_bytes_internal(PyObject *data, PyObject *value) { | |
| return CPY_NONE_ERROR; | ||
| Py_ssize_t size = PyBytes_GET_SIZE(value); | ||
|
|
||
| Py_ssize_t need; | ||
| // Write length. | ||
| if (likely(size <= MAX_SHORT_LEN)) { | ||
| // Common case: short bytes (len <= 127) store as single byte. | ||
| need = size + 1; | ||
| _CHECK_SIZE(data, need) | ||
| _WRITE(data, uint8_t, (uint8_t)size << 1) | ||
| if (likely(size >= MIN_FOUR_BYTES_INT && size <= MAX_FOUR_BYTES_INT)) { | ||
| if (_write_short_int(data, size) == CPY_NONE_ERROR) | ||
| return CPY_NONE_ERROR; | ||
| } else { | ||
| need = size + sizeof(Py_ssize_t) + 1; | ||
| _CHECK_SIZE(data, need) | ||
| _WRITE(data, uint8_t, LONG_STR_TAG) | ||
| _WRITE(data, Py_ssize_t, size) | ||
| PyErr_SetString(PyExc_ValueError, "bytes too long to serialize"); | ||
| return CPY_NONE_ERROR; | ||
| } | ||
| // Write bytes content. | ||
| _CHECK_SIZE(data, size) | ||
| char *buf = ((BufferObject *)data)->buf; | ||
| memcpy(buf + ((BufferObject *)data)->pos, chunk, size); | ||
| ((BufferObject *)data)->pos += size; | ||
| ((BufferObject *)data)->end += need; | ||
| ((BufferObject *)data)->end += size; | ||
| return CPY_NONE; | ||
| } | ||
|
|
||
|
|
@@ -455,7 +516,7 @@ static double | |
| read_float_internal(PyObject *data) { | ||
| _CHECK_BUFFER(data, CPY_FLOAT_ERROR) | ||
| _CHECK_READ(data, sizeof(double), CPY_FLOAT_ERROR) | ||
| double res = _READ(data, double); | ||
| double res = _READ(data, double) | ||
| return res; | ||
| } | ||
|
|
||
|
|
@@ -505,9 +566,13 @@ write_float(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnam | |
|
|
||
| /* | ||
| int format: | ||
| most common values (-10 <= value <= 117): single byte as `(uint8_t)(value + 10) << 1` | ||
| medium values (fit in CPyTagged): \x01 followed by CPyTagged value | ||
| long values (very rare): \x03 followed by decimal string (see str format) | ||
| one byte: last bit 0, 7 bits used | ||
| two bytes: last two bits 01, 14 bits used | ||
| four bytes: last three bits 011, 29 bits used | ||
| everything else: 00001111 followed by serialized string representation | ||
|
|
||
| Note: for fixed size formats we skew ranges towards more positive values, | ||
| since negative integers are much more rare. | ||
| */ | ||
|
|
||
| static CPyTagged | ||
|
|
@@ -516,22 +581,17 @@ read_int_internal(PyObject *data) { | |
| _CHECK_READ(data, 1, CPY_INT_TAG) | ||
|
|
||
| uint8_t first = _READ(data, uint8_t) | ||
| if ((first & MEDIUM_INT_TAG) == 0) { | ||
| // Most common case: int that is small in absolute value. | ||
| return ((Py_ssize_t)(first >> 1) + MIN_SHORT_INT) << 1; | ||
| } | ||
| if (first == MEDIUM_INT_TAG) { | ||
| _CHECK_READ(data, sizeof(CPyTagged), CPY_INT_TAG) | ||
| CPyTagged ret = _READ(data, CPyTagged) | ||
| return ret; | ||
| if (likely(first != LONG_INT_TRAILER)) { | ||
| return _read_short_int(data, first); | ||
| } | ||
| // People who have literal ints not fitting in size_t should be punished :-) | ||
| PyObject *str_ret = read_str_internal(data); | ||
| if (unlikely(str_ret == NULL)) | ||
| return CPY_INT_TAG; | ||
| PyObject* ret_long = PyLong_FromUnicodeObject(str_ret, 10); | ||
| Py_DECREF(str_ret); | ||
| return ((CPyTagged)ret_long) | CPY_INT_TAG; | ||
| if (ret_long == NULL) | ||
| return CPY_INT_TAG; | ||
| return CPyTagged_StealFromObject(ret_long); | ||
| } | ||
|
|
||
| static PyObject* | ||
|
|
@@ -549,36 +609,38 @@ read_int(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames) | |
| return CPyTagged_StealAsObject(retval); | ||
| } | ||
|
|
||
| static inline char | ||
| _write_long_int(PyObject *data, CPyTagged value) { | ||
| // TODO(jukka): write a more compact/optimal format for arbitrary length ints. | ||
| _CHECK_SIZE(data, 1) | ||
| _WRITE(data, uint8_t, LONG_INT_TRAILER) | ||
| ((BufferObject *)data)->end += 1; | ||
| PyObject* int_value = CPyTagged_AsObject(value); | ||
| if (unlikely(int_value == NULL)) | ||
| return CPY_NONE_ERROR; | ||
| PyObject *str_value = PyObject_Str(int_value); | ||
| Py_DECREF(int_value); | ||
| if (unlikely(str_value == NULL)) | ||
| return CPY_NONE_ERROR; | ||
| char res = write_str_internal(data, str_value); | ||
| Py_DECREF(str_value); | ||
| return res; | ||
| } | ||
|
|
||
| static char | ||
| write_int_internal(PyObject *data, CPyTagged value) { | ||
| _CHECK_BUFFER(data, CPY_NONE_ERROR) | ||
|
|
||
| if (likely((value & CPY_INT_TAG) == 0)) { | ||
| Py_ssize_t real_value = CPyTagged_ShortAsSsize_t(value); | ||
| if (real_value >= MIN_SHORT_INT && real_value <= MAX_SHORT_INT) { | ||
| // Most common case: int that is small in absolute value. | ||
| _CHECK_SIZE(data, 1) | ||
| _WRITE(data, uint8_t, (uint8_t)(real_value - MIN_SHORT_INT) << 1) | ||
| ((BufferObject *)data)->end += 1; | ||
| if (likely(real_value >= MIN_FOUR_BYTES_INT && real_value <= MAX_FOUR_BYTES_INT)) { | ||
| return _write_short_int(data, real_value); | ||
| } else { | ||
| _CHECK_SIZE(data, sizeof(CPyTagged) + 1) | ||
| _WRITE(data, uint8_t, MEDIUM_INT_TAG) | ||
| _WRITE(data, CPyTagged, value) | ||
| ((BufferObject *)data)->end += sizeof(CPyTagged) + 1; | ||
| return _write_long_int(data, value); | ||
| } | ||
| } else { | ||
| _CHECK_SIZE(data, 1) | ||
| _WRITE(data, uint8_t, LONG_INT_TAG) | ||
| ((BufferObject *)data)->end += 1; | ||
| PyObject *str_value = PyObject_Str(CPyTagged_LongAsObject(value)); | ||
| if (unlikely(str_value == NULL)) | ||
| return CPY_NONE_ERROR; | ||
| char res = write_str_internal(data, str_value); | ||
| Py_DECREF(str_value); | ||
| if (unlikely(res == CPY_NONE_ERROR)) | ||
| return CPY_NONE_ERROR; | ||
| return _write_long_int(data, value); | ||
| } | ||
| return CPY_NONE; | ||
| } | ||
|
|
||
| static PyObject* | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we could extend the negative range further here (e.g. make it symmetric with the positive range), since whether the upper bound is ~512M or ~256M probably won't make much difference in practice.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW this will not really simplify code (and will actually make it less consistent). I would prefer to keep this as is, unless there is a reason to use 4 bytes storage for large negative integers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I don't think we'll have many use cases where we need large negative integers. This would mainly be useful if this was a general-purpose library, but it's specific to mypy and likely to remain so.