From d21bf2ba317d750189a0a65e7a070f49e689a6d1 Mon Sep 17 00:00:00 2001 From: thalassemia <67928790+thalassemia@users.noreply.github.com> Date: Wed, 24 May 2023 11:12:37 -0700 Subject: [PATCH 1/2] PYTHON-3717 Speed up _type_marker check in BSON --- bson/_cbsonmodule.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index e45a11be32..012dc9cc3e 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -428,12 +428,13 @@ static int _load_python_objects(PyObject* module) { * * Return the type marker, 0 if there is no marker, or -1 on failure. */ +static PyObject *TYPEMARKERSTR; static long _type_marker(PyObject* object) { PyObject* type_marker = NULL; long type = 0; - if (PyObject_HasAttrString(object, "_type_marker")) { - type_marker = PyObject_GetAttrString(object, "_type_marker"); + if (PyObject_HasAttr(object, TYPEMARKERSTR)) { + type_marker = PyObject_GetAttr(object, TYPEMARKERSTR); if (type_marker == NULL) { return -1; } @@ -450,13 +451,6 @@ static long _type_marker(PyObject* object) { if (type_marker && PyLong_CheckExact(type_marker)) { type = PyLong_AsLong(type_marker); Py_DECREF(type_marker); - /* - * Py(Long|Int)_AsLong returns -1 for error but -1 is a valid value - * so we call PyErr_Occurred to differentiate. - */ - if (type == -1 && PyErr_Occurred()) { - return -1; - } } else { Py_XDECREF(type_marker); } @@ -3031,5 +3025,7 @@ PyInit__cbson(void) INITERROR; } + TYPEMARKERSTR = PyUnicode_FromString("_type_marker"); + return m; } From 91edbdd0bd8fe3b08e8485f87e62645f07b624ff Mon Sep 17 00:00:00 2001 From: thalassemia <67928790+thalassemia@users.noreply.github.com> Date: Thu, 25 May 2023 11:01:37 -0700 Subject: [PATCH 2/2] Add _type_marker_str to module_state for teardown --- bson/_cbsonmodule.c | 48 +++++++++++++++++++++------------------ bson/_cbsonmodule.h | 2 +- pymongo/_cmessagemodule.c | 33 ++++++++++++++++----------- 3 files changed, 47 insertions(+), 36 deletions(-) diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index 012dc9cc3e..8e5e8b6c0c 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -55,6 +55,7 @@ struct module_state { PyObject* DatetimeMS; PyObject* _min_datetime_ms; PyObject* _max_datetime_ms; + PyObject* _type_marker_str; }; #define GETSTATE(m) ((struct module_state*)PyModule_GetState(m)) @@ -378,6 +379,9 @@ static int _load_python_objects(PyObject* module) { PyObject* compiled = NULL; struct module_state *state = GETSTATE(module); + /* Python str for faster _type_marker check */ + state->_type_marker_str = PyUnicode_FromString("_type_marker"); + if (_load_object(&state->Binary, "bson.binary", "Binary") || _load_object(&state->Code, "bson.code", "Code") || _load_object(&state->ObjectId, "bson.objectid", "ObjectId") || @@ -428,13 +432,12 @@ static int _load_python_objects(PyObject* module) { * * Return the type marker, 0 if there is no marker, or -1 on failure. */ -static PyObject *TYPEMARKERSTR; -static long _type_marker(PyObject* object) { +static long _type_marker(PyObject* object, PyObject* _type_marker_str) { PyObject* type_marker = NULL; long type = 0; - if (PyObject_HasAttr(object, TYPEMARKERSTR)) { - type_marker = PyObject_GetAttr(object, TYPEMARKERSTR); + if (PyObject_HasAttr(object, _type_marker_str)) { + type_marker = PyObject_GetAttr(object, _type_marker_str); if (type_marker == NULL) { return -1; } @@ -498,13 +501,12 @@ int cbson_convert_type_registry(PyObject* registry_obj, type_registry_t* registr return 0; } -/* Fill out a codec_options_t* from a CodecOptions object. Use with the "O&" - * format spec in PyArg_ParseTuple. +/* Fill out a codec_options_t* from a CodecOptions object. * * Return 1 on success. options->document_class is a new reference. * Return 0 on failure. */ -int convert_codec_options(PyObject* options_obj, void* p) { +int convert_codec_options(PyObject* self, PyObject* options_obj, void* p) { codec_options_t* options = (codec_options_t*)p; PyObject* type_registry_obj = NULL; long type_marker; @@ -521,7 +523,8 @@ int convert_codec_options(PyObject* options_obj, void* p) { &options->datetime_conversion)) return 0; - type_marker = _type_marker(options->document_class); + type_marker = _type_marker(options->document_class, + GETSTATE(self)->_type_marker_str); if (type_marker < 0) { return 0; } @@ -724,7 +727,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, * problems with python sub interpreters. Our custom types should * have a _type_marker attribute, which we can switch on instead. */ - long type = _type_marker(value); + long type = _type_marker(value, state->_type_marker_str); if (type < 0) { return 0; } @@ -1376,7 +1379,7 @@ int write_dict(PyObject* self, buffer_t buffer, long type_marker; /* check for RawBSONDocument */ - type_marker = _type_marker(dict); + type_marker = _type_marker(dict, state->_type_marker_str); if (type_marker < 0) { return 0; } @@ -1498,18 +1501,20 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) { PyObject* result; unsigned char check_keys; unsigned char top_level = 1; + PyObject* options_obj; codec_options_t options; buffer_t buffer; PyObject* raw_bson_document_bytes_obj; long type_marker; - if (!PyArg_ParseTuple(args, "ObO&|b", &dict, &check_keys, - convert_codec_options, &options, &top_level)) { + if (!(PyArg_ParseTuple(args, "ObO|b", &dict, &check_keys, + &options_obj, &top_level) && + convert_codec_options(self, options_obj, &options))) { return NULL; } /* check for RawBSONDocument */ - type_marker = _type_marker(dict); + type_marker = _type_marker(dict, GETSTATE(self)->_type_marker_str); if (type_marker < 0) { destroy_codec_options(&options); return NULL; @@ -2520,6 +2525,7 @@ static PyObject* _cbson_element_to_dict(PyObject* self, PyObject* args) { /* TODO: Support buffer protocol */ char* string; PyObject* bson; + PyObject* options_obj; codec_options_t options; unsigned position; unsigned max; @@ -2529,8 +2535,9 @@ static PyObject* _cbson_element_to_dict(PyObject* self, PyObject* args) { PyObject* value; PyObject* result_tuple; - if (!PyArg_ParseTuple(args, "OIIO&p", &bson, &position, &max, - convert_codec_options, &options, &raw_array)) { + if (!(PyArg_ParseTuple(args, "OIIOp", &bson, &position, &max, + &options_obj, &raw_array) && + convert_codec_options(self, options_obj, &options))) { return NULL; } @@ -2632,7 +2639,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { Py_buffer view = {0}; if (! (PyArg_ParseTuple(args, "OO", &bson, &options_obj) && - convert_codec_options(options_obj, &options))) { + convert_codec_options(self, options_obj, &options))) { return result; } @@ -2709,10 +2716,8 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { PyObject* options_obj = NULL; Py_buffer view = {0}; - if (!PyArg_ParseTuple(args, "OO", &bson, &options_obj)) { - return NULL; - } - if (!convert_codec_options(options_obj, &options)) { + if (!(PyArg_ParseTuple(args, "OO", &bson, &options_obj) && + convert_codec_options(self, options_obj, &options))) { return NULL; } @@ -2960,6 +2965,7 @@ static int _cbson_clear(PyObject *m) { Py_CLEAR(GETSTATE(m)->MaxKey); Py_CLEAR(GETSTATE(m)->UTC); Py_CLEAR(GETSTATE(m)->REType); + Py_CLEAR(GETSTATE(m)->_type_marker_str); return 0; } @@ -3025,7 +3031,5 @@ PyInit__cbson(void) INITERROR; } - TYPEMARKERSTR = PyUnicode_FromString("_type_marker"); - return m; } diff --git a/bson/_cbsonmodule.h b/bson/_cbsonmodule.h index 6ff453b8ff..682205bd84 100644 --- a/bson/_cbsonmodule.h +++ b/bson/_cbsonmodule.h @@ -86,7 +86,7 @@ typedef struct codec_options_t { #define _cbson_convert_codec_options_INDEX 4 #define _cbson_convert_codec_options_RETURN int -#define _cbson_convert_codec_options_PROTO (PyObject* options_obj, void* p) +#define _cbson_convert_codec_options_PROTO (PyObject* self, PyObject* options_obj, void* p) #define _cbson_destroy_codec_options_INDEX 5 #define _cbson_destroy_codec_options_RETURN void diff --git a/pymongo/_cmessagemodule.c b/pymongo/_cmessagemodule.c index 2f03ce73e0..7d5e2db3cc 100644 --- a/pymongo/_cmessagemodule.c +++ b/pymongo/_cmessagemodule.c @@ -75,19 +75,21 @@ static PyObject* _cbson_query_message(PyObject* self, PyObject* args) { int num_to_return; PyObject* query; PyObject* field_selector; + PyObject* options_obj; codec_options_t options; buffer_t buffer = NULL; int length_location, message_length; PyObject* result = NULL; - if (!PyArg_ParseTuple(args, "Iet#iiOOO&", + if (!(PyArg_ParseTuple(args, "Iet#iiOOO", &flags, "utf-8", &collection_name, &collection_name_length, &num_to_skip, &num_to_return, &query, &field_selector, - convert_codec_options, &options)) { + &options_obj) && + convert_codec_options(state->_cbson, options_obj, &options))) { return NULL; } buffer = pymongo_buffer_new(); @@ -220,6 +222,7 @@ static PyObject* _cbson_op_msg(PyObject* self, PyObject* args) { Py_ssize_t identifier_length = 0; PyObject* docs; PyObject* doc; + PyObject* options_obj; codec_options_t options; buffer_t buffer = NULL; int length_location, message_length; @@ -229,14 +232,15 @@ static PyObject* _cbson_op_msg(PyObject* self, PyObject* args) { PyObject* iterator = NULL; /*flags, command, identifier, docs, opts*/ - if (!PyArg_ParseTuple(args, "IOet#OO&", + if (!(PyArg_ParseTuple(args, "IOet#OO", &flags, &command, "utf-8", &identifier, &identifier_length, &docs, - convert_codec_options, &options)) { + &options_obj) && + convert_codec_options(state->_cbson, options_obj, &options))) { return NULL; } buffer = pymongo_buffer_new(); @@ -528,14 +532,15 @@ _cbson_encode_batched_op_msg(PyObject* self, PyObject* args) { PyObject* ctx = NULL; PyObject* to_publish = NULL; PyObject* result = NULL; + PyObject* options_obj; codec_options_t options; buffer_t buffer; struct module_state *state = GETSTATE(self); - if (!PyArg_ParseTuple(args, "bOObO&O", + if (!(PyArg_ParseTuple(args, "bOObOO", &op, &command, &docs, &ack, - convert_codec_options, &options, - &ctx)) { + &options_obj, &ctx) && + convert_codec_options(state->_cbson, options_obj, &options))) { return NULL; } if (!(buffer = pymongo_buffer_new())) { @@ -581,14 +586,15 @@ _cbson_batched_op_msg(PyObject* self, PyObject* args) { PyObject* ctx = NULL; PyObject* to_publish = NULL; PyObject* result = NULL; + PyObject* options_obj; codec_options_t options; buffer_t buffer; struct module_state *state = GETSTATE(self); - if (!PyArg_ParseTuple(args, "bOObO&O", + if (!(PyArg_ParseTuple(args, "bOObOO", &op, &command, &docs, &ack, - convert_codec_options, &options, - &ctx)) { + &options_obj, &ctx) && + convert_codec_options(state->_cbson, options_obj, &options))) { return NULL; } if (!(buffer = pymongo_buffer_new())) { @@ -850,14 +856,15 @@ _cbson_encode_batched_write_command(PyObject* self, PyObject* args) { PyObject* ctx = NULL; PyObject* to_publish = NULL; PyObject* result = NULL; + PyObject* options_obj; codec_options_t options; buffer_t buffer; struct module_state *state = GETSTATE(self); - if (!PyArg_ParseTuple(args, "et#bOOO&O", "utf-8", + if (!(PyArg_ParseTuple(args, "et#bOOOO", "utf-8", &ns, &ns_len, &op, &command, &docs, - convert_codec_options, &options, - &ctx)) { + &options_obj, &ctx) && + convert_codec_options(state->_cbson, options_obj, &options))) { return NULL; } if (!(buffer = pymongo_buffer_new())) {