-
-
Notifications
You must be signed in to change notification settings - Fork 33.5k
gh-139772: Add PyDict_FromKeysAndValues() function #141682
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Add PyDict_FromKeysAndValues() and PyDict_FromItems() functions.
|
Benchmark on dict creation with Unicode strings:
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index c14f925b4e7..9987bfa41ba 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -2595,6 +2595,133 @@ create_managed_weakref_nogc_type(PyObject *self, PyObject *Py_UNUSED(args))
}
+/* Benchmark: build dicts with PyDict_New() + PyDict_SetItem().
+ *
+ * args is a tuple (size, loops), both parsed as Py_ssize_t.  For each of
+ * the `loops` iterations a new dict is filled with `size` entries whose key
+ * is the decimal string of the index and whose value is the index as an int.
+ *
+ * Returns a float: the difference between the two PyTime_PerfCounterRaw()
+ * readings converted with PyTime_AsSecondsDouble().  Returns NULL with an
+ * exception set if argument parsing fails, if size is negative, or if
+ * creating or inserting an object fails.
+ */
+static PyObject *
+bench_dict_new(PyObject *ob, PyObject *args)
+{
+    Py_ssize_t size, loops;
+    if (!PyArg_ParseTuple(args, "nn", &size, &loops)) {
+        return NULL;
+    }
+    if (size < 0) {
+        /* A negative size would otherwise trip the size assertion below. */
+        PyErr_SetString(PyExc_ValueError, "size must be non-negative");
+        return NULL;
+    }
+
+    PyTime_t t1, t2;
+    PyTime_PerfCounterRaw(&t1);
+    for (Py_ssize_t loop=0; loop < loops; loop++) {
+        PyObject *d = PyDict_New();
+        if (d == NULL) {
+            return NULL;
+        }
+
+        for (Py_ssize_t i=0; i < size; i++) {
+            PyObject *key = PyUnicode_FromFormat("%zi", i);
+            /* Py_ssize_t may be wider than long, so do not go through long. */
+            PyObject *value = (key != NULL) ? PyLong_FromSsize_t(i) : NULL;
+
+            /* The insertion is the work being timed: it must not live inside
+               an assert(), which can be compiled out. */
+            int rc = -1;
+            if (value != NULL) {
+                rc = PyDict_SetItem(d, key, value);
+            }
+            Py_XDECREF(key);
+            Py_XDECREF(value);
+            if (rc < 0) {
+                Py_DECREF(d);
+                return NULL;
+            }
+        }
+
+        assert(PyDict_Size(d) == size);
+        Py_DECREF(d);
+    }
+    PyTime_PerfCounterRaw(&t2);
+
+    return PyFloat_FromDouble(PyTime_AsSecondsDouble(t2 - t1));
+}
+
+
+/* Benchmark: build dicts with PyDict_FromKeysAndValues().
+ *
+ * args is a tuple (size, loops), both parsed as Py_ssize_t.  Each iteration
+ * allocates two arrays of `size` object pointers, fills them with the
+ * decimal string of each index (keys) and the index as an int (values),
+ * passes them to PyDict_FromKeysAndValues(), then releases the dict, the
+ * objects and the arrays.  Array setup and teardown are inside the timed
+ * region.
+ *
+ * Returns a float: the difference between the two PyTime_PerfCounterRaw()
+ * readings converted with PyTime_AsSecondsDouble().  Returns NULL with an
+ * exception set on bad arguments, negative size, or any allocation or
+ * dict-creation failure.
+ */
+static PyObject *
+bench_dict_fromkeysandvalues(PyObject *ob, PyObject *args)
+{
+    Py_ssize_t size, loops;
+    if (!PyArg_ParseTuple(args, "nn", &size, &loops)) {
+        return NULL;
+    }
+    if (size < 0) {
+        PyErr_SetString(PyExc_ValueError, "size must be non-negative");
+        return NULL;
+    }
+
+    PyTime_t t1, t2;
+    PyTime_PerfCounterRaw(&t1);
+    for (Py_ssize_t loop=0; loop < loops; loop++) {
+        /* PyMem_New() rejects element counts whose byte size would overflow. */
+        PyObject **keys = PyMem_New(PyObject*, size);
+        PyObject **values = PyMem_New(PyObject*, size);
+        if (keys == NULL || values == NULL) {
+            /* PyMem_Free(NULL) is a no-op, so both can be freed blindly. */
+            PyMem_Free(keys);
+            PyMem_Free(values);
+            return PyErr_NoMemory();
+        }
+
+        /* `filled` counts the slots that hold owned references, so cleanup
+           is correct even when filling stops early. */
+        Py_ssize_t filled = 0;
+        while (filled < size) {
+            PyObject *key = PyUnicode_FromFormat("%zi", filled);
+            if (key == NULL) {
+                break;
+            }
+            PyObject *value = PyLong_FromSsize_t(filled);
+            if (value == NULL) {
+                Py_DECREF(key);
+                break;
+            }
+
+            keys[filled] = key;
+            values[filled] = value;
+            filled++;
+        }
+
+        PyObject *d = NULL;
+        if (filled == size) {
+            d = PyDict_FromKeysAndValues(keys, values, size);
+        }
+
+        /* The arrays keep their own references: release them whether or not
+           the dict was created. */
+        for (Py_ssize_t i=0; i < filled; i++) {
+            Py_DECREF(keys[i]);
+            Py_DECREF(values[i]);
+        }
+        PyMem_Free(keys);
+        PyMem_Free(values);
+
+        if (d == NULL) {
+            return NULL;
+        }
+        Py_DECREF(d);
+    }
+    PyTime_PerfCounterRaw(&t2);
+
+    return PyFloat_FromDouble(PyTime_AsSecondsDouble(t2 - t1));
+}
+
+
+/* Benchmark: build dicts with PyDict_FromItems().
+ *
+ * args is a tuple (size, loops), both parsed as Py_ssize_t.  Each iteration
+ * allocates one array of 2 * size object pointers laid out as
+ * key0, value0, key1, value1, ... (key: decimal string of the index,
+ * value: the index as an int), passes it to PyDict_FromItems(), then
+ * releases the dict, the objects and the array.  Array setup and teardown
+ * are inside the timed region.
+ *
+ * Returns a float: the difference between the two PyTime_PerfCounterRaw()
+ * readings converted with PyTime_AsSecondsDouble().  Returns NULL with an
+ * exception set on bad arguments, negative size, or any allocation or
+ * dict-creation failure.
+ */
+static PyObject *
+bench_dict_fromitems(PyObject *ob, PyObject *args)
+{
+    Py_ssize_t size, loops;
+    if (!PyArg_ParseTuple(args, "nn", &size, &loops)) {
+        return NULL;
+    }
+    if (size < 0) {
+        PyErr_SetString(PyExc_ValueError, "size must be non-negative");
+        return NULL;
+    }
+    if (size > PY_SSIZE_T_MAX / 2) {
+        /* size * 2 below would overflow Py_ssize_t. */
+        return PyErr_NoMemory();
+    }
+
+    PyTime_t t1, t2;
+    PyTime_PerfCounterRaw(&t1);
+    for (Py_ssize_t loop=0; loop < loops; loop++) {
+        /* PyMem_New() rejects element counts whose byte size would overflow. */
+        PyObject **items = PyMem_New(PyObject*, size * 2);
+        if (items == NULL) {
+            return PyErr_NoMemory();
+        }
+
+        /* `filled` counts complete key/value pairs stored in `items`, so
+           cleanup is correct even when filling stops early. */
+        Py_ssize_t filled = 0;
+        while (filled < size) {
+            PyObject *key = PyUnicode_FromFormat("%zi", filled);
+            if (key == NULL) {
+                break;
+            }
+            PyObject *value = PyLong_FromSsize_t(filled);
+            if (value == NULL) {
+                Py_DECREF(key);
+                break;
+            }
+
+            items[filled * 2    ] = key;
+            items[filled * 2 + 1] = value;
+            filled++;
+        }
+
+        PyObject *d = NULL;
+        if (filled == size) {
+            d = PyDict_FromItems(items, size);
+        }
+
+        /* The array keeps its own references: release them whether or not
+           the dict was created. */
+        for (Py_ssize_t i=0; i < filled * 2; i++) {
+            Py_DECREF(items[i]);
+        }
+        PyMem_Free(items);
+
+        if (d == NULL) {
+            return NULL;
+        }
+        Py_DECREF(d);
+    }
+    PyTime_PerfCounterRaw(&t2);
+
+    return PyFloat_FromDouble(PyTime_AsSecondsDouble(t2 - t1));
+}
+
+
static PyMethodDef TestMethods[] = {
{"set_errno", set_errno, METH_VARARGS},
{"test_config", test_config, METH_NOARGS},
@@ -2691,6 +2818,9 @@ static PyMethodDef TestMethods[] = {
{"toggle_reftrace_printer", toggle_reftrace_printer, METH_O},
{"create_managed_weakref_nogc_type",
create_managed_weakref_nogc_type, METH_NOARGS},
+ {"bench_dict_new", bench_dict_new, METH_VARARGS},
+ {"bench_dict_fromkeysandvalues", bench_dict_fromkeysandvalues, METH_VARARGS},
+ {"bench_dict_fromitems", bench_dict_fromitems, METH_VARARGS},
{NULL, NULL} /* sentinel */
};
Script: |
|
@scoder @davidhewitt: Do these 2 APIs fit your needs to create a dictionary? |
In Cython, we'd probably know up-front whether the keys are all |
|
Is the array in For PyO3's internal creation of dictionaries, we should be able to use any of these functions fine 👍 |
Yes:
Can't you modify your code to produce a flat |
The problem is that the
Right. At the end, proposed APIs are 1.13x faster than calling PyDict_New() + PyDict_SetItem(). |
For PyO3 internal code, yes. However we might want to expose this for users of PyO3. While designing that API, I think I've decided that they will need to do some arranging to the objects anyway. So while I still think having the flexibility to have offsets is nice, please don't block this on me. |
|
I'd like to consider:
|
#139963 implements such an API: PyObject* PyDict_FromItems(
PyObject *const *keys,
Py_ssize_t keys_offset,
PyObject *const *values,
Py_ssize_t values_offset,
Py_ssize_t length)
Such an API is harder to use (more error-prone) and requires more checks. I prefer a simpler API for the two most common use cases. |
Aha, like a batch of Currently, there are already |
Add PyDict_FromKeysAndValues() and PyDict_FromItems() functions.
API:
📚 Documentation preview 📚: https://cpython-previews--141682.org.readthedocs.build/