From 347dcc979a3f9bfcdfd0313c0c3feb02f47d96ee Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 9 May 2024 16:05:04 -0400 Subject: [PATCH 1/9] Switch to the new PyO3 0.21 APIs. --- py-polars/src/map/lazy.rs | 18 +++++++++--------- py-polars/src/on_startup.rs | 4 ++-- py-polars/src/series/mod.rs | 2 +- py-polars/src/series/numpy_ufunc.rs | 6 +++--- py-polars/src/to_numpy.rs | 3 +-- 5 files changed, 16 insertions(+), 17 deletions(-) diff --git a/py-polars/src/map/lazy.rs b/py-polars/src/map/lazy.rs index 75084783a295..88b05beb16cc 100644 --- a/py-polars/src/map/lazy.rs +++ b/py-polars/src/map/lazy.rs @@ -29,14 +29,14 @@ impl ToSeries for PyObject { let res = py_polars_module .getattr(py, "Series") .unwrap() - .call1(py, (name, PyList::new(py, [self]))); + .call1(py, (name, PyList::new_bound(py, [self]))); match res { Ok(python_s) => python_s.getattr(py, "_s").unwrap(), Err(_) => { polars_bail!(ComputeError: "expected a something that could convert to a `Series` but got: {}", - self.as_ref(py).get_type() + self.bind(py).get_type() ) }, } @@ -53,7 +53,7 @@ pub(crate) fn call_lambda_with_series( s: Series, lambda: &PyObject, ) -> PyResult { - let pypolars = POLARS.downcast::(py).unwrap(); + let pypolars = POLARS.downcast_bound::(py).unwrap(); // create a PySeries struct/object for Python let pyseries = PySeries::new(s); @@ -75,7 +75,7 @@ pub(crate) fn binary_lambda( ) -> PolarsResult> { Python::with_gil(|py| { // get the pypolars module - let pypolars = PyModule::import(py, "polars").unwrap(); + let pypolars = PyModule::import_bound(py, "polars").unwrap(); // create a PySeries struct/object for Python let pyseries_a = PySeries::new(a); let pyseries_b = PySeries::new(b); @@ -97,7 +97,7 @@ pub(crate) fn binary_lambda( match lambda.call1(py, (python_series_wrapper_a, python_series_wrapper_b)) { Ok(pyobj) => pyobj, Err(e) => polars_bail!( - ComputeError: "custom python function failed: {}", e.value(py), + ComputeError: "custom python function failed: {}", e.value_bound(py), ), }; let pyseries = if let Ok(expr) = result_series_wrapper.getattr(py, "_pyexpr") { @@ -142,7 +142,7 @@ pub(crate) fn call_lambda_with_series_slice( lambda: &PyObject, polars_module: &PyObject, ) -> PyObject { - let pypolars = polars_module.downcast::(py).unwrap(); + let pypolars = polars_module.downcast_bound::(py).unwrap(); // create a PySeries struct/object for Python let iter = s.iter().map(|s| { @@ -153,12 +153,12 @@ pub(crate) fn call_lambda_with_series_slice( python_series_wrapper }); - let wrapped_s = PyList::new(py, iter); + let wrapped_s = PyList::new_bound(py, iter); // call the lambda and get a python side Series wrapper match lambda.call1(py, (wrapped_s,)) { Ok(pyobj) => pyobj, - Err(e) => panic!("python function failed: {}", e.value(py)), + Err(e) => panic!("python function failed: {}", e.value_bound(py)), } } @@ -172,7 +172,7 @@ pub fn map_mul( ) -> PyExpr { // get the pypolars module // do the import outside of the function to prevent import side effects in a hot loop. - let pypolars = PyModule::import(py, "polars").unwrap().to_object(py); + let pypolars = PyModule::import_bound(py, "polars").unwrap().to_object(py); let function = move |s: &mut [Series]| { Python::with_gil(|py| { diff --git a/py-polars/src/on_startup.rs b/py-polars/src/on_startup.rs index af83c2e1ed12..f8bc8fafb683 100644 --- a/py-polars/src/on_startup.rs +++ b/py-polars/src/on_startup.rs @@ -39,7 +39,7 @@ fn python_function_caller_df(df: DataFrame, lambda: &PyObject) -> PolarsResult PolarsResult PyResult { + fn $name(&self, lambda: &Bound) -> PyResult { // numpy array object, and a *mut ptr Python::with_gil(|py| { let size = self.len(); @@ -80,7 +80,7 @@ macro_rules! impl_ufuncs { debug_assert_eq!(get_refcnt(&out_array), 1); // inserting it in a tuple increase the reference count by 1. - let args = PyTuple::new(py, &[out_array.clone()]); + let args = PyTuple::new_bound(py, &[out_array.clone()]); debug_assert_eq!(get_refcnt(&out_array), 2); // whatever the result, we must take the leaked memory ownership back @@ -88,7 +88,7 @@ macro_rules! impl_ufuncs { Ok(_) => { // if this assert fails, the lambda has taken a reference to the object, so we must panic // args and the lambda return have a reference, making a total of 3 - assert_eq!(get_refcnt(&out_array), 3); + assert!(get_refcnt(&out_array) <= 3); let validity = self.series.chunks()[0].validity().cloned(); let ca = diff --git a/py-polars/src/to_numpy.rs b/py-polars/src/to_numpy.rs index d1894e2dfba9..51587dc49315 100644 --- a/py-polars/src/to_numpy.rs +++ b/py-polars/src/to_numpy.rs @@ -43,8 +43,7 @@ where std::mem::forget(owner); PY_ARRAY_API.PyArray_SetBaseObject(py, array as *mut PyArrayObject, owner_ptr); - let any: &PyAny = py.from_owned_ptr(array); - any.into_py(py) + Py::from_owned_ptr(py, array) } #[pymethods] From 66aee681a49c56281a72dfd552f5af041e5207e1 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 9 May 2024 16:10:00 -0400 Subject: [PATCH 2/9] Compile even if gil-refs is feature is disabled. --- py-polars/src/map/dataframe.rs | 8 ++++---- py-polars/src/map/mod.rs | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/py-polars/src/map/dataframe.rs b/py-polars/src/map/dataframe.rs index 7b6a58879824..59035a0817fc 100644 --- a/py-polars/src/map/dataframe.rs +++ b/py-polars/src/map/dataframe.rs @@ -89,7 +89,7 @@ pub fn apply_lambda_unknown<'a>( py, lambda, null_count, - first_value.as_deref(), + first_value, ) .into_series(), ) @@ -204,17 +204,17 @@ pub fn apply_lambda_with_string_out_type<'a>( py: Python, lambda: Bound<'a, PyAny>, init_null_count: usize, - first_value: Option<&str>, + first_value: Option, ) -> StringChunked { let skip = usize::from(first_value.is_some()); if init_null_count == df.height() { ChunkedArray::full_null("map", df.height()) } else { - let iter = apply_iter::>(df, py, lambda, init_null_count, skip); + let iter = apply_iter::(df, py, lambda, init_null_count, skip); iterator_to_string( iter, init_null_count, - first_value.map(Cow::Borrowed), + first_value, "map", df.height(), ) diff --git a/py-polars/src/map/mod.rs b/py-polars/src/map/mod.rs index f6ffe15a9bdf..0af601338352 100644 --- a/py-polars/src/map/mod.rs +++ b/py-polars/src/map/mod.rs @@ -2,7 +2,6 @@ pub mod dataframe; pub mod lazy; pub mod series; -use std::borrow::Cow; use std::collections::BTreeMap; use polars::chunked_array::builder::get_list_builder; From bd0d47da257c9a80d638b947830a3695e68cca8b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 9 May 2024 16:35:48 -0400 Subject: [PATCH 3/9] Remove PyO3 gil-refs crate feature. --- py-polars/Cargo.toml | 2 +- py-polars/src/conversion/any_value.rs | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml index cb530b2d1445..200ccb34d6cd 100644 --- a/py-polars/Cargo.toml +++ b/py-polars/Cargo.toml @@ -27,7 +27,7 @@ ndarray = { workspace = true } num-traits = { workspace = true } numpy = { version = "0.21", default-features = false } once_cell = { workspace = true } -pyo3 = { workspace = true, features = ["abi3-py38", "extension-module", "multiple-pymethods", "gil-refs"] } +pyo3 = { workspace = true, features = ["abi3-py38", "extension-module", "multiple-pymethods"] } pyo3-built = { version = "0.5", optional = true } recursive = { workspace = true } serde_json = { workspace = true, optional = true } diff --git a/py-polars/src/conversion/any_value.rs b/py-polars/src/conversion/any_value.rs index 84da08f4d23f..6f7389e134ef 100644 --- a/py-polars/src/conversion/any_value.rs +++ b/py-polars/src/conversion/any_value.rs @@ -8,6 +8,7 @@ use polars_core::utils::any_values_to_supertype_and_n_dtypes; use pyo3::exceptions::{PyOverflowError, PyTypeError}; use pyo3::intern; use pyo3::prelude::*; +use pyo3::pybacked::PyBackedBytes; use pyo3::types::{PyBool, PyBytes, PyDict, PyFloat, PyInt, PyList, PySequence, PyString, PyTuple}; use super::{decimal_to_digits, struct_dict, ObjectValue, Wrap}; @@ -171,8 +172,8 @@ pub(crate) fn py_object_to_any_value<'py>( } fn get_bytes<'py>(ob: &Bound<'py, PyAny>, _strict: bool) -> PyResult> { - let value = ob.extract::<&'py [u8]>().unwrap(); - Ok(AnyValue::Binary(value)) + let value = ob.extract::().unwrap(); + Ok(AnyValue::BinaryOwned(value.to_vec())) } fn get_date(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { From 4847049f72f15f84e4327cd50efe41f3347b71f6 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 9 May 2024 16:44:44 -0400 Subject: [PATCH 4/9] Switch to extract_bound() API. --- py-polars/src/conversion/mod.rs | 135 ++++++++++++++++---------------- py-polars/src/datatypes.rs | 6 +- 2 files changed, 72 insertions(+), 69 deletions(-) diff --git a/py-polars/src/conversion/mod.rs b/py-polars/src/conversion/mod.rs index cb6ed01d9463..a5e40e8c62bb 100644 --- a/py-polars/src/conversion/mod.rs +++ b/py-polars/src/conversion/mod.rs @@ -275,18 +275,21 @@ impl ToPyObject for Wrap { } } -impl FromPyObject<'_> for Wrap { - fn extract(ob: &PyAny) -> PyResult { +impl<'py> FromPyObject<'py> for Wrap { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { let py = ob.py(); - let name = ob.getattr(intern!(py, "name"))?.str()?.to_str()?; + let name = ob + .getattr(intern!(py, "name"))? + .str()? + .extract::()?; let dtype = ob .getattr(intern!(py, "dtype"))? .extract::>()?; - Ok(Wrap(Field::new(name, dtype.0))) + Ok(Wrap(Field::new(&name, dtype.0))) } } -impl FromPyObject<'_> for Wrap { +impl<'py> FromPyObject<'py> for Wrap { fn extract(ob: &PyAny) -> PyResult { let py = ob.py(); let type_name = ob.get_type().qualname()?; @@ -429,15 +432,15 @@ impl ToPyObject for Wrap { } impl<'s> FromPyObject<'s> for Wrap> { - fn extract(ob: &'s PyAny) -> PyResult { + fn extract_bound(ob: &Bound<'s, PyAny>) -> PyResult { let vals = ob.extract::>>>()?; let vals = vec_extract_wrapped(vals); Ok(Wrap(Row(vals))) } } -impl FromPyObject<'_> for Wrap { - fn extract(ob: &PyAny) -> PyResult { +impl<'py> FromPyObject<'py> for Wrap { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { let dict = ob.extract::<&PyDict>()?; Ok(Wrap( @@ -528,7 +531,7 @@ impl From for ObjectValue { } impl<'a> FromPyObject<'a> for ObjectValue { - fn extract(ob: &'a PyAny) -> PyResult { + fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult { Python::with_gil(|py| { Ok(ObjectValue { inner: ob.to_object(py), @@ -560,7 +563,7 @@ impl Default for ObjectValue { } impl<'a, T: NativeType + FromPyObject<'a>> FromPyObject<'a> for Wrap> { - fn extract(obj: &'a PyAny) -> PyResult { + fn extract_bound(obj: &Bound<'a, PyAny>) -> PyResult { let seq = obj.downcast::()?; let mut v = Vec::with_capacity(seq.len().unwrap_or(0)); for item in seq.iter()? { @@ -571,8 +574,8 @@ impl<'a, T: NativeType + FromPyObject<'a>> FromPyObject<'a> for Wrap> { } #[cfg(feature = "asof_join")] -impl FromPyObject<'_> for Wrap { - fn extract(ob: &PyAny) -> PyResult { +impl<'py> FromPyObject<'py> for Wrap { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { let parsed = match &*(ob.extract::()?) { "backward" => AsofStrategy::Backward, "forward" => AsofStrategy::Forward, @@ -587,8 +590,8 @@ impl FromPyObject<'_> for Wrap { } } -impl FromPyObject<'_> for Wrap { - fn extract(ob: &PyAny) -> PyResult { +impl<'py> FromPyObject<'py> for Wrap { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { let parsed = match &*(ob.extract::()?) { "linear" => InterpolationMethod::Linear, "nearest" => InterpolationMethod::Nearest, @@ -603,8 +606,8 @@ impl FromPyObject<'_> for Wrap { } #[cfg(feature = "avro")] -impl FromPyObject<'_> for Wrap> { - fn extract(ob: &PyAny) -> PyResult { +impl<'py> FromPyObject<'py> for Wrap> { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { let parsed = match &*ob.extract::()? { "uncompressed" => None, "snappy" => Some(AvroCompression::Snappy), @@ -619,8 +622,8 @@ impl FromPyObject<'_> for Wrap> { } } -impl FromPyObject<'_> for Wrap { - fn extract(ob: &PyAny) -> PyResult { +impl<'py> FromPyObject<'py> for Wrap { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { let parsed = match &*ob.extract::()? { "physical" => CategoricalOrdering::Physical, "lexical" => CategoricalOrdering::Lexical, @@ -634,8 +637,8 @@ impl FromPyObject<'_> for Wrap { } } -impl FromPyObject<'_> for Wrap { - fn extract(ob: &PyAny) -> PyResult { +impl<'py> FromPyObject<'py> for Wrap { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { let parsed = match &*ob.extract::()? { "window" => StartBy::WindowBound, "datapoint" => StartBy::DataPoint, @@ -656,8 +659,8 @@ impl FromPyObject<'_> for Wrap { } } -impl FromPyObject<'_> for Wrap { - fn extract(ob: &PyAny) -> PyResult { +impl<'py> FromPyObject<'py> for Wrap { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { let parsed = match &*ob.extract::()? { "left" => ClosedWindow::Left, "right" => ClosedWindow::Right, @@ -674,8 +677,8 @@ impl FromPyObject<'_> for Wrap { } #[cfg(feature = "csv")] -impl FromPyObject<'_> for Wrap { - fn extract(ob: &PyAny) -> PyResult { +impl<'py> FromPyObject<'py> for Wrap { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { let parsed = match &*ob.extract::()? { "utf8" => CsvEncoding::Utf8, "utf8-lossy" => CsvEncoding::LossyUtf8, @@ -690,8 +693,8 @@ impl FromPyObject<'_> for Wrap { } #[cfg(feature = "ipc")] -impl FromPyObject<'_> for Wrap> { - fn extract(ob: &PyAny) -> PyResult { +impl<'py> FromPyObject<'py> for Wrap> { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { let parsed = match &*ob.extract::()? { "uncompressed" => None, "lz4" => Some(IpcCompression::LZ4), @@ -706,8 +709,8 @@ impl FromPyObject<'_> for Wrap> { } } -impl FromPyObject<'_> for Wrap { - fn extract(ob: &PyAny) -> PyResult { +impl<'py> FromPyObject<'py> for Wrap { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { let parsed = match &*ob.extract::()? { "inner" => JoinType::Inner, "left" => JoinType::Left, @@ -730,8 +733,8 @@ impl FromPyObject<'_> for Wrap { } } -impl FromPyObject<'_> for Wrap