Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
638dfd2
preliminary change
tushushu Oct 29, 2022
6d71437
pymapping get item
tushushu Oct 30, 2022
f97b1fe
validate_mapping
tushushu Nov 1, 2022
c3177be
fix for loop
tushushu Nov 2, 2022
58cedd8
fix CI
tushushu Nov 3, 2022
99b0e82
Merge branch 'main' into wip-py-mapping
tushushu Nov 3, 2022
c160ed1
fix CI
tushushu Nov 3, 2022
44e4322
fix ci
tushushu Nov 3, 2022
d0bddce
fix CI
tushushu Nov 4, 2022
b47bd30
uncheck bounds
tushushu Nov 5, 2022
093eabd
mapping type
tushushu Nov 5, 2022
1d1a951
dict error
tushushu Nov 6, 2022
10bfb1b
fix bad mapping tuple 1
tushushu Nov 6, 2022
2d6086b
fix CI
tushushu Nov 8, 2022
4f7fc8d
format code
tushushu Nov 8, 2022
ee6c21e
fix error test
tushushu Nov 8, 2022
4bb586b
cargo clippy fix
tushushu Nov 8, 2022
9c700db
cargo clippy fix
tushushu Nov 8, 2022
12dd60a
remove dead code
tushushu Nov 8, 2022
b0a6c4b
fix import
tushushu Nov 8, 2022
b0430c7
fix
tushushu Nov 8, 2022
85b1916
pypy has no PyTuple::get_item_unchecked
tushushu Nov 8, 2022
bea9060
Merge branch 'main' into wip-py-mapping
tushushu Nov 8, 2022
0a3531b
fix lint
tushushu Nov 8, 2022
54ede65
rename func
tushushu Nov 12, 2022
61651f0
use get_item_unchecked
tushushu Nov 12, 2022
34e5e39
not using unwrap
tushushu Nov 13, 2022
ec1d3e9
iter mapping
tushushu Nov 15, 2022
54bbac7
fix CI
tushushu Nov 15, 2022
4e11867
use macros
tushushu Nov 15, 2022
5f4d401
fix dict iter
samuelcolvin Nov 16, 2022
81365ca
simplify typeddict logic
samuelcolvin Nov 16, 2022
0897615
Merge branch 'main' into tushushu-wip-py-mapping
samuelcolvin Nov 16, 2022
05f676c
fix for pypy, small cleanup
samuelcolvin Nov 16, 2022
a9f4132
more test cases
samuelcolvin Nov 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pydantic_core/core_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1173,7 +1173,7 @@ def multi_host_url_schema(
'string_too_long',
'string_pattern_mismatch',
'dict_type',
'dict_from_mapping',
'mapping_type',
'list_type',
'tuple_type',
'set_type',
Expand Down
12 changes: 6 additions & 6 deletions src/errors/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,9 @@ pub enum ErrorType {
// dict errors
#[strum(message = "Input should be a valid dictionary")]
DictType,
#[strum(message = "Unable to convert mapping to a dictionary, error: {error}")]
DictFromMapping {
error: String,
#[strum(message = "Input should be a valid mapping, error: {error}")]
MappingType {
error: Cow<'static, str>,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@samuelcolvin What's the downside of using the `String` type here? It still passes CI.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the case where we use a &'static str error, we can avoid allocating a new string, and just use the reference to that static string. But this also allows us to use a String if we want something else.

Cow here just means

enum Cow {
    Borrowed(&'static str),
    Owned(String)
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, that makes sense to me.

},
// ---------------------
// list errors
Expand Down Expand Up @@ -448,7 +448,7 @@ impl ErrorType {
Self::StringTooShort { .. } => extract_context!(StringTooShort, ctx, min_length: usize),
Self::StringTooLong { .. } => extract_context!(StringTooLong, ctx, max_length: usize),
Self::StringPatternMismatch { .. } => extract_context!(StringPatternMismatch, ctx, pattern: String),
Self::DictFromMapping { .. } => extract_context!(DictFromMapping, ctx, error: String),
Self::MappingType { .. } => extract_context!(Cow::Owned, MappingType, ctx, error: String),
Self::BytesTooShort { .. } => extract_context!(BytesTooShort, ctx, min_length: usize),
Self::BytesTooLong { .. } => extract_context!(BytesTooLong, ctx, max_length: usize),
Self::ValueError { .. } => extract_context!(ValueError, ctx, error: String),
Expand Down Expand Up @@ -540,7 +540,7 @@ impl ErrorType {
Self::StringTooShort { min_length } => to_string_render!(self, min_length),
Self::StringTooLong { max_length } => to_string_render!(self, max_length),
Self::StringPatternMismatch { pattern } => render!(self, pattern),
Self::DictFromMapping { error } => render!(self, error),
Self::MappingType { error } => render!(self, error),
Self::BytesTooShort { min_length } => to_string_render!(self, min_length),
Self::BytesTooLong { max_length } => to_string_render!(self, max_length),
Self::ValueError { error } => render!(self, error),
Expand Down Expand Up @@ -593,7 +593,7 @@ impl ErrorType {
Self::StringTooShort { min_length } => py_dict!(py, min_length),
Self::StringTooLong { max_length } => py_dict!(py, max_length),
Self::StringPatternMismatch { pattern } => py_dict!(py, pattern),
Self::DictFromMapping { error } => py_dict!(py, error),
Self::MappingType { error } => py_dict!(py, error),
Self::BytesTooShort { min_length } => py_dict!(py, min_length),
Self::BytesTooLong { max_length } => py_dict!(py, max_length),
Self::ValueError { error } => py_dict!(py, error),
Expand Down
55 changes: 6 additions & 49 deletions src/input/input_python.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
use std::borrow::Cow;
use std::str::from_utf8;

use pyo3::exceptions::PyAttributeError;
use pyo3::once_cell::GILOnceCell;
use pyo3::prelude::*;
use pyo3::types::{
PyBool, PyByteArray, PyBytes, PyDate, PyDateTime, PyDelta, PyDict, PyFrozenSet, PyIterator, PyList, PyMapping,
PySequence, PySet, PyString, PyTime, PyTuple, PyType,
PySet, PyString, PyTime, PyTuple, PyType,
};
#[cfg(not(PyPy))]
use pyo3::types::{PyDictItems, PyDictKeys, PyDictValues};
use pyo3::{ffi, intern, AsPyPointer, PyTypeInfo};

use crate::errors::{py_err_string, ErrorType, InputValue, LocItem, ValError, ValLineError, ValResult};
use crate::errors::{ErrorType, InputValue, LocItem, ValError, ValLineError, ValResult};
use crate::{PyMultiHostUrl, PyUrl};

use super::datetime::{
Expand Down Expand Up @@ -329,8 +328,8 @@ impl<'a> Input<'a> for PyAny {
fn lax_dict(&'a self) -> ValResult<GenericMapping<'a>> {
if let Ok(dict) = self.cast_as::<PyDict>() {
Ok(dict.into())
} else if let Some(generic_mapping) = mapping_as_dict(self) {
generic_mapping
} else if let Ok(mapping) = self.cast_as::<PyMapping>() {
Ok(mapping.into())
} else {
Err(ValError::new(ErrorType::DictType, self))
}
Expand All @@ -342,9 +341,8 @@ impl<'a> Input<'a> for PyAny {
if let Ok(dict) = self.cast_as::<PyDict>() {
return Ok(dict.into());
} else if !strict {
// we can't do this in one set of if/else because we need to check from_mapping before doing this
if let Some(generic_mapping) = mapping_as_dict(self) {
return generic_mapping;
if let Ok(mapping) = self.cast_as::<PyMapping>() {
return Ok(mapping.into());
}
}

Expand Down Expand Up @@ -643,47 +641,6 @@ impl<'a> Input<'a> for PyAny {
}
}

/// Try to treat `obj` as a mapping and convert it into a dict-backed `GenericMapping`.
///
/// Returns `None` when `obj` is not considered a mapping: either `cast_as::<PyMapping>` fails, or
/// calling `items()` raises an `AttributeError`.
/// Otherwise returns `Some(Ok(..))` with the converted mapping, or `Some(Err(..))` holding a
/// `DictFromMapping` error if building the dict fails.
fn mapping_as_dict(obj: &PyAny) -> Option<ValResult<GenericMapping>> {
    let mapping = obj.cast_as::<PyMapping>().ok()?;

    // see https://github.com/PyO3/pyo3/issues/2072 - the cast_as::<PyMapping> is not entirely accurate
    // and returns some things which are definitely not mappings (e.g. str) as mapping,
    // hence we also require that the object has `items` to consider it a mapping
    let converted = match mapping.items() {
        Ok(items) => mapping_seq_as_dict(items),
        Err(err) => {
            let is_attribute_error = matches!(
                err.get_type(obj.py()).is_subclass_of::<PyAttributeError>(),
                Ok(true)
            );
            if is_attribute_error {
                return None;
            }
            Err(err)
        }
    };

    let outcome = converted.map(|dict| dict.into()).map_err(|err| {
        ValError::new(
            ErrorType::DictFromMapping {
                error: py_err_string(obj.py(), err),
            },
            obj,
        )
    });
    Some(outcome)
}

/// Copy every `(key, value)` pair yielded by `seq` into a freshly created `PyDict`.
///
/// Any error from iterating, from extracting an element as a two-tuple, or from inserting into
/// the dict is returned immediately.
// creating a temporary dict is slow, we could perhaps use an indexmap instead
fn mapping_seq_as_dict(seq: &PySequence) -> PyResult<&PyDict> {
    let target = PyDict::new(seq.py());
    seq.iter()?.try_for_each(|entry| -> PyResult<()> {
        let pair: (&PyAny, &PyAny) = entry?.extract()?;
        target.set_item(pair.0, pair.1)
    })?;
    Ok(target)
}

/// Best effort check of whether it's likely to make sense to inspect obj for attributes and iterate over it
/// with `obj.dir()`
fn from_attributes_applicable(obj: &PyAny) -> bool {
Expand Down
5 changes: 3 additions & 2 deletions src/input/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ pub use datetime::{EitherDate, EitherDateTime, EitherTime, EitherTimedelta};
pub use input_abstract::Input;
pub use parse_json::{JsonInput, JsonObject, JsonType};
pub use return_enums::{
py_string_str, EitherBytes, EitherString, GenericArguments, GenericCollection, GenericIterator, GenericMapping,
JsonArgs, PyArgs,
py_string_str, AttributesGenericIterator, DictGenericIterator, EitherBytes, EitherString, GenericArguments,
GenericCollection, GenericIterator, GenericMapping, JsonArgs, JsonObjectGenericIterator, MappingGenericIterator,
PyArgs,
};

pub fn repr_string(v: &PyAny) -> PyResult<String> {
Expand Down
184 changes: 183 additions & 1 deletion src/input/return_enums.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
use std::borrow::Cow;

use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyDict, PyFrozenSet, PyIterator, PyList, PySet, PyString, PyTuple};
use pyo3::types::iter::PyDictIterator;
use pyo3::types::{PyBytes, PyDict, PyFrozenSet, PyIterator, PyList, PyMapping, PySet, PyString, PyTuple};

#[cfg(not(PyPy))]
use pyo3::types::PyFunction;
#[cfg(not(PyPy))]
use pyo3::PyTypeInfo;

use indexmap::map::Iter;

use crate::errors::{py_err_string, ErrorType, InputValue, ValError, ValLineError, ValResult};
use crate::recursion_guard::RecursionGuard;
Expand Down Expand Up @@ -233,14 +242,187 @@ impl<'a> GenericCollection<'a> {
/// A borrowed, mapping-like input; each variant wraps a reference to a different source type.
#[cfg_attr(debug_assertions, derive(Debug))]
pub enum GenericMapping<'a> {
/// A Python `dict`.
PyDict(&'a PyDict),
/// A Python object that downcast to `PyMapping` but is not a `dict`.
PyMapping(&'a PyMapping),
/// An arbitrary Python object; presumably its attributes are read as the mapping's
/// entries - TODO confirm against the consumers of this variant.
PyGetAttr(&'a PyAny),
/// A `JsonObject` (the project's JSON object type).
JsonObject(&'a JsonObject),
}

// NOTE(review): `derive_from!` is defined outside this view; it presumably generates the
// `From<&T>` conversion into the named variant (callers use `.into()`) - confirm.
derive_from!(GenericMapping, PyDict, PyDict);
derive_from!(GenericMapping, PyMapping, PyMapping);
derive_from!(GenericMapping, PyGetAttr, PyAny);
derive_from!(GenericMapping, JsonObject, JsonObject);

/// Iterator over the `(key, value)` pairs of a Python `dict`.
pub struct DictGenericIterator<'py> {
    /// The underlying pyo3 dict iterator.
    dict_iter: PyDictIterator<'py>,
}

impl<'py> DictGenericIterator<'py> {
    /// Create an iterator over `dict`.
    ///
    /// This always returns `Ok`; the `ValResult` return type matches the constructors of the
    /// other generic iterators.
    pub fn new(dict: &'py PyDict) -> ValResult<'py, Self> {
        let dict_iter = dict.iter();
        Ok(Self { dict_iter })
    }
}

impl<'py> Iterator for DictGenericIterator<'py> {
type Item = ValResult<'py, (&'py PyAny, &'py PyAny)>;

fn next(&mut self) -> Option<Self::Item> {
self.dict_iter.next().map(Ok)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that every element has been changed from (&PyAny, &PyAny) to Result<(&PyAny, &PyAny), E>. I am not sure if it takes time to extract T from Result<T, E>. Does it break the Zero Cost Abstractions rule?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My guess is that it will not have zero cost, but it will have a tiny cost.

CodSpeed, which is now running our benchmarks synthetically with Valgrind, should be able to tell us if stuff like this causes a slowdown.

Unfortunately codspeed is currently failing on pull requests from forks, but @art049 is working on fixing that now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comparing main to this with pytest-speed shows no significant change

Benchmarks being compared                                                                              
┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃                       ┃  Before                                     ┃  After                        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│  ID                   │  010                                        │  011                          │
│  Branch               │  main                                       │  tushushu-wip-py-mapping      │
│  Commit SHA           │  1900f4a                                    │  05f676c                      │
│  Commit Message       │  add s390x & ppc64le linux binaries (#333)  │  fix for pypy, small cleanup  │
│                       │                                             │                               │
│                       │  * add s390x & ppc64le linux binaries       │                               │
│                       │                                             │                               │
│                       │  * bump                                     │                               │
│                       │                                             │                               │
│                       │  * setting interpreter and container        │                               │
│  Benchmark Timestamp  │  13:53 (1 mins ago)                         │  13:54 (12 seconds ago)       │
└───────────────────────┴─────────────────────────────────────────────┴───────────────────────────────┘

Benchmarks Comparison                                                                                      
┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓
┃  Group                 ┃  Benchmark                 ┃  Before (ns/iter)  ┃  After (ns/iter)  ┃  Change  ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩
│  create small model    │  small_class_core_dict     │             279.2  │            273.2  │  -2.14%  │
├────────────────────────┼────────────────────────────┼────────────────────┼───────────────────┼──────────┤
│  List[TypedDict]       │  list_of_dict_models_core  │           12470.0  │          12377.2  │  -0.74%  │
├────────────────────────┼────────────────────────────┼────────────────────┼───────────────────┼──────────┤
│  Dict                  │  dict_of_ints_core         │          347572.6  │         343434.7  │  -1.19%  │
├────────────────────────┼────────────────────────────┼────────────────────┼───────────────────┼──────────┤
│  Dict                  │  dict_of_any_core          │          134555.5  │         135946.4  │  +1.03%  │
├────────────────────────┼────────────────────────────┼────────────────────┼───────────────────┼──────────┤
│  Dict JSON             │  dict_of_ints_core_json    │          367568.2  │         368587.2  │  +0.28%  │
├────────────────────────┼────────────────────────────┼────────────────────┼───────────────────┼──────────┤
│  List[DictSimpleMode]  │  many_models_core_dict     │          136714.7  │         137600.5  │  +0.65%  │
└────────────────────────┴────────────────────────────┴────────────────────┴───────────────────┴──────────┘

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the explanation; it seems worthwhile to make a trade-off between speed and code abstraction.

}
// size_hint is omitted as it isn't needed
}

/// Iterator over the `(key, value)` pairs of a Python mapping, driven by the mapping's `items()`.
pub struct MappingGenericIterator<'py> {
// the original mapping object, attached to any validation error that is raised
input: &'py PyAny,
// Python iterator over the result of `mapping.items()`
iter: &'py PyIterator,
}

fn mapping_err<'py>(err: PyErr, py: Python<'py>, input: &'py impl Input<'py>) -> ValError<'py> {
ValError::new(
ErrorType::MappingType {
error: py_err_string(py, err).into(),
},
input,
)
}

impl<'py> MappingGenericIterator<'py> {
    /// Build an iterator over `mapping.items()`.
    ///
    /// A Python exception raised either by calling `items()` or by obtaining an iterator over
    /// its result is converted to a `MappingType` validation error against the mapping itself.
    pub fn new(mapping: &'py PyMapping) -> ValResult<'py, Self> {
        let py = mapping.py();
        let input: &PyAny = mapping;

        let items = mapping.items().map_err(|e| mapping_err(e, py, input))?;
        let iter = items.iter().map_err(|e| mapping_err(e, py, input))?;

        Ok(Self { input, iter })
    }
}

/// Error text used when an element of `mapping.items()` is not a 2-tuple.
const MAPPING_TUPLE_ERROR: &str = "Mapping items must be tuples of (key, value) pairs";

impl<'py> Iterator for MappingGenericIterator<'py> {
    type Item = ValResult<'py, (&'py PyAny, &'py PyAny)>;

    /// Yield the next `(key, value)` pair.
    ///
    /// Produces a `MappingType` error if the underlying Python iterator raises, or if the
    /// yielded element is not a tuple of exactly two items.
    fn next(&mut self) -> Option<Self::Item> {
        let item = match self.iter.next()? {
            Ok(item) => item,
            Err(e) => return Some(Err(mapping_err(e, self.iter.py(), self.input))),
        };

        // anything that is not a tuple, or is a tuple of the wrong length, gets the same error
        let pair = match item.cast_as::<PyTuple>() {
            Ok(tuple) if tuple.len() == 2 => tuple,
            _ => {
                return Some(Err(ValError::new(
                    ErrorType::MappingType {
                        error: MAPPING_TUPLE_ERROR.into(),
                    },
                    self.input,
                )))
            }
        };

        #[cfg(PyPy)]
        let (key, value) = (pair.get_item(0).unwrap(), pair.get_item(1).unwrap());
        // SAFETY: the match above guarantees `pair` holds exactly two items, so indexes 0 and 1
        // are in bounds.
        #[cfg(not(PyPy))]
        let (key, value) = unsafe { (pair.get_item_unchecked(0), pair.get_item_unchecked(1)) };

        Some(Ok((key, value)))
    }
    // size_hint is omitted as it isn't needed
}

/// Iterator over the attributes of a Python object, driven by the result of `dir()`.
pub struct AttributesGenericIterator<'py> {
    /// The object whose attributes are read.
    object: &'py PyAny,
    /// The attribute names returned by `object.dir()`.
    attributes: &'py PyList,
    /// Position of the next entry of `attributes` to examine.
    index: usize,
}

impl<'py> AttributesGenericIterator<'py> {
    /// Create an iterator over the attributes of `py_any`, starting at the first name from `dir()`.
    ///
    /// This always returns `Ok`.
    pub fn new(py_any: &'py PyAny) -> ValResult<'py, Self> {
        let attributes = py_any.dir();
        Ok(Self {
            object: py_any,
            attributes,
            index: 0,
        })
    }
}

impl<'py> Iterator for AttributesGenericIterator<'py> {
    type Item = ValResult<'py, (&'py PyAny, &'py PyAny)>;

    /// Yield the next `(name, value)` attribute pair.
    ///
    /// Skipped: names starting with an underscore, attributes whose lookup raises, bound
    /// methods and plain functions. A name that is not a `PyString` yields an error.
    fn next(&mut self) -> Option<Self::Item> {
        // loop until we find an attribute whose name does not start with underscore,
        // or we get to the end of the list of attributes
        while self.index < self.attributes.len() {
            #[cfg(PyPy)]
            let name: &PyAny = self.attributes.get_item(self.index).unwrap();
            // SAFETY: the loop condition has just checked that `self.index` is below the list length.
            #[cfg(not(PyPy))]
            let name: &PyAny = unsafe { self.attributes.get_item_unchecked(self.index) };
            self.index += 1;

            // from benchmarks this is 14x faster than using the python `startswith` method
            let name_cow = match name.cast_as::<PyString>() {
                Ok(py_str) => py_str.to_string_lossy(),
                Err(e) => return Some(Err(e.into())),
            };
            if name_cow.starts_with('_') {
                continue;
            }

            // getattr is most likely to fail due to an exception in a @property, skip
            let attr = match self.object.getattr(name_cow.as_ref()) {
                Ok(attr) => attr,
                Err(_) => continue,
            };

            // we don't want bound methods to be included, is there a better way to check?
            // ref https://stackoverflow.com/a/18955425/949890
            let is_bound = matches!(attr.hasattr(intern!(attr.py(), "__self__")), Ok(true));
            if is_bound {
                continue;
            }

            // the PyFunction::is_type_of(attr) catches `staticmethod`, but also any other function,
            // I think that's better than including static methods in the yielded attributes,
            // if someone really wants fields, they can use an explicit field, or a function to modify input
            #[cfg(not(PyPy))]
            let is_function = PyFunction::is_type_of(attr);
            // MASSIVE HACK! PyFunction doesn't exist for PyPy,
            // is_instance_of::<PyFunction> crashes with a null pointer, hence this hack, see
            // https://github.com/pydantic/pydantic-core/pull/161#discussion_r917257635
            #[cfg(PyPy)]
            let is_function = attr.get_type().to_string() == "<class 'function'>";

            if !is_function {
                return Some(Ok((name, attr)));
            }
        }
        None
    }
    // size_hint is omitted as it isn't needed
}

/// Iterator over the `(key, value)` entries of a `JsonObject`.
pub struct JsonObjectGenericIterator<'py> {
    /// The underlying `indexmap` iterator over the object's entries.
    object_iter: Iter<'py, String, JsonInput>,
}

impl<'py> JsonObjectGenericIterator<'py> {
    /// Create an iterator over the entries of `json_object`.
    ///
    /// This always returns `Ok`.
    pub fn new(json_object: &'py JsonObject) -> ValResult<'py, Self> {
        let object_iter = json_object.iter();
        Ok(Self { object_iter })
    }
}

impl<'py> Iterator for JsonObjectGenericIterator<'py> {
    type Item = ValResult<'py, (&'py String, &'py JsonInput)>;

    /// Yield the next `(key, value)` entry wrapped in `Ok`; this iterator never yields an error.
    fn next(&mut self) -> Option<Self::Item> {
        let entry = self.object_iter.next()?;
        Some(Ok(entry))
    }
    // size_hint is omitted as it isn't needed
}

#[derive(Debug, Clone)]
pub enum GenericIterator {
PyIterator(GenericPyIterator),
Expand Down
Loading