Skip to content

Commit

Permalink
Add argument none_value for None representation in loading and du…
Browse files Browse the repository at this point in the history
…mping (#53)

* Add `none` for `None` repr in load/loads

* Add `none` for `None` repr in dump/dumps

* Fix linting

* Edit as suggested

* Revise as suggested

* Add omit_none

* Update tests

* Fix linting

* Allow none_value=None for dumps to ignore None items

* Pass none_value to table_key()

* Update README.md and docstrings

* fix grammar in readme

* fix unwraps and add tests

* fix linting

* remove timedelta from py_type, fix linting

---------

Co-authored-by: Samuel Colvin <s@muelcolvin.com>
  • Loading branch information
pwwang and samuelcolvin committed Jun 24, 2024
1 parent 1d0b60e commit 092eff2
Show file tree
Hide file tree
Showing 12 changed files with 317 additions and 218 deletions.
285 changes: 126 additions & 159 deletions Cargo.lock

Large diffs are not rendered by default.

65 changes: 54 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ A better TOML library for python implemented in rust.
library, it passes all the [standard TOML tests](https://github.com/BurntSushi/toml-test) as well as having 100%
coverage on python code. Other TOML libraries for python I tried all failed to parse some valid TOML.
* Performance: see [github.com/pwwang/toml-bench](https://github.com/pwwang/toml-bench) -
rtoml is much faster than pure Python TOML libraries.
rtoml is the fastest Python TOML library at the time of writing.
* `None`-value handling: rtoml has flexible support for `None` values, instead of simply ignoring them.

## Install

Expand All @@ -33,38 +34,50 @@ installed before you can install rtoml.

#### load
```python
def load(toml: Union[str, Path, TextIO]) -> Dict[str, Any]: ...
def load(toml: Union[str, Path, TextIO], *, none_value: Optional[str] = None) -> Dict[str, Any]: ...
```

Parse TOML via a string or file and return a python dictionary. The `toml` argument may be a `str`,
`Path` or file object from `open()`.
Parse TOML via a string or file and return a python dictionary.

* `toml`: a `str`, `Path` or file object from `open()`.
* `none_value`: controls which string value in `toml` is loaded as `None` in Python. By default, `none_value` is `None`, which means nothing is loaded as `None`.

#### loads
```python
def loads(toml: str) -> Dict[str, Any]: ...
def loads(toml: str, *, none_value: Optional[str] = None) -> Dict[str, Any]: ...
```

Parse a TOML string and return a python dictionary. (provided to match the interface of `json` and similar libraries)

* `toml`: a `str` containing TOML.
* `none_value`: controls which string value in `toml` is loaded as `None` in Python. By default, `none_value` is `None`, which means nothing is loaded as `None`.

#### dumps
```python
def dumps(obj: Any, *, pretty: bool = False) -> str: ...
def dumps(obj: Any, *, pretty: bool = False, none_value: Optional[str] = "null") -> str: ...
```

Serialize a python object to TOML.

If `pretty` is true, output has a more "pretty" format.
* `obj`: a python object to be serialized.
* `pretty`: if `True` the output has a more "pretty" format.
* `none_value`: controls how `None` values in `obj` are serialized: each `None` is written as this string (default `"null"`). `none_value=None` means `None` values are omitted from the output.

#### dump
```python
def dump(obj: Any, file: Union[Path, TextIO], *, pretty: bool = False) -> int: ...
def dump(
obj: Any, file: Union[Path, TextIO], *, pretty: bool = False, none_value: Optional[str] = "null"
) -> int: ...
```

Serialize a python object to TOML and write it to a file. `file` may be a `Path` or file object from `open()`.
Serialize a python object to TOML and write it to a file.

If `pretty` is true, output has a more "pretty" format.
* `obj`: a python object to be serialized.
* `file`: a `Path` or file object from `open()`.
* `pretty`: if `True` the output has a more "pretty" format.
* `none_value`: controls how `None` values in `obj` are serialized: each `None` is written as this string (default `"null"`). `none_value=None` means `None` values are omitted from the output.

### Example
### Examples

```py
from datetime import datetime, timezone, timedelta
Expand Down Expand Up @@ -116,3 +129,33 @@ server = "192.168.1.1"
ports = [8001, 8001, 8002]
"""
```

An example of `None`-value handling:

```python
obj = {
'a': None,
'b': 1,
'c': [1, 2, None, 3],
}

# Ignore None values
assert rtoml.dumps(obj, none_value=None) == """\
b = 1
c = [1, 2, 3]
"""

# Serialize None values as '@None'
assert rtoml.dumps(obj, none_value='@None') == """\
a = "@None"
b = 1
c = [1, 2, "@None", 3]
"""

# Deserialize '@None' back to None
assert rtoml.load("""\
a = "@None"
b = 1
c = [1, 2, "@None", 3]
""", none_value='@None') == obj
```
3 changes: 2 additions & 1 deletion example.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime, timezone, timedelta
from datetime import datetime, timedelta, timezone

import rtoml

obj = {
Expand Down
46 changes: 32 additions & 14 deletions rtoml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from io import TextIOBase
from pathlib import Path
from typing import Any, Dict, TextIO, Union
from typing import Any, Dict, Optional, TextIO, Union

from . import _rtoml

Expand All @@ -13,49 +13,67 @@
TomlSerializationError = _rtoml.TomlSerializationError


def load(toml: Union[str, Path, TextIO]) -> Dict[str, Any]:
def load(toml: Union[str, Path, TextIO], *, none_value: Optional[str] = None) -> Dict[str, Any]:
"""
Parse TOML via a string or file and return a python dict. The `toml` argument may be a `str`,
`Path` or file object from `open()`.
Parse TOML via a string or file and return a python dict.
Args:
toml: a `str`, `Path` or file object from `open()`.
none_value: controlling which value in `toml` is loaded as `None` in python.
By default, `none_value` is `None`, which means nothing is loaded as `None`.
"""
if isinstance(toml, Path):
toml = toml.read_text(encoding='UTF-8')
elif isinstance(toml, (TextIOBase, TextIO)):
toml = toml.read()

return loads(toml)
return loads(toml, none_value=none_value)


def loads(toml: str) -> Dict[str, Any]:
def loads(toml: str, *, none_value: Optional[str] = None) -> Dict[str, Any]:
"""
Parse a TOML string and return a python dict. (provided to match the interface of `json` and similar libraries)
Args:
toml: a `str` containing TOML.
none_value: controlling which value in `toml` is loaded as `None` in python.
By default, `none_value` is `None`, which means nothing is loaded as `None`.
"""
if not isinstance(toml, str):
raise TypeError(f'invalid toml input, must be str not {type(toml)}')
return _rtoml.deserialize(toml)
return _rtoml.deserialize(toml, none_value=none_value)


def dumps(obj: Any, *, pretty: bool = False) -> str:
def dumps(obj: Any, *, pretty: bool = False, none_value: Optional[str] = 'null') -> str:
"""
Serialize a python object to TOML.
If `pretty` is true, output has a more "pretty" format.
Args:
obj: a python object to be serialized.
pretty: if true, output has a more "pretty" format.
none_value: controlling how `None` values in `obj` are serialized.
`none_value=None` means `None` values are ignored.
"""
if pretty:
serialize = _rtoml.serialize_pretty
else:
serialize = _rtoml.serialize

return serialize(obj)
return serialize(obj, none_value=none_value)


def dump(obj: Any, file: Union[Path, TextIO], *, pretty: bool = False) -> int:
def dump(obj: Any, file: Union[Path, TextIO], *, pretty: bool = False, none_value: Optional[str] = 'null') -> int:
"""
Serialize a python object to TOML and write it to a file. `file` may be a `Path` or file object from `open()`.
Serialize a python object to TOML and write it to a file.
If `pretty` is true, output has a more "pretty" format.
Args:
obj: a python object to be serialized.
file: a `Path` or file object from `open()`.
pretty: if `True` the output has a more "pretty" format.
none_value: controlling how `None` values in `obj` are serialized.
`none_value=None` means `None` values are ignored.
"""
s = dumps(obj, pretty=pretty)
s = dumps(obj, pretty=pretty, none_value=none_value)
if isinstance(file, Path):
return file.write_text(s, encoding='UTF-8')
else:
Expand Down
6 changes: 3 additions & 3 deletions rtoml/_rtoml.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ from typing import Any

__version__: str

def deserialize(toml: str) -> Any: ...
def serialize(obj: Any) -> str: ...
def serialize_pretty(obj: Any) -> str: ...
def deserialize(toml: str, none_value: str | None = None) -> Any: ...
def serialize(obj: Any, none_value: str | None = 'null') -> str: ...
def serialize_pretty(obj: Any, none_value: str | None = 'null') -> str: ...

class TomlParsingError(ValueError): ...
class TomlSerializationError(ValueError): ...
16 changes: 10 additions & 6 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ pub type NoHashSet<T> = HashSet<T, BuildNoHashHasher<T>>;

pub struct PyDeserializer<'py> {
py: Python<'py>,
none_value: Option<&'py str>,
}

impl<'py> PyDeserializer<'py> {
pub fn new(py: Python<'py>) -> Self {
Self { py }
pub fn new(py: Python<'py>, none_value: Option<&'py str>) -> Self {
Self { py, none_value }
}
}

Expand Down Expand Up @@ -78,7 +79,10 @@ impl<'de, 'py> Visitor<'de> for PyDeserializer<'py> {
where
E: de::Error,
{
Ok(value.into_py(self.py))
match self.none_value {
Some(none_value) if value == none_value => Ok(self.py.None()),
_ => Ok(value.into_py(self.py)),
}
}

fn visit_unit<E>(self) -> Result<Self::Value, E> {
Expand All @@ -91,7 +95,7 @@ impl<'de, 'py> Visitor<'de> for PyDeserializer<'py> {
{
let mut elements = Vec::new();

while let Some(elem) = seq.next_element_seed(PyDeserializer::new(self.py))? {
while let Some(elem) = seq.next_element_seed(PyDeserializer::new(self.py, self.none_value))? {
elements.push(elem);
}

Expand All @@ -102,7 +106,7 @@ impl<'de, 'py> Visitor<'de> for PyDeserializer<'py> {
where
A: MapAccess<'de>,
{
match map_access.next_entry_seed(PhantomData::<String>, PyDeserializer::new(self.py))? {
match map_access.next_entry_seed(PhantomData::<String>, PyDeserializer::new(self.py, self.none_value))? {
Some((first_key, first_value)) if first_key == DATETIME_MAPPING_KEY => {
let py_string = first_value.extract::<&str>(self.py).map_err(de::Error::custom)?;
let dt: TomlDatetime = TomlDatetime::from_str(py_string).map_err(de::Error::custom)?;
Expand All @@ -119,7 +123,7 @@ impl<'de, 'py> Visitor<'de> for PyDeserializer<'py> {
dict.set_item(first_key, first_value).map_err(de::Error::custom)?;

while let Some((key, value)) =
map_access.next_entry_seed(PhantomData::<String>, PyDeserializer::new(self.py))?
map_access.next_entry_seed(PhantomData::<String>, PyDeserializer::new(self.py, self.none_value))?
{
if key_set.insert(hash_builder.hash_one(&key)) {
dict.set_item(key, value).map_err(de::Error::custom)?;
Expand Down
12 changes: 6 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,25 @@ create_exception!(_rtoml, TomlParsingError, PyValueError);
create_exception!(_rtoml, TomlSerializationError, PyValueError);

#[pyfunction]
fn deserialize(py: Python, toml_data: String) -> PyResult<PyObject> {
fn deserialize(py: Python, toml_data: String, none_value: Option<&str>) -> PyResult<PyObject> {
let mut deserializer = Deserializer::new(&toml_data);
let seed = de::PyDeserializer::new(py);
let seed = de::PyDeserializer::new(py, none_value);
seed.deserialize(&mut deserializer)
.map_err(|e| TomlParsingError::new_err(e.to_string()))
}

#[pyfunction]
fn serialize(py: Python, obj: &PyAny) -> PyResult<String> {
let s = SerializePyObject::new(py, obj);
fn serialize(py: Python, obj: &PyAny, none_value: Option<&str>) -> PyResult<String> {
let s = SerializePyObject::new(py, obj, none_value);
match to_toml_string(&s) {
Ok(s) => Ok(s),
Err(e) => Err(TomlSerializationError::new_err(e.to_string())),
}
}

#[pyfunction]
fn serialize_pretty(py: Python, obj: &PyAny) -> PyResult<String> {
let s = SerializePyObject::new(py, obj);
fn serialize_pretty(py: Python, obj: &PyAny, none_value: Option<&str>) -> PyResult<String> {
let s = SerializePyObject::new(py, obj, none_value);
match to_toml_string_pretty(&s) {
Ok(s) => Ok(s),
Err(e) => Err(TomlSerializationError::new_err(e.to_string())),
Expand Down
4 changes: 1 addition & 3 deletions src/py_type.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use pyo3::once_cell::GILOnceCell;
use pyo3::prelude::*;
use pyo3::types::{PyByteArray, PyBytes, PyDate, PyDateTime, PyDelta, PyDict, PyList, PyString, PyTime, PyTuple};
use pyo3::types::{PyByteArray, PyBytes, PyDate, PyDateTime, PyDict, PyList, PyString, PyTime, PyTuple};

#[derive(Clone)]
#[cfg_attr(debug_assertions, derive(Debug))]
Expand All @@ -23,7 +23,6 @@ pub struct PyTypeLookup {
pub datetime: usize,
pub date: usize,
pub time: usize,
pub timedelta: usize,
}

static TYPE_LOOKUP: GILOnceCell<PyTypeLookup> = GILOnceCell::new();
Expand Down Expand Up @@ -51,7 +50,6 @@ impl PyTypeLookup {
.get_type_ptr() as usize,
date: PyDate::new(py, 2000, 1, 1).unwrap().get_type_ptr() as usize,
time: PyTime::new(py, 0, 0, 0, 0, None).unwrap().get_type_ptr() as usize,
timedelta: PyDelta::new(py, 0, 0, 0, false).unwrap().get_type_ptr() as usize,
}
}

Expand Down
Loading

0 comments on commit 092eff2

Please sign in to comment.