Skip to content

Commit

Permalink
tweak python package (#96)
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin committed May 20, 2024
1 parent d89c3a8 commit 1fbedbf
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 33 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -492,11 +492,10 @@ jobs:

- name: run tests
run: |
cd crates/jiter-python
python3 -m pip install -U pip -r tests/requirements.txt
python3 -m pip install jiter --no-index --no-deps --find-links dist --force-reinstall
python3 -m pytest
working-directory: crates/jiter-python

# https://github.com/marketplace/actions/alls-green#why used for branch protection checks
check:
Expand Down
10 changes: 10 additions & 0 deletions crates/jiter-python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,13 @@ extension-module = ["pyo3/extension-module"]
[lib]
name = "jiter_python"
crate-type = ["cdylib", "rlib"]

[lints.clippy]
dbg_macro = "deny"
print_stdout = "deny"
print_stderr = "deny"
# in general we lint against the pedantic group, but we will whitelist
# certain lints which we don't want to enforce (for now)
pedantic = { level = "deny", priority = -1 }
missing_errors_doc = "allow"
must_use_candidate = "allow"
48 changes: 32 additions & 16 deletions crates/jiter-python/README.md
Original file line number Diff line number Diff line change
@@ -1,36 +1,52 @@
# jiter

[![CI](https://github.com/pydantic/jiter/workflows/CI/badge.svg?event=push)](https://github.com/pydantic/jiter/actions?query=event%3Apush+branch%3Amain+workflow%3ACI)
[![pypi](https://img.shields.io/pypi/v/jiter.svg)](https://pypi.python.org/pypi/jiter)
[![versions](https://img.shields.io/pypi/pyversions/jiter.svg)](https://github.com/pydantic/jiter)
[![license](https://img.shields.io/github/license/pydantic/jiter.svg)](https://github.com/pydantic/jiter/blob/main/LICENSE)

This is a standalone version of the JSON parser used in `pydantic-core`. The recommendation is to only use this package directly if you do not use `pydantic`.

The API is extremely minimal:

```python
def from_json(
data: bytes,
json_data: bytes,
/,
*,
allow_inf_nan: bool = True,
cache_strings: Literal[True, False, 'all', 'keys', 'none'] = True,
cache_strings: Literal[True, False, "all", "keys", "none"] = True,
allow_partial: bool = False,
catch_duplicate_keys: bool = False,
) -> Any:
"""
Parse input bytes into a JSON string.
allow_inf_nan: if True, to allow Infinity and NaN as values in the JSON
cache_strings: cache Python strings to improve performance at the cost of some memory usage
- True / 'all' - cache all strings
- 'keys' - cache only object keys
- 'none' - cache nothing
allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays
catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times
Parse input bytes into a JSON object.
Arguments:
json_data: The JSON data to parse
allow_inf_nan: Whether to allow infinity (`Infinity` and `-Infinity`) and `NaN` values to float fields.
Defaults to True.
cache_strings: cache Python strings to improve performance at the cost of some memory usage
- True / 'all' - cache all strings
- 'keys' - cache only object keys
- False / 'none' - cache nothing
allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays
catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times
Returns:
Python object built from the JSON input.
"""
...

def cache_clear() -> None:
"""Clear the string cache"""
...
"""
Reset the string cache.
"""

def cache_usage() -> int:
"""Get number of strings in the cache"""
...
"""
Get the size of the string cache.
Returns:
Size of the string cache in bytes.
"""
```
37 changes: 33 additions & 4 deletions crates/jiter-python/jiter.pyi
Original file line number Diff line number Diff line change
@@ -1,12 +1,41 @@
from typing import Any, Literal

def from_json(
data: bytes,
json_data: bytes,
/,
*,
allow_inf_nan: bool = True,
cache_strings: Literal[True, False, "all", "keys", "none"] = True,
allow_partial: bool = False,
catch_duplicate_keys: bool = False,
) -> Any: ...
def cache_clear() -> None: ...
def cache_usage() -> int: ...
) -> Any:
"""
Parse input bytes into a JSON object.
Arguments:
json_data: The JSON data to parse
allow_inf_nan: Whether to allow infinity (`Infinity` and `-Infinity`) and `NaN` values to float fields.
Defaults to True.
cache_strings: cache Python strings to improve performance at the cost of some memory usage
- True / 'all' - cache all strings
- 'keys' - cache only object keys
- False / 'none' - cache nothing
allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays
catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times
Returns:
Python object built from the JSON input.
"""

def cache_clear() -> None:
"""
Reset the string cache.
"""

def cache_usage() -> int:
"""
Get the size of the string cache.
Returns:
Size of the string cache in bytes.
"""
29 changes: 24 additions & 5 deletions crates/jiter-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,35 @@ build-backend = "maturin"

[project]
name = "jiter"
description = "Fast iterable JSON parser."
requires-python = ">=3.8"
authors = [
{name = "Samuel Colvin", email = "s@muelcolvin.com"}
]
dynamic = [
"description",
"license",
"readme",
"version"
license = "MIT"
readme = "README.md"
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Intended Audience :: Developers",
"Intended Audience :: Information Technology",
"Intended Audience :: System Administrators",
"License :: OSI Approved :: MIT License",
"Operating System :: Unix",
"Operating System :: POSIX :: Linux",
"Environment :: Console",
"Environment :: MacOS X",
"Topic :: File Formats :: JSON",
"Framework :: Pydantic :: 2",
]
dynamic = ["version"]

[tool.maturin]
module-name = "jiter"
Expand Down
12 changes: 6 additions & 6 deletions crates/jiter-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ use jiter::{map_json_error, python_parse, StringCacheMode};

#[pyfunction(
signature = (
data,
json_data,
/,
*,
allow_inf_nan=true,
cache_strings=StringCacheMode::All,
Expand All @@ -16,22 +17,21 @@ use jiter::{map_json_error, python_parse, StringCacheMode};
)]
pub fn from_json<'py>(
py: Python<'py>,
data: &[u8],
json_data: &[u8],
allow_inf_nan: bool,
cache_strings: StringCacheMode,
allow_partial: bool,
catch_duplicate_keys: bool,
) -> PyResult<Bound<'py, PyAny>> {
let json_bytes = data;
python_parse(
py,
json_bytes,
json_data,
allow_inf_nan,
cache_strings,
allow_partial,
catch_duplicate_keys,
)
.map_err(|e| map_json_error(json_bytes, &e))
.map_err(|e| map_json_error(json_data, &e))
}

pub fn get_jiter_version() -> &'static str {
Expand All @@ -50,7 +50,7 @@ pub fn get_jiter_version() -> &'static str {

#[pyfunction]
pub fn cache_clear(py: Python<'_>) {
jiter::cache_clear(py)
jiter::cache_clear(py);
}

#[pyfunction]
Expand Down
1 change: 1 addition & 0 deletions crates/jiter-python/tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pytest
pytest-pretty
dirty_equals

0 comments on commit 1fbedbf

Please sign in to comment.