diff --git a/poetry.lock b/poetry.lock index 3aae21a..b2c3d56 100644 --- a/poetry.lock +++ b/poetry.lock @@ -436,6 +436,112 @@ type1 = ["xattr ; sys_platform == \"darwin\""] unicode = ["unicodedata2 (>=17.0.0) ; python_version <= \"3.14\""] woff = ["brotli (>=1.0.1) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\"", "zopfli (>=0.1.4)"] +[[package]] +name = "ijson" +version = "3.5.0" +description = "Iterative JSON parser with standard Python iterator interfaces" +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"json-loader\" or extra == \"all\"" +files = [ + {file = "ijson-3.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ea8dcac10d86adaeead454bc25c97b68d0bda573d5fd6f86f5e21cf8f7906f88"}, + {file = "ijson-3.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:92b0495bbb2150bbf14fc5d98fb6d76bcd1c526605a172709e602e6fedc96495"}, + {file = "ijson-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7af0c4c8943be8b09a4e57bdc1da6001dae7b36526d4154fe5c8224738d0921f"}, + {file = "ijson-3.5.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:45887d5e84ff0d2b138c926cebd9071830733968afe8d9d12080b3c178c7f918"}, + {file = "ijson-3.5.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9a70b575be8e57a28c80e90ed349ad3a851c3478524c70e36e07d6092ecd12c9"}, + {file = "ijson-3.5.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2adeecd45830bfd5580ca79a584154713aabef0b9607e16249133df5d2859813"}, + {file = "ijson-3.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d873e72889e7fc5962ab58909f1adff338d7c2f49e450e5b5fe844eff8155a14"}, + {file = "ijson-3.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9a88c559456a79708592234d697645d92b599718f4cbbeaa6515f83ac63ca0ae"}, + {file = "ijson-3.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cf83f58ad50dc0d39a2105cb26d4f359b38f42cef68b913170d4d47d97d97ba5"}, + {file = "ijson-3.5.0-cp310-cp310-win32.whl", hash = "sha256:aec4580a7712a19b1f95cd41bed260fc6a31266d37ef941827772a4c199e8143"}, + {file = "ijson-3.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:9a9c4c70501e23e8eb1675330686d1598eebfa14b6f0dbc8f00c2e081cc628fa"}, + {file = "ijson-3.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5616311404b858d32740b7ad8b9a799c62165f5ecb85d0a8ed16c21665a90533"}, + {file = "ijson-3.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9733f94029dd41702d573ef64752e2556e72aea14623d6dbb7a44ca1ccf30fd"}, + {file = "ijson-3.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:db8398c6721b98412a4f618da8022550c8b9c5d9214040646071b5deb4d4a393"}, + {file = "ijson-3.5.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c061314845c08163b1784b6076ea5f075372461a32e6916f4e5f211fd4130b64"}, + {file = "ijson-3.5.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1111a1c5ac79119c5d6e836f900c1a53844b50a18af38311baa6bb61e2645aca"}, + {file = "ijson-3.5.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e74aff8c681c24002b61b1822f9511d4c384f324f7dbc08c78538e01fdc9fcb"}, + {file = "ijson-3.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:739a7229b1b0cc5f7e2785a6e7a5fc915e850d3fed9588d0e89a09f88a417253"}, + {file = "ijson-3.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ef88712160360cab3ca6471a4e5418243f8b267cf1fe1620879d1b5558babc71"}, + {file = "ijson-3.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ca0d1b6b5f8166a6248f4309497585fb8553b04bc8179a0260fad636cfdb798"}, + {file = "ijson-3.5.0-cp311-cp311-win32.whl", hash = "sha256:966039cf9047c7967febf7b9a52ec6f38f5464a4c7fbb5565e0224b7376fefff"}, + {file = "ijson-3.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:6bad6a1634cb7c9f3f4c7e52325283b35b565f5b6cc27d42660c6912ce883422"}, + {file = "ijson-3.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1ebefbe149a6106cc848a3eaf536af51a9b5ccc9082de801389f152dba6ab755"}, + {file = "ijson-3.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:19e30d9f00f82e64de689c0b8651b9cfed879c184b139d7e1ea5030cec401c21"}, + {file = "ijson-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a04a33ee78a6f27b9b8528c1ca3c207b1df3b8b867a4cf2fcc4109986f35c227"}, + {file = "ijson-3.5.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7d48dc2984af02eb3c56edfb3f13b3f62f2f3e4fe36f058c8cfc75d93adf4fed"}, + {file = "ijson-3.5.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1e73a44844d9adbca9cf2c4132cd875933e83f3d4b23881fcaf82be83644c7d"}, + {file = "ijson-3.5.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7389a56b8562a19948bdf1d7bae3a2edc8c7f86fb59834dcb1c4c722818e645a"}, + {file = "ijson-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3176f23f8ebec83f374ed0c3b4e5a0c4db7ede54c005864efebbed46da123608"}, + {file = "ijson-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6babd88e508630c6ef86c9bebaaf13bb2fb8ec1d8f8868773a03c20253f599bc"}, + {file = "ijson-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dc1b3836b174b6db2fa8319f1926fb5445abd195dc963368092103f8579cb8ed"}, + {file = "ijson-3.5.0-cp312-cp312-win32.whl", hash = "sha256:6673de9395fb9893c1c79a43becd8c8fbee0a250be6ea324bfd1487bb5e9ee4c"}, + {file = "ijson-3.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:f4f7fabd653459dcb004175235f310435959b1bb5dfa8878578391c6cc9ad944"}, + {file = "ijson-3.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e9cedc10e40dd6023c351ed8bfc7dcfce58204f15c321c3c1546b9c7b12562a4"}, + {file = "ijson-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3647649f782ee06c97490b43680371186651f3f69bebe64c6083ee7615d185e5"}, + {file = "ijson-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:90e74be1dce05fce73451c62d1118671f78f47c9f6be3991c82b91063bf01fc9"}, + {file = "ijson-3.5.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:78e9ad73e7be2dd80627504bd5cbf512348c55ce2c06e362ed7683b5220e8568"}, + {file = "ijson-3.5.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9577449313cc94be89a4fe4b3e716c65f09cc19636d5a6b2861c4e80dddebd58"}, + {file = "ijson-3.5.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e4c1178fb50aff5f5701a30a5152ead82a14e189ce0f6102fa1b5f10b2f54ff"}, + {file = "ijson-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0eb402ab026ffb37a918d75af2b7260fe6cfbce13232cc83728a714dd30bd81d"}, + {file = "ijson-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5b08ee08355f9f729612a8eb9bf69cc14f9310c3b2a487c6f1c3c65d85216ec4"}, + {file = "ijson-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bda62b6d48442903e7bf56152108afb7f0f1293c2b9bef2f2c369defea76ab18"}, + {file = "ijson-3.5.0-cp313-cp313-win32.whl", hash = "sha256:8d073d9b13574cfa11083cc7267c238b7a6ed563c2661e79192da4a25f09c82c"}, + {file = "ijson-3.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:2419f9e32e0968a876b04d8f26aeac042abd16f582810b576936bbc4c6015069"}, + {file = "ijson-3.5.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4d4b0cd676b8c842f7648c1a783448fac5cd3b98289abd83711b3e275e143524"}, + {file = "ijson-3.5.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:252dec3680a48bb82d475e36b4ae1b3a9d7eb690b951bb98a76c5fe519e30188"}, + {file = "ijson-3.5.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:aa1b5dca97d323931fde2501172337384c958914d81a9dac7f00f0d4bfc76bc7"}, + {file = "ijson-3.5.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7a5ec7fd86d606094bba6f6f8f87494897102fa4584ef653f3005c51a784c320"}, + {file = "ijson-3.5.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:009f41443e1521847701c6d87fa3923c0b1961be3c7e7de90947c8cb92ea7c44"}, + {file = "ijson-3.5.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e4c3651d1f9fe2839a93fdf8fd1d5ca3a54975349894249f3b1b572bcc4bd577"}, + {file = "ijson-3.5.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:945b7abcfcfeae2cde17d8d900870f03536494245dda7ad4f8d056faa303256c"}, + {file = "ijson-3.5.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:0574b0a841ff97495c13e9d7260fbf3d85358b061f540c52a123db9dbbaa2ed6"}, + {file = "ijson-3.5.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f969ffb2b89c5cdf686652d7fb66252bc72126fa54d416317411497276056a18"}, + {file = "ijson-3.5.0-cp313-cp313t-win32.whl", hash = "sha256:59d3f9f46deed1332ad669518b8099920512a78bda64c1f021fcd2aff2b36693"}, + {file = "ijson-3.5.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5c2839fa233746d8aad3b8cd2354e441613f5df66d721d59da4a09394bd1db2b"}, + {file = "ijson-3.5.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:25a5a6b2045c90bb83061df27cfa43572afa43ba9408611d7bfe237c20a731a9"}, + {file = "ijson-3.5.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:8976c54c0b864bc82b951bae06567566ac77ef63b90a773a69cd73aab47f4f4f"}, + {file = "ijson-3.5.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:859eb2038f7f1b0664df4241957694cc35e6295992d71c98659b22c69b3cbc10"}, + {file = "ijson-3.5.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c911aa02991c7c0d3639b6619b93a93210ff1e7f58bf7225d613abea10adc78e"}, + {file = "ijson-3.5.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:903cbdc350173605220edc19796fbea9b2203c8b3951fb7335abfa8ed37afda8"}, + {file = "ijson-3.5.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a4549d96ded5b8efa71639b2160235415f6bdb8c83367615e2dbabcb72755c33"}, + {file = "ijson-3.5.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6b2dcf6349e6042d83f3f8c39ce84823cf7577eba25bac5aae5e39bbbbbe9c1c"}, + {file = "ijson-3.5.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:e44af39e6f8a17e5627dcd89715d8279bf3474153ff99aae031a936e5c5572e5"}, + {file = "ijson-3.5.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9260332304b7e7828db56d43f08fc970a3ab741bf84ff10189361ea1b60c395b"}, + {file = "ijson-3.5.0-cp314-cp314-win32.whl", hash = "sha256:63bc8121bb422f6969ced270173a3fa692c29d4ae30c860a2309941abd81012a"}, + {file = "ijson-3.5.0-cp314-cp314-win_amd64.whl", hash = "sha256:01b6dad72b7b7df225ef970d334556dfad46c696a2c6767fb5d9ed8889728bca"}, + {file = "ijson-3.5.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:2ea4b676ec98e374c1df400a47929859e4fa1239274339024df4716e802aa7e4"}, + {file = "ijson-3.5.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:014586eec043e23c80be9a923c56c3a0920a0f1f7d17478ce7bc20ba443968ef"}, + {file = "ijson-3.5.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d5b8b886b0248652d437f66e7c5ac318bbdcb2c7137a7e5327a68ca00b286f5f"}, + {file = "ijson-3.5.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:498fd46ae2349297e43acf97cdc421e711dbd7198418677259393d2acdc62d78"}, + {file = "ijson-3.5.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22a51b4f9b81f12793731cf226266d1de2112c3c04ba4a04117ad4e466897e05"}, + {file = "ijson-3.5.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9636c710dc4ac4a281baa266a64f323b4cc165cec26836af702c44328b59a515"}, + {file = "ijson-3.5.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f7168a39e8211107666d71b25693fd1b2bac0b33735ef744114c403c6cac21e1"}, + {file = "ijson-3.5.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8696454245415bc617ab03b0dc3ae4c86987df5dc6a90bad378fe72c5409d89e"}, + {file = "ijson-3.5.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c21bfb61f71f191565885bf1bc29e0a186292d866b4880637b833848360bdc1b"}, + {file = "ijson-3.5.0-cp314-cp314t-win32.whl", hash = "sha256:a2619460d6795b70d0155e5bf016200ac8a63ab5397aa33588bb02b6c21759e6"}, + {file = "ijson-3.5.0-cp314-cp314t-win_amd64.whl", hash = "sha256:4f24b78d4ef028d17eb57ad1b16c0aed4a17bdd9badbf232dc5d9305b7e13854"}, + {file = "ijson-3.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0ec62d397447cbe4941818c53e22b054e03250ff9cdbaea75144b11bc6db44ed"}, + {file = "ijson-3.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:75980237a16e5e36ad46fbdd33e3f3d817c187624974c48947df0a2bfa104b9e"}, + {file = "ijson-3.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a9c321e8e1cdeac8aac698d09a90d98a049c9be8e8330c89cf2fcc517c96d51d"}, + {file = "ijson-3.5.0-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:92878b130d7ad71919c70b4f50ad23ec7fbf2d09a9c675f9179d49c4be869a63"}, + {file = "ijson-3.5.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1ab890d43656c1d12c4a8dafb7fac5a2278ed3e4408102e0971f48b6ed4583d"}, + {file = "ijson-3.5.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a55185e8983fef0b21abc1a0bbaa11eeb2fabdc651e2167f23defa9fe4eb999b"}, + {file = "ijson-3.5.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5a3af031e30751164c3289294f249f942535fbe7e8f35eb3ecc374247449214e"}, + {file = "ijson-3.5.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f4c8f5ccf7230a9a94c1d836322783ed0c0ec2a151f3d53b2e0a67c89ad66970"}, + {file = "ijson-3.5.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6e249796d2090afc1c42d2458ab0dbf0072a30ffa246b5683e3f7b9dc9b1b7f9"}, + {file = "ijson-3.5.0-cp39-cp39-win32.whl", hash = "sha256:1b2cf2c0c79313fbc607a0d90788ffb4f4614872983af4aa85c5b92533ec4da2"}, + {file = "ijson-3.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:d38cb03f6b7cc26d542ff710adfe98e5f6d53878461c45456c97d3668297ec0d"}, + {file = "ijson-3.5.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d64c624da0e9d692d6eb0ff63a79656b59d76bf80773a17c5b0f835e4e8ef627"}, + {file = "ijson-3.5.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:876f7df73b7e0d6474f9caa729b9cdbfc8e76de9075a4887dfd689e29e85c4ca"}, + {file = "ijson-3.5.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e7dbff2c8d9027809b0cde663df44f3210da10ea377121d42896fb6ee405dd31"}, + {file = "ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4217a1edc278660679e1197c83a1a2a2d367792bfbb2a3279577f4b59b93730d"}, + {file = "ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:04f0fc740311388ee745ba55a12292b722d6f52000b11acbb913982ba5fbdf87"}, + {file = "ijson-3.5.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:fdeee6957f92e0c114f65c55cf8fe7eabb80cfacab64eea6864060913173f66d"}, + {file = "ijson-3.5.0.tar.gz", hash = "sha256:94688760720e3f5212731b3cb8d30267f9a045fb38fb3870254e7b9504246f31"}, +] + [[package]] name = "isort" version = "8.0.1" @@ -1310,7 +1416,8 @@ files = [ ] [extras] -all = ["defusedxml", "pyyaml", "tomlkit"] +all = ["defusedxml", "ijson", "pyyaml", "tomlkit"] +json-loader = ["ijson"] toml-loader = ["tomlkit"] xml-loader = ["defusedxml"] yaml-loader = ["pyyaml"] @@ -1318,4 +1425,4 @@ yaml-loader = ["pyyaml"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<4.0" -content-hash = "c436b17bc26b05df172933d6c2063092dfbfc27c1f05ee549a0f288ba4b89bc4" +content-hash = "f2c5681125edbd7634dc356fff1f75814ce64802800d96a1a3dcd731facd928e" diff --git a/pyproject.toml b/pyproject.toml index 88958a8..d1c783b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ packages = [ python = ">=3.10,<4.0" joblib = ">=1.2.0" defusedxml = { version = ">=0.7,<0.8", optional = true } +ijson = { version = ">=3.1", optional = true } pyyaml = "^6.0.1" tomlkit = "^0.13.2" setuptools = ">=70.0.0" @@ -24,7 +25,8 @@ setuptools = ">=70.0.0" xml_loader = ["defusedxml"] yaml_loader = ["pyyaml"] toml_loader = ["tomlkit"] -all = ["defusedxml", "pyyaml", "tomlkit"] +json_loader = ["ijson"] +all = ["defusedxml", "ijson", "pyyaml", "tomlkit"] [tool.poetry.group.test.dependencies] parameterized = "*" diff --git a/pystreamapi/loaders/__json/__json_loader.py b/pystreamapi/loaders/__json/__json_loader.py index 70364b1..8701c1e 100644 --- a/pystreamapi/loaders/__json/__json_loader.py +++ b/pystreamapi/loaders/__json/__json_loader.py @@ -1,9 +1,69 @@ -import json as jsonlib +import io from collections import namedtuple from typing import Any, Iterator +try: + import ijson +except ImportError as exc: + raise ImportError( + "Please install the json_loader extra dependency (ijson) to use the json loader." + ) from exc + from pystreamapi.loaders.__loader_utils import LoaderUtils +_PEEK_SIZE = 4096 + + +class _TextToBytesWrapper: + """Wraps a text-mode file handle and converts its output to bytes for ijson.""" + + def __init__(self, handle, encoding='utf-8'): + """Initialize the wrapper with a file handle and text encoding.""" + self._handle = handle + self._encoding = encoding + + def read(self, size=-1): + """Read up to size characters from the handle and return bytes, encoding text as needed.""" + data = self._handle.read(size) + if isinstance(data, str): + return data.encode(self._encoding) + return data if data else b'' + + +class _PeekableBytesReader: + """Replays a pre-read buffer before delegating further reads to the underlying source.""" + + def __init__(self, buffer: bytes, source): + """Initialize the peekable bytes reader with a pre-read buffer and underlying source.""" + self._buf = buffer + self._src = source + + def read(self, size=-1): + """ + Read up to size bytes, replaying the pre-read buffer before + reading from the underlying source. + """ + if size == -1: + # Full-read path: used by non-chunking callers (e.g. test helpers). + # Streaming callers (like ijson) always pass an explicit chunk size. + tail = self._src.read() + if isinstance(tail, str): + tail = tail.encode('utf-8') + result = self._buf + tail + self._buf = b'' + return result + if len(self._buf) >= size: + result = self._buf[:size] + self._buf = self._buf[size:] + return result + needed = size - len(self._buf) + more = self._src.read(needed) + if isinstance(more, str): + more = more.encode('utf-8') + result = self._buf + more + self._buf = b'' + return result + def json(src: str, read_from_src=False) -> Iterator[Any]: """ @@ -24,44 +84,70 @@ def json(src: str, read_from_src=False) -> Iterator[Any]: def __lazy_load_json_file(file_path: str) -> Iterator[Any]: - """Lazily read and parse a JSON file, yielding namedtuples.""" + """Lazily read and parse a JSON file, yielding namedtuples incrementally.""" def generator(): - """Generate namedtuples from the JSON file contents.""" + """Yield namedtuples from the JSON file using a streaming parser.""" # skipcq: PTC-W6004 with open(file_path, mode='r', encoding='utf-8') as jsonfile: - src = jsonfile.read() - if not src.strip(): - return - result = jsonlib.loads(src, object_hook=__dict_to_namedtuple) - if isinstance(result, list): - yield from result - else: - yield result + yield from __stream_json_items(jsonfile) return generator() def __lazy_load_json_string(json_string: str) -> Iterator[Any]: - """Lazily parse a JSON string, yielding namedtuples.""" + """Lazily parse a JSON string, yielding namedtuples incrementally.""" def generator(): - """Internal generator that yields namedtuples by parsing the JSON string on demand.""" - if not json_string.strip(): - return - result = jsonlib.loads(json_string, object_hook=__dict_to_namedtuple) - if isinstance(result, list): - yield from result - else: - yield result + """Yield namedtuples by streaming-parsing the JSON string.""" + yield from __stream_json_items(io.StringIO(json_string)) return generator() +def __stream_json_items(handle) -> Iterator[Any]: + """Stream JSON items from a text-mode file-like handle using ijson. + + Reads an initial chunk to detect whether the root value is an array or a + single object, then replays that chunk together with the remainder of the + handle through a bytes wrapper so that ijson can parse incrementally. + """ + initial = handle.read(_PEEK_SIZE) + if isinstance(initial, str): + initial_str = initial + initial_bytes = initial.encode('utf-8') + else: + initial_bytes = initial + initial_str = initial.decode('utf-8', errors='replace') + + stripped = initial_str.lstrip() + if not stripped: + return + + first_char = stripped[0] + reader = _PeekableBytesReader(initial_bytes, _TextToBytesWrapper(handle)) + + if first_char == '[': + for item in ijson.items(reader, 'item', use_float=True): + yield __dict_to_namedtuple(item) + else: + obj = next(ijson.items(reader, '', use_float=True), None) + if obj is not None: + yield __dict_to_namedtuple(obj) + + def __dict_to_namedtuple(d, name='Item'): - """Convert a dictionary to a namedtuple""" + """Convert a dictionary (and any nested dicts/lists) to namedtuples recursively. + + List values are materialised eagerly because namedtuple field values must be + concrete sequences. This is O(size of the current item) — the same behaviour + as the previous json.loads(object_hook=...) approach — while top-level streaming + (one item at a time) is handled by the ijson layer above. + """ if isinstance(d, dict): fields = list(d.keys()) Item = namedtuple(name, fields) return Item(**{k: __dict_to_namedtuple(v, k) for k, v in d.items()}) + if isinstance(d, list): + return [__dict_to_namedtuple(item) for item in d] return d diff --git a/pystreamapi/loaders/__xml/__xml_loader.py b/pystreamapi/loaders/__xml/__xml_loader.py index 1a12aba..7be5822 100644 --- a/pystreamapi/loaders/__xml/__xml_loader.py +++ b/pystreamapi/loaders/__xml/__xml_loader.py @@ -1,3 +1,4 @@ +import io from typing import Iterator, Any try: @@ -36,36 +37,62 @@ def xml(src: str, read_from_src=False, retrieve_children=True, cast_types=True, def _lazy_parse_xml_file(file_path: str, encoding: str, retrieve_children: bool, cast_types: bool) -> Iterator[Any]: - """Lazily parse an XML file by reading its content and yielding parsed namedtuples.""" + """ + Lazily parse an XML file using iterparse, yielding namedtuples + without reading all at once. + """ def generator(): - """Generator that reads the XML file and yields parsed namedtuples lazily.""" + """ + Generator that streams XML elements from the file and yields + namedtuples lazily. + """ # skipcq: PTC-W6004 with open(file_path, mode='r', encoding=encoding) as xmlfile: - xml_string = xmlfile.read() - yield from _parse_xml_string_lazy(xml_string, retrieve_children, cast_types) + yield from _iterparse_xml(xmlfile, retrieve_children, cast_types) return generator() def _lazy_parse_xml_string(xml_string: str, retrieve_children: bool, cast_types: bool) -> Iterator[Any]: - """Lazily parse an XML string by yielding parsed namedtuples for each element.""" + """Lazily parse an XML string using iterparse, yielding namedtuples without a full DOM build.""" def generator(): - """Generator that yields parsed namedtuples from the XML string lazily.""" - yield from _parse_xml_string_lazy(xml_string, retrieve_children, cast_types) + """Generator that streams XML elements from a string source and yields namedtuples.""" + yield from _iterparse_xml(io.StringIO(xml_string), retrieve_children, cast_types) return generator() -def _parse_xml_string_lazy(xml_string: str, retrieve_children: bool, - cast_types: bool) -> Iterator[Any]: - """Parse an XML string into namedtuples, optionally yielding child elements lazily.""" - root = ElementTree.fromstring(xml_string) - parsed = __parse_xml(root, cast_types) - if retrieve_children: - yield from __flatten(parsed) - else: - yield parsed +def _iterparse_xml(source: "IO[Any]", retrieve_children: bool, cast_types: bool) -> Iterator[Any]: + """Drive iterparse over *source* and yield namedtuples incrementally. + + When *retrieve_children* is True each direct child of the root element is + converted and yielded as soon as its closing tag is encountered; the child + is then removed from the root so that memory is freed immediately. + + When *retrieve_children* is False the entire document is consumed and the + root element is converted and yielded once. + """ + depth = 0 + root = None + context = ElementTree.iterparse(source, events=('start', 'end')) + + for event, elem in context: + if event == 'start': + depth += 1 + if root is None: + root = elem + else: # 'end' + depth -= 1 + if retrieve_children: + if depth == 1: + yield __parse_xml(elem, cast_types) + elem.clear() + root.remove(elem) + else: + if depth == 0: + yield __parse_xml(root, cast_types) + return def __parse_xml(element, cast_types: bool): @@ -105,12 +132,3 @@ def __parse_multiple_elements(element, cast_types: bool): def __filter_single_items(tag_dict): """Filter out single-item lists from a dictionary.""" return {key: value[0] if len(value) == 1 else value for key, value in tag_dict.items()} - - -def __flatten(data): - """Yield flattened elements from a possibly nested structure.""" - for item in data: - if isinstance(item, list): - yield from item - else: - yield item diff --git a/tox.ini b/tox.ini index e377225..85ebbab 100644 --- a/tox.ini +++ b/tox.ini @@ -11,6 +11,7 @@ deps = defusedxml pyyaml tomlkit + ijson commands = coverage run -m unittest discover -s tests -t tests --pattern 'test_*.py' coverage xml