diff --git a/poetry.lock b/poetry.lock
index a3f934f..3aae21a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1294,6 +1294,7 @@ files = [
     {file = "tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0"},
     {file = "tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1"},
 ]
+markers = {main = "extra == \"toml-loader\" or extra == \"all\""}
 
 [[package]]
 name = "typing-extensions"
@@ -1309,11 +1310,12 @@ files = [
 ]
 
 [extras]
-all = ["defusedxml", "pyyaml"]
+all = ["defusedxml", "pyyaml", "tomlkit"]
+toml-loader = ["tomlkit"]
 xml-loader = ["defusedxml"]
 yaml-loader = ["pyyaml"]
 
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<4.0"
-content-hash = "1884cd09143fe577fc8d1478404ea62a9936fd815f467da3530d61872c20bb9a"
+content-hash = "c436b17bc26b05df172933d6c2063092dfbfc27c1f05ee549a0f288ba4b89bc4"
diff --git a/pyproject.toml b/pyproject.toml
index 568c55b..88958a8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,8 @@ setuptools = ">=70.0.0"
 [tool.poetry.extras]
 xml_loader = ["defusedxml"]
 yaml_loader = ["pyyaml"]
-all = ["defusedxml", "pyyaml"]
+toml_loader = ["tomlkit"]
+all = ["defusedxml", "pyyaml", "tomlkit"]
 
 [tool.poetry.group.test.dependencies]
 parameterized = "*"
diff --git a/pystreamapi/loaders/__init__.py b/pystreamapi/loaders/__init__.py
index 11b171d..98ed230 100644
--- a/pystreamapi/loaders/__init__.py
+++ b/pystreamapi/loaders/__init__.py
@@ -1,7 +1,16 @@
 from pystreamapi.loaders.__csv.__csv_loader import csv
 from pystreamapi.loaders.__json.__json_loader import json
 
 __all__ = ['csv', 'json']
 
+# tomlkit is only installed with the ``toml_loader``/``all`` extras, so the
+# TOML loader has to be optional, like the guarded XML import below.
+try:
+    from pystreamapi.loaders.__toml.__toml_loader import toml
+
+    __all__.append('toml')
+except ImportError:
+    pass
+
 try:
     from pystreamapi.loaders.__xml.__xml_loader import xml
diff --git a/pystreamapi/loaders/__toml/__init__.py b/pystreamapi/loaders/__toml/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pystreamapi/loaders/__toml/__toml_loader.py b/pystreamapi/loaders/__toml/__toml_loader.py
new file mode 100644
index 0000000..a413f35
--- /dev/null
+++ b/pystreamapi/loaders/__toml/__toml_loader.py
@@ -0,0 +1,75 @@
+from collections import namedtuple
+from keyword import iskeyword
+from typing import Any, Iterator
+
+import tomlkit
+from pystreamapi.loaders.__loader_utils import LoaderUtils
+
+
+def toml(src: str, read_from_src: bool = False) -> Iterator[Any]:
+    """
+    Lazily loads TOML data from either a path or a string and yields namedtuples.
+
+    Args:
+        src (str): Either the path to a TOML file or a TOML string.
+        read_from_src (bool): If True, src is treated as a TOML string.
+            If False, src is treated as a path to a TOML file.
+
+    Yields:
+        namedtuple: The TOML document as a namedtuple. Keys that are not valid
+            Python identifiers (e.g. ``build-system``) become positional fields
+            named ``_<index>``. Nothing is yielded for an empty document.
+    """
+    if read_from_src:
+        return __lazy_load_toml_string(src)
+    path = LoaderUtils.validate_path(src)
+    return __lazy_load_toml_file(path)
+
+
+def __lazy_load_toml_file(file_path: str) -> Iterator[Any]:
+    """Lazily read and parse a TOML file, yielding a namedtuple."""
+
+    def generator():
+        """Generate a namedtuple from the TOML file contents."""
+        # skipcq: PTC-W6004
+        with open(file_path, mode='r', encoding='utf-8') as tomlfile:
+            src = tomlfile.read()
+        if not src.strip():
+            return
+        result = tomlkit.loads(src)
+        yield __dict_to_namedtuple(result)
+
+    return generator()
+
+
+def __lazy_load_toml_string(toml_string: str) -> Iterator[Any]:
+    """Lazily parse a TOML string, yielding a namedtuple."""
+
+    def generator():
+        """Internal generator that yields a namedtuple by parsing the TOML string on demand."""
+        if not toml_string.strip():
+            return
+        result = tomlkit.loads(toml_string)
+        yield __dict_to_namedtuple(result)
+
+    return generator()
+
+
+def __dict_to_namedtuple(data, name='Item'):
+    """Recursively convert a dictionary (or list) to namedtuples.
+
+    TOML keys such as ``build-system`` are not valid Python identifiers, so the
+    namedtuple is created with ``rename=True``: offending field names are
+    replaced by ``_<index>`` instead of raising ValueError.
+    """
+    if isinstance(data, dict):
+        items = list(data.items())
+        # A type name cannot be auto-renamed, so fall back to the generic one
+        # when the parent key is not a usable identifier.
+        typename = name if name.isidentifier() and not iskeyword(name) else 'Item'
+        Item = namedtuple(typename, [key for key, _ in items], rename=True)
+        # Build positionally: renamed fields cannot be passed by keyword.
+        return Item(*(__dict_to_namedtuple(value, key) for key, value in items))
+    if isinstance(data, list):
+        return [__dict_to_namedtuple(item, name) for item in data]
+    return data
diff --git a/tests/_loaders/test_toml_loader.py b/tests/_loaders/test_toml_loader.py
new file mode 100644
index 0000000..b987986
--- /dev/null
+++ b/tests/_loaders/test_toml_loader.py
@@ -0,0 +1,153 @@
+# pylint: disable=not-context-manager
+from types import GeneratorType
+from unittest import TestCase
+
+import tomlkit.exceptions
+
+from _loaders.file_test import LoaderTestBase
+from pystreamapi.loaders import toml
+
+# A simple flat TOML document
+file_content = """
+attr1 = 1
+attr2 = 2.0
+nested = {attr3 = "hello"}
+"""
+
+# TOML with an array of tables (non-consistent fields across entries)
+non_consistent_content = """
+[[employees.employee]]
+name = "John Doe"
+position = "Software Engineer"
+salary = 80000
+children = 2
+
+[[employees.employee]]
+name = "Alice Smith"
+position = "Network Administrator"
+salary = 75000
+children = 1
+
+[[employees.employee]]
+name = "Bob Johnson"
+experience = "Database"
+wage = 82000
+car = "Audi"
+"""
+
+file_path = 'path/to/data.toml'
+
+
+class TestTomlLoader(LoaderTestBase, TestCase):
+
+    def test_toml_loader_from_file(self):
+        with self.mock_file(file_content):
+            self._check_extracted_data(toml(file_path))
+
+    def test_toml_loader_from_string(self):
+        self._check_extracted_data(toml(file_content, read_from_src=True))
+
+    def test_toml_loader_is_iterable(self):
+        with self.mock_file(file_content):
+            self.assertEqual(len(list(iter(toml(file_path)))), 1)
+
+    def test_toml_loader_is_lazy(self):
+        with self.mock_file(file_content):
+            self.assertIsInstance(toml(file_path), GeneratorType)
+
+    def test_toml_loader_with_empty_file(self):
+        with self.mock_file(""):
+            self.assertEqual(list(toml(file_path)), [])
+
+    def test_toml_loader_from_empty_string(self):
+        self.assertEqual(list(toml("", read_from_src=True)), [])
+
+    def test_toml_loader_with_invalid_path(self):
+        with self.assertRaises(FileNotFoundError):
+            toml('path/to/invalid.toml')
+
+    def test_toml_loader_with_no_file(self):
+        with self.assertRaises(ValueError):
+            toml('../')
+
+    def test_toml_loader_with_malformed_toml(self):
+        with self.assertRaises(tomlkit.exceptions.ParseError):
+            list(toml("invalid = = toml", read_from_src=True))
+
+    def test_toml_loader_non_consistent_data(self):
+        """Each [[array of tables]] entry may have different fields."""
+        with self.mock_file(non_consistent_content):
+            data = list(toml(file_path))
+        self.assertEqual(len(data), 1)
+        employees = data[0].employees.employee
+        self.assertEqual(len(employees), 3)
+
+        # First employee has name, position, salary, children
+        self.assertEqual(employees[0].name, "John Doe")
+        self.assertEqual(employees[0].salary, 80000)
+        self.assertIsInstance(employees[0].salary, int)
+
+        # Second employee has name, position, salary, children
+        self.assertEqual(employees[1].name, "Alice Smith")
+        self.assertEqual(employees[1].salary, 75000)
+
+        # Third employee has different fields (experience, wage, car)
+        self.assertEqual(employees[2].name, "Bob Johnson")
+        self.assertEqual(employees[2].wage, 82000)
+        self.assertEqual(employees[2].car, "Audi")
+
+    def test_toml_loader_non_consistent_from_string(self):
+        """Same as above but loading from a string."""
+        data = list(toml(non_consistent_content, read_from_src=True))
+        self.assertEqual(len(data), 1)
+        employees = data[0].employees.employee
+        self.assertEqual(len(employees), 3)
+        self.assertEqual(employees[0].name, "John Doe")
+        self.assertEqual(employees[2].car, "Audi")
+
+    def test_toml_loader_native_types(self):
+        """TOML is self-typed so int, float, bool values should be their native types."""
+        content = "count = 42\nrate = 3.14\nflag = true\n"
+        data = list(toml(content, read_from_src=True))
+        self.assertEqual(len(data), 1)
+        item = data[0]
+        self.assertEqual(item.count, 42)
+        self.assertIsInstance(item.count, int)
+        self.assertAlmostEqual(item.rate, 3.14)
+        self.assertIsInstance(item.rate, float)
+        self.assertTrue(item.flag)
+        self.assertIsInstance(item.flag, bool)
+
+    def test_toml_loader_nested_table(self):
+        """Nested TOML tables are converted to nested namedtuples."""
+        with self.mock_file(file_content):
+            data = list(toml(file_path))
+        self.assertEqual(len(data), 1)
+        item = data[0]
+        self.assertEqual(item.nested.attr3, "hello")
+        self.assertIsInstance(item.nested.attr3, str)
+
+    def test_toml_loader_non_identifier_keys(self):
+        """Keys that are not valid Python identifiers must not break loading."""
+        content = 'valid = 1\nmy-key = 2\n\n[build-system]\nrequires = ["x"]\n'
+        data = list(toml(content, read_from_src=True))
+        self.assertEqual(len(data), 1)
+        item = data[0]
+        self.assertEqual(item.valid, 1)
+        # Invalid names are renamed positionally by namedtuple(rename=True)
+        self.assertEqual(item[1], 2)
+        self.assertEqual(item[2].requires, ["x"])
+
+    def _check_extracted_data(self, data):
+        try:
+            first = next(data)
+        except StopIteration:
+            self.fail("Expected first item but iterator was empty")
+        self.assertEqual(first.attr1, 1)
+        self.assertIsInstance(first.attr1, int)
+        self.assertAlmostEqual(first.attr2, 2.0)
+        self.assertIsInstance(first.attr2, float)
+        self.assertEqual(first.nested.attr3, "hello")
+        self.assertIsInstance(first.nested.attr3, str)
+
+        self.assertRaises(StopIteration, next, data)
diff --git a/tox.ini b/tox.ini
index fea8938..e377225 100644
--- a/tox.ini
+++ b/tox.ini
@@ -10,6 +10,7 @@ deps =
     parameterized
     defusedxml
     pyyaml
+    tomlkit
 commands =
     coverage run -m unittest discover -s tests -t tests --pattern 'test_*.py'
     coverage xml