Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ setuptools = ">=70.0.0"
[tool.poetry.extras]
xml_loader = ["defusedxml"]
yaml_loader = ["pyyaml"]
all = ["defusedxml", "pyyaml"]
toml_loader = ["tomlkit"]
all = ["defusedxml", "pyyaml", "tomlkit"]

[tool.poetry.group.test.dependencies]
parameterized = "*"
Expand Down
3 changes: 2 additions & 1 deletion pystreamapi/loaders/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from pystreamapi.loaders.__csv.__csv_loader import csv
from pystreamapi.loaders.__json.__json_loader import json
from pystreamapi.loaders.__toml.__toml_loader import toml

__all__ = ['csv', 'json']
__all__ = ['csv', 'json', 'toml']

try:
from pystreamapi.loaders.__xml.__xml_loader import xml
Expand Down
Empty file.
63 changes: 63 additions & 0 deletions pystreamapi/loaders/__toml/__toml_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from collections import namedtuple
from typing import Any, Iterator

import tomlkit
from pystreamapi.loaders.__loader_utils import LoaderUtils


def toml(src: str, read_from_src=False) -> Iterator[Any]:
    """
    Build a lazy TOML loader for a file path or for raw TOML text.

    Nothing is read or parsed here; the returned iterator does that work
    when it is first advanced. Only the path check (file mode) runs eagerly.

    Args:
        src (str): A path to a TOML file, or the TOML text itself.
        read_from_src (bool): When True, ``src`` is parsed directly as TOML
            text. When False (the default), ``src`` is validated as a path
            via ``LoaderUtils.validate_path`` and the file is read instead.

    Returns:
        Iterator[Any]: An iterator producing the whole TOML document as a
        single namedtuple, or nothing at all for blank input.
    """
    if not read_from_src:
        # Path validation happens at call time, before any generator exists.
        return __lazy_load_toml_file(LoaderUtils.validate_path(src))
    return __lazy_load_toml_string(src)


def __lazy_load_toml_file(file_path: str) -> Iterator[Any]:
    """Return a generator that reads and parses ``file_path`` on first use.

    The file is opened as UTF-8 text only when the generator is advanced.
    A file whose content is empty or whitespace-only produces no items;
    otherwise exactly one namedtuple (the whole document) is produced.
    """

    def generator():
        """Read the file, parse it with tomlkit and emit one namedtuple."""
        # skipcq: PTC-W6004
        with open(file_path, mode='r', encoding='utf-8') as handle:
            text = handle.read()
            if text.strip():
                document = tomlkit.loads(text)
                yield __dict_to_namedtuple(document)

    return generator()


def __lazy_load_toml_string(toml_string: str) -> Iterator[Any]:
    """Return a generator that parses ``toml_string`` on first use.

    Empty or whitespace-only text produces no items; any other text is
    handed to ``tomlkit.loads`` and the resulting document is emitted as a
    single namedtuple.
    """

    def generator():
        """Parse the captured TOML text on demand and emit one namedtuple."""
        if toml_string.strip():
            document = tomlkit.loads(toml_string)
            yield __dict_to_namedtuple(document)

    return generator()


def __dict_to_namedtuple(data, name='Item'):
    """Recursively convert a dictionary (or list) to namedtuples.

    TOML keys and table names are free-form strings: they may contain ``-``
    or spaces, start with a digit, or be a Python keyword. ``namedtuple``
    only accepts valid identifiers, so such input would otherwise raise
    ``ValueError``. To accept any TOML document:

    * fields are created with ``rename=True``, so an unusable key is replaced
      by its positional name (``_0``, ``_1``, ...) while valid keys keep
      their name;
    * the tuple is filled positionally, so renamed fields still receive
      their value;
    * the type name is sanitised (invalid characters become ``_``, a leading
      digit is prefixed with ``_``) and falls back to ``'Item'`` if it is
      still unusable.

    Args:
        data: A mapping, a list, or any other value.
        name: Preferred type name for the namedtuple built from ``data``;
            nested mappings use the key they are stored under.

    Returns:
        A namedtuple for a mapping, a list of converted items for a list,
        and ``data`` unchanged for anything else.
    """
    if isinstance(data, dict):
        # Function-scope import so this helper needs no new module-level import.
        from keyword import iskeyword

        type_name = str(name)
        if not type_name.isidentifier() or iskeyword(type_name):
            type_name = ''.join(
                char if char == '_' or char.isalnum() else '_'
                for char in type_name
            )
            if not type_name or type_name[0].isdigit():
                type_name = f'_{type_name}'
            if not type_name.isidentifier() or iskeyword(type_name):
                type_name = 'Item'

        record = namedtuple(type_name, list(data), rename=True)
        # Positional construction: keyword arguments would fail for any key
        # that rename=True had to replace.
        return record(*(
            __dict_to_namedtuple(value, key) for key, value in data.items()
        ))
    if isinstance(data, list):
        return [__dict_to_namedtuple(item, name) for item in data]
    return data
142 changes: 142 additions & 0 deletions tests/_loaders/test_toml_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# pylint: disable=not-context-manager
from types import GeneratorType
from unittest import TestCase

import tomlkit.exceptions

from _loaders.file_test import LoaderTestBase
from pystreamapi.loaders import toml

# Fixture: a small TOML document with an integer, a float and one inline
# table ("nested") holding a single string value.
file_content = """
attr1 = 1
attr2 = 2.0
nested = {attr3 = "hello"}
"""

# Fixture: TOML with an array of tables under employees.employee. The three
# entries do not share the same keys: the last one has experience/wage/car
# instead of position/salary/children.
non_consistent_content = """
[[employees.employee]]
name = "John Doe"
position = "Software Engineer"
salary = 80000
children = 2

[[employees.employee]]
name = "Alice Smith"
position = "Network Administrator"
salary = 75000
children = 1

[[employees.employee]]
name = "Bob Johnson"
experience = "Database"
wage = 82000
car = "Audi"
"""

# Path handed to the loader in the file-based tests; those tests wrap the
# call in self.mock_file(...), which supplies the content.
file_path = 'path/to/data.toml'


class TestTomlLoader(LoaderTestBase, TestCase):
    """Tests for the ``toml`` loader, from a mocked file and from a string."""

    def test_toml_loader_from_file(self):
        with self.mock_file(file_content):
            self._check_extracted_data(toml(file_path))

    def test_toml_loader_from_string(self):
        self._check_extracted_data(toml(file_content, read_from_src=True))

    def test_toml_loader_is_iterable(self):
        with self.mock_file(file_content):
            self.assertEqual(len(list(iter(toml(file_path)))), 1)

    def test_toml_loader_is_lazy(self):
        with self.mock_file(file_content):
            self.assertIsInstance(toml(file_path), GeneratorType)

    def test_toml_loader_with_empty_file(self):
        with self.mock_file(""):
            self.assertEqual(list(toml(file_path)), [])

    def test_toml_loader_from_empty_string(self):
        self.assertEqual(list(toml("", read_from_src=True)), [])

    def test_toml_loader_from_whitespace_string(self):
        """Whitespace-only input is treated like empty input."""
        self.assertEqual(list(toml(" \n", read_from_src=True)), [])

    def test_toml_loader_with_invalid_path(self):
        with self.assertRaises(FileNotFoundError):
            toml('path/to/invalid.toml')

    def test_toml_loader_with_no_file(self):
        with self.assertRaises(ValueError):
            toml('../')

    def test_toml_loader_with_malformed_toml(self):
        with self.assertRaises(tomlkit.exceptions.ParseError):
            list(toml("invalid = = toml", read_from_src=True))

    def test_toml_loader_non_consistent_data(self):
        """Each [[array of tables]] entry may have different fields."""
        with self.mock_file(non_consistent_content):
            data = list(toml(file_path))
        self.assertEqual(len(data), 1)
        employees = data[0].employees.employee
        self.assertEqual(len(employees), 3)

        # First employee has name, position, salary, children
        self.assertEqual(employees[0].name, "John Doe")
        self.assertEqual(employees[0].salary, 80000)
        self.assertIsInstance(employees[0].salary, int)

        # Second employee has name, position, salary, children
        self.assertEqual(employees[1].name, "Alice Smith")
        self.assertEqual(employees[1].salary, 75000)

        # Third employee has different fields (experience, wage, car)
        self.assertEqual(employees[2].name, "Bob Johnson")
        self.assertEqual(employees[2].wage, 82000)
        self.assertEqual(employees[2].car, "Audi")

    def test_toml_loader_non_consistent_from_string(self):
        """Same as above but loading from a string."""
        data = list(toml(non_consistent_content, read_from_src=True))
        self.assertEqual(len(data), 1)
        employees = data[0].employees.employee
        self.assertEqual(len(employees), 3)
        self.assertEqual(employees[0].name, "John Doe")
        self.assertEqual(employees[2].car, "Audi")

    def test_toml_loader_native_types(self):
        """TOML is self-typed so int, float, bool values should be their native types."""
        content = "count = 42\nrate = 3.14\nflag = true\n"
        data = list(toml(content, read_from_src=True))
        self.assertEqual(len(data), 1)
        item = data[0]
        self.assertEqual(item.count, 42)
        self.assertIsInstance(item.count, int)
        self.assertAlmostEqual(item.rate, 3.14)
        self.assertIsInstance(item.rate, float)
        self.assertTrue(item.flag)
        self.assertIsInstance(item.flag, bool)

    def test_toml_loader_nested_table(self):
        """Nested TOML tables are converted to nested namedtuples."""
        with self.mock_file(file_content):
            data = list(toml(file_path))
        self.assertEqual(len(data), 1)
        item = data[0]
        self.assertEqual(item.nested.attr3, "hello")
        self.assertIsInstance(item.nested.attr3, str)

    def _check_extracted_data(self, data):
        """Assert that ``data`` yields exactly one item matching ``file_content``."""
        try:
            first = next(data)
        except StopIteration:
            self.fail("Expected first item but iterator was empty")
        self.assertEqual(first.attr1, 1)
        self.assertIsInstance(first.attr1, int)
        self.assertAlmostEqual(first.attr2, 2.0)
        self.assertIsInstance(first.attr2, float)
        self.assertEqual(first.nested.attr3, "hello")
        self.assertIsInstance(first.nested.attr3, str)

        # The document is a single item, so the iterator must now be exhausted.
        self.assertRaises(StopIteration, next, data)
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ deps =
parameterized
defusedxml
pyyaml
tomlkit
commands =
coverage run -m unittest discover -s tests -t tests --pattern 'test_*.py'
coverage xml
Expand Down
Loading