Skip to content

Commit

Permalink
Add support for nested fields in SPIDERMON_VALIDATION_ERRORS_FIELD (#417
Browse files Browse the repository at this point in the history
)

* Add support for nested fields in SPIDERMON_VALIDATION_ERRORS_FIELD

* Added docs for this feature

* Improve docstrings
  • Loading branch information
VMRuiz committed Sep 1, 2023
1 parent ea21cee commit dbd5243
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 5 deletions.
8 changes: 7 additions & 1 deletion docs/source/item-validation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,13 @@ SPIDERMON_VALIDATION_ERRORS_FIELD
Default: ``_validation``

The name of the field added to the item when a validation error happens and
`SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS`_ is enabled. Nested fields are supported by using ``.`` as the separator:

.. code-block:: python

    # settings.py
    SPIDERMON_VALIDATION_ERRORS_FIELD = "top_level.second_level._validation"

SPIDERMON_VALIDATION_SCHEMAS
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
12 changes: 9 additions & 3 deletions spidermon/contrib/scrapy/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

from spidermon.contrib.validation import JSONSchemaValidator
from spidermon.contrib.validation.jsonschema.tools import get_schema_from
from spidermon.contrib.utils.attributes import (
get_nested_attribute,
set_nested_attribute,
)

from .stats import ValidationStatsManager

Expand Down Expand Up @@ -124,11 +128,13 @@ def find_validators(self, item):
return find(item.__class__) or find(Item)

def _add_errors_to_item(self, item: ItemAdapter, errors: Dict[str, str]):
    """
    Merge validation ``errors`` into the errors field of ``item``.

    The errors container is located through ``self.errors_field``, which is
    resolved with ``get_nested_attribute`` and may therefore be a
    dot-separated path. When nothing is stored there yet, a new
    ``defaultdict(list)`` is created and attached with
    ``set_nested_attribute``.

    Args:
        item: adapter wrapping the item that receives the errors.
        errors: mapping of field name to the messages reported for it; the
            messages are appended to the stored ones with ``+=``.

    Raises:
        KeyError: propagated from the nested-attribute helpers when an
            intermediate key of ``self.errors_field`` is not defined.
    """
    # Resolve the container once through the nested-aware helpers only.
    # Indexing ``item[self.errors_field]`` directly would treat a dotted
    # path as one literal key and would append every message a second time.
    errors_field_instance = get_nested_attribute(item, self.errors_field)
    if errors_field_instance is None:
        errors_field_instance = defaultdict(list)
        set_nested_attribute(item, self.errors_field, errors_field_instance)

    for field_name, messages in errors.items():
        errors_field_instance[field_name] += messages

def _drop_item(self, item, errors):
"""
Expand Down
Empty file.
48 changes: 48 additions & 0 deletions spidermon/contrib/utils/attributes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from typing import Any, List
from itemadapter import ItemAdapter


def traverse_nested(obj: ItemAdapter, keys: List[str]) -> ItemAdapter:
    """
    Walk down ``obj`` following ``keys`` and return the object reached last.

    The value found at each level is wrapped in an ``ItemAdapter`` before the
    next key is applied. With an empty ``keys`` list, ``obj`` itself is
    returned unchanged.

    Args:
        obj: item to start from.
        keys: names of the nested fields to follow, outermost first. The
            list is only read, never modified.

    Returns:
        The ``ItemAdapter`` wrapping the value stored under the last key, or
        ``obj`` when ``keys`` is empty.

    Raises:
        KeyError: if any of the keys in the path is not defined.
    """
    current_obj = obj
    # Iterate rather than ``keys.pop(0)`` so the caller's list stays intact.
    for key in keys:
        try:
            # Traverse next level of item object
            current_obj = ItemAdapter(current_obj[key])
        except KeyError as exc:
            # ``current_obj`` still refers to the parent level here because
            # the assignment above did not complete.
            raise KeyError(
                f'Invalid key "{key}" for {current_obj} in {obj}'
            ) from exc

    return current_obj


def get_nested_attribute(item: ItemAdapter, attribute_path: str):
    """
    Look up the value stored under a dot-separated path inside an item.

    Every segment except the last is resolved with ``traverse_nested``; the
    last segment is then read with ``.get()`` on the object reached.

    Raises:
        KeyError: if any of the intermediate keys in the path is not defined.
    """
    path_parts = attribute_path.split(".")
    # Everything before the final segment identifies the parent container.
    parent = traverse_nested(item, path_parts[:-1])
    return parent.get(path_parts[-1])


def set_nested_attribute(item: ItemAdapter, attribute_path: str, value: Any):
    """
    Store ``value`` under a dot-separated path inside an item.

    Every segment except the last is resolved with ``traverse_nested``; the
    last segment is assigned on the object reached.

    Raises:
        KeyError: if any of the keys in the path is not defined or
            if the last key in the path is not supported by its parent field.
    """
    path_parts = attribute_path.split(".")
    target = traverse_nested(item, path_parts[:-1])
    # For a single-segment path ``traverse_nested`` returns ``item`` as given,
    # so wrap it here in case it is not an ItemAdapter yet.
    adapter = target if isinstance(target, ItemAdapter) else ItemAdapter(target)
    adapter[path_parts[-1]] = value
3 changes: 2 additions & 1 deletion tests/contrib/scrapy/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from scrapy.utils.test import get_crawler
from scrapy import Item
from functools import partial
from itemadapter import ItemAdapter

from spidermon.contrib.scrapy.pipelines import ItemValidationPipeline
from tests.fixtures.items import TreeItem, TestItem
Expand Down Expand Up @@ -234,7 +235,7 @@ def _run_pipeline(self, test_item):
test_errors = {"some_error": ["some_message"]}
crawler = get_crawler(settings_dict=settings)
pipe = ItemValidationPipeline.from_crawler(crawler)
pipe._add_errors_to_item(test_item, test_errors)
pipe._add_errors_to_item(ItemAdapter(test_item), test_errors)
return test_item

def test_add_errors_to_item(self):
Expand Down
48 changes: 48 additions & 0 deletions tests/contrib/utils/test_attributes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import pytest
from dataclasses import dataclass

from itemadapter import ItemAdapter
from spidermon.contrib.utils.attributes import (
get_nested_attribute,
set_nested_attribute,
)


def test_get_nested_attribute():
    """Check reads of top-level, nested and missing attributes."""
    adapter = ItemAdapter({"foo": "bar", "attr1": {"attr2": {"attr3": "foobar"}}})

    # Existing attributes, top-level and nested
    expected_by_path = {
        "foo": "bar",
        "attr1.attr2.attr3": "foobar",
    }
    for path, expected in expected_by_path.items():
        assert get_nested_attribute(adapter, path) == expected

    # Missing final attribute
    assert get_nested_attribute(adapter, "missing_attribute") is None

    # Missing intermediate attribute
    with pytest.raises(KeyError):
        get_nested_attribute(adapter, "attr1.missing_attribute.attr2")


def test_set_nested_attribute():
    """Check writes to top-level, nested and undefined attributes."""
    adapter = ItemAdapter({"foo": None, "attr1": {"attr2": {"attr3": None}}})

    # Top-level attribute
    set_nested_attribute(adapter, "foo", "foobar")
    assert adapter["foo"] == "foobar"

    # Nested attribute
    nested_path = "attr1.attr2.attr3"
    set_nested_attribute(adapter, nested_path, "bar")
    assert get_nested_attribute(adapter, nested_path) == "bar"

    # Set undefined attribute when the underlying class allows it
    set_nested_attribute(adapter, "missing_attribute", "foo")
    assert adapter["missing_attribute"] == "foo"

    # Set undefined attribute when the underlying class doesn't allow it
    @dataclass
    class NestedField:
        foo: str

    @dataclass
    class DummyItem:
        attr1: NestedField

    dataclass_adapter = ItemAdapter(DummyItem(attr1=NestedField(foo="bar")))
    expected_message = "NestedField does not support field: missing_attribute"
    with pytest.raises(KeyError, match=expected_message):
        set_nested_attribute(dataclass_adapter, "attr1.missing_attribute", "foo")

0 comments on commit dbd5243

Please sign in to comment.