From f75941f79dfeb931649f9e8880cdbd1b0a8e4681 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Thu, 30 Apr 2020 11:56:52 -0300 Subject: [PATCH 1/7] Deprecate scrapy.item.BaseItem --- scrapy/item.py | 7 ++++++- tests/test_item.py | 25 ++++++++++++++++++++----- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/scrapy/item.py b/scrapy/item.py index 7483689323b..3558b223154 100644 --- a/scrapy/item.py +++ b/scrapy/item.py @@ -28,7 +28,12 @@ class BaseItem(object_ref): Unlike instances of :class:`dict`, instances of :class:`BaseItem` may be :ref:`tracked <topics-leaks-trackrefs>` to debug memory leaks. """ - pass + + def __new__(cls, *args, **kwargs): + if issubclass(cls, BaseItem) and not (issubclass(cls, Item) or issubclass(cls, DictItem)): + warn('scrapy.item.BaseItem is deprecated, please use scrapy.item.Item instead', + ScrapyDeprecationWarning, stacklevel=2) + return super(BaseItem, cls).__new__(cls, *args, **kwargs) class Field(dict): diff --git a/tests/test_item.py b/tests/test_item.py index 4017f6e84d1..1220bc98a5b 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -4,7 +4,7 @@ from warnings import catch_warnings from scrapy.exceptions import ScrapyDeprecationWarning -from scrapy.item import ABCMeta, DictItem, Field, Item, ItemMeta +from scrapy.item import ABCMeta, BaseItem, DictItem, Field, Item, ItemMeta PY36_PLUS = (sys.version_info.major >= 3) and (sys.version_info.minor >= 6) @@ -131,12 +131,12 @@ class TestItem(Item): self.assertSortedEqual(list(item.values()), [u'New']) def test_metaclass_inheritance(self): - class BaseItem(Item): + class ParentItem(Item): name = Field() keys = Field() values = Field() - class TestItem(BaseItem): + class TestItem(ParentItem): keys = Field() i = TestItem() @@ -321,13 +321,28 @@ class DictItemTest(unittest.TestCase): def test_deprecation_warning(self): with catch_warnings(record=True) as warnings: - dict_item = DictItem() + DictItem() self.assertEqual(len(warnings), 1) self.assertEqual(warnings[0].category, 
ScrapyDeprecationWarning) with catch_warnings(record=True) as warnings: class SubclassedDictItem(DictItem): pass - subclassed_dict_item = SubclassedDictItem() + SubclassedDictItem() + self.assertEqual(len(warnings), 1) + self.assertEqual(warnings[0].category, ScrapyDeprecationWarning) + + +class BaseItemTest(unittest.TestCase): + + def test_deprecation_warning(self): + with catch_warnings(record=True) as warnings: + BaseItem() + self.assertEqual(len(warnings), 1) + self.assertEqual(warnings[0].category, ScrapyDeprecationWarning) + with catch_warnings(record=True) as warnings: + class SubclassedBaseItem(BaseItem): + pass + SubclassedBaseItem() self.assertEqual(len(warnings), 1) self.assertEqual(warnings[0].category, ScrapyDeprecationWarning) From 622ce860669b8bd7fc581b74d414aef24f4fb041 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 4 May 2020 16:22:24 -0300 Subject: [PATCH 2/7] Test: make sure scrapy.item.Item does not issue a deprecation warning --- tests/test_item.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_item.py b/tests/test_item.py index 1220bc98a5b..f35a2b9f9ac 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -347,5 +347,18 @@ class SubclassedBaseItem(BaseItem): self.assertEqual(warnings[0].category, ScrapyDeprecationWarning) +class ItemNoDeprecationWarningTest(unittest.TestCase): + + def test_no_deprecation_warning(self): + with catch_warnings(record=True) as warnings: + Item() + self.assertEqual(len(warnings), 0) + with catch_warnings(record=True) as warnings: + class SubclassedItem(Item): + pass + SubclassedItem() + self.assertEqual(len(warnings), 0) + + if __name__ == "__main__": unittest.main() From 7988c676a956457843416f8586544a58bf121f63 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Tue, 5 May 2020 13:11:01 -0300 Subject: [PATCH 3/7] Update Item docstring, update BaseItem occurrences --- docs/faq.rst | 4 ++-- pytest.ini | 2 +- scrapy/item.py | 35 ++++++++++++++++++----------------- 
scrapy/spiders/feed.py | 2 +- tests/test_loader.py | 2 +- tests/test_utils_spider.py | 4 ++-- 6 files changed, 25 insertions(+), 24 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index 75a0f4864ff..79ef6ca8578 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -342,14 +342,14 @@ method for this purpose. For example:: from copy import deepcopy - from scrapy.item import BaseItem + from scrapy.item import Item class MultiplyItemsMiddleware: def process_spider_output(self, response, result, spider): for item in result: - if isinstance(item, (BaseItem, dict)): + if isinstance(item, (Item, dict)): for _ in range(item['multiply_by']): yield deepcopy(item) diff --git a/pytest.ini b/pytest.ini index e8911ee3f4e..5a86ce2a71d 100644 --- a/pytest.ini +++ b/pytest.ini @@ -153,7 +153,7 @@ flake8-ignore = scrapy/exceptions.py E501 scrapy/exporters.py E501 scrapy/interfaces.py E501 - scrapy/item.py E501 E128 + scrapy/item.py E501 scrapy/link.py E501 scrapy/logformatter.py E501 scrapy/mail.py E402 E128 E501 diff --git a/scrapy/item.py b/scrapy/item.py index 3558b223154..46d20d0172f 100644 --- a/scrapy/item.py +++ b/scrapy/item.py @@ -15,18 +15,8 @@ class BaseItem(object_ref): - """Base class for all scraped items. - - In Scrapy, an object is considered an *item* if it is an instance of either - :class:`BaseItem` or :class:`dict`. For example, when the output of a - spider callback is evaluated, only instances of :class:`BaseItem` or - :class:`dict` are passed to :ref:`item pipelines <topics-item-pipeline>`. - - If you need instances of a custom class to be considered items by Scrapy, - you must inherit from either :class:`BaseItem` or :class:`dict`. - - Unlike instances of :class:`dict`, instances of :class:`BaseItem` may be - :ref:`tracked <topics-leaks-trackrefs>` to debug memory leaks. 
+ """ + Deprecated, please use :class:`scrapy.item.Item` instead """ def __new__(cls, *args, **kwargs): @@ -91,8 +81,7 @@ def __setitem__(self, key, value): if key in self.fields: self._values[key] = value else: - raise KeyError("%s does not support field: %s" % - (self.__class__.__name__, key)) + raise KeyError("%s does not support field: %s" % (self.__class__.__name__, key)) def __delitem__(self, key): del self._values[key] @@ -104,8 +93,7 @@ def __getattr__(self, name): def __setattr__(self, name, value): if not name.startswith('_'): - raise AttributeError("Use item[%r] = %r to set field value" % - (name, value)) + raise AttributeError("Use item[%r] = %r to set field value" % (name, value)) super(DictItem, self).__setattr__(name, value) def __len__(self): @@ -132,4 +120,17 @@ def deepcopy(self): class Item(DictItem, metaclass=ItemMeta): - pass + """ + Base class for scraped items. + + In Scrapy, an object is considered an *item* if it is an instance of either + :class:`Item` or :class:`dict`. For example, when the output of a + spider callback is evaluated, only instances of :class:`Item` or + :class:`dict` are passed to :ref:`item pipelines <topics-item-pipeline>`. + + If you need instances of a custom class to be considered items by Scrapy, + you must inherit from either :class:`Item` or :class:`dict`. + + Unlike instances of :class:`dict`, instances of :class:`Item` may be + :ref:`tracked <topics-leaks-trackrefs>` to debug memory leaks. + """ diff --git a/scrapy/spiders/feed.py b/scrapy/spiders/feed.py index c566f0236ad..a4ff8010d35 100644 --- a/scrapy/spiders/feed.py +++ b/scrapy/spiders/feed.py @@ -52,7 +52,7 @@ def parse_nodes(self, response, nodes): """This method is called for the nodes matching the provided tag name (itertag). Receives the response and an Selector for each node. Overriding this method is mandatory. Otherwise, you spider won't work. - This method must return either a BaseItem, a Request, or a list + This method must return either an item, a request, or a list containing any of them. 
""" diff --git a/tests/test_loader.py b/tests/test_loader.py index 701d568dc0d..f14714c756d 100644 --- a/tests/test_loader.py +++ b/tests/test_loader.py @@ -601,7 +601,7 @@ class NoInputReprocessingItemLoader(BaseNoInputReprocessingLoader): class NoInputReprocessingFromItemTest(unittest.TestCase): """ - Loaders initialized from loaded items must not reprocess fields (BaseItem instances) + Loaders initialized from loaded items must not reprocess fields (Item instances) """ def test_avoid_reprocessing_with_initial_values_single(self): il = NoInputReprocessingItemLoader(item=NoInputReprocessingItem(title='foo')) diff --git a/tests/test_utils_spider.py b/tests/test_utils_spider.py index ee7d170629e..3c87268ab6a 100644 --- a/tests/test_utils_spider.py +++ b/tests/test_utils_spider.py @@ -2,7 +2,7 @@ from scrapy import Spider from scrapy.http import Request -from scrapy.item import BaseItem +from scrapy.item import Item from scrapy.utils.spider import iterate_spider_output, iter_spider_classes @@ -17,7 +17,7 @@ class MySpider2(Spider): class UtilsSpidersTestCase(unittest.TestCase): def test_iterate_spider_output(self): - i = BaseItem() + i = Item() r = Request('http://scrapytest.org') o = object() From 8d1e3ee0dd8d48428f719a0fffda964d30ab956d Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Wed, 6 May 2020 09:24:32 -0300 Subject: [PATCH 4/7] Remove deprecated BaseItem from the docs --- docs/topics/items.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/topics/items.rst b/docs/topics/items.rst index 78612f524a6..0941a8a1b72 100644 --- a/docs/topics/items.rst +++ b/docs/topics/items.rst @@ -257,6 +257,4 @@ Field objects Other classes related to Item ============================= -.. autoclass:: BaseItem - .. 
autoclass:: ItemMeta From 5256eae60d3685de51c1f3891abe157e15d14def Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Thu, 7 May 2020 14:37:41 -0300 Subject: [PATCH 5/7] Meta class to handle isinstance checks for BaseItem --- pytest.ini | 2 +- scrapy/commands/parse.py | 4 +-- scrapy/contracts/default.py | 8 +++--- scrapy/core/scraper.py | 4 +-- scrapy/exporters.py | 4 +-- scrapy/item.py | 20 +++++++++++-- scrapy/shell.py | 5 ++-- scrapy/utils/misc.py | 4 +-- scrapy/utils/serialize.py | 4 +-- tests/test_item.py | 56 +++++++++++++++++++++++++++++++++---- 10 files changed, 85 insertions(+), 26 deletions(-) diff --git a/pytest.ini b/pytest.ini index 5a86ce2a71d..292dbce41f2 100644 --- a/pytest.ini +++ b/pytest.ini @@ -204,7 +204,7 @@ flake8-ignore = tests/test_http_headers.py E501 tests/test_http_request.py E402 E501 E127 E128 E128 E126 E123 tests/test_http_response.py E501 E128 - tests/test_item.py E128 F841 + tests/test_item.py E128 F841 E501 tests/test_link.py E501 tests/test_linkextractors.py E501 E128 E124 tests/test_loader.py E501 E741 E128 E117 diff --git a/scrapy/commands/parse.py b/scrapy/commands/parse.py index 1cefed1061c..098827ab99b 100644 --- a/scrapy/commands/parse.py +++ b/scrapy/commands/parse.py @@ -5,7 +5,7 @@ from scrapy.commands import ScrapyCommand from scrapy.http import Request -from scrapy.item import BaseItem +from scrapy.item import _BaseItem from scrapy.utils import display from scrapy.utils.conf import arglist_to_dict from scrapy.utils.spider import iterate_spider_output, spidercls_for_request @@ -117,7 +117,7 @@ def run_callback(self, response, callback, cb_kwargs=None): items, requests = [], [] for x in iterate_spider_output(callback(response, **cb_kwargs)): - if isinstance(x, (BaseItem, dict)): + if isinstance(x, (_BaseItem, dict)): items.append(x) elif isinstance(x, Request): requests.append(x) diff --git a/scrapy/contracts/default.py b/scrapy/contracts/default.py index a1b0f8f220b..cdc2bac1556 100644 --- 
a/scrapy/contracts/default.py +++ b/scrapy/contracts/default.py @@ -1,6 +1,6 @@ import json -from scrapy.item import BaseItem +from scrapy.item import _BaseItem from scrapy.http import Request from scrapy.exceptions import ContractFail @@ -51,8 +51,8 @@ class ReturnsContract(Contract): objects = { 'request': Request, 'requests': Request, - 'item': (BaseItem, dict), - 'items': (BaseItem, dict), + 'item': (_BaseItem, dict), + 'items': (_BaseItem, dict), } def __init__(self, *args, **kwargs): @@ -103,7 +103,7 @@ class ScrapesContract(Contract): def post_process(self, output): for x in output: - if isinstance(x, (BaseItem, dict)): + if isinstance(x, (_BaseItem, dict)): missing = [arg for arg in self.args if arg not in x] if missing: raise ContractFail( diff --git a/scrapy/core/scraper.py b/scrapy/core/scraper.py index edbb4dd66ad..6785e103db3 100644 --- a/scrapy/core/scraper.py +++ b/scrapy/core/scraper.py @@ -14,7 +14,7 @@ from scrapy.exceptions import CloseSpider, DropItem, IgnoreRequest from scrapy import signals from scrapy.http import Request, Response -from scrapy.item import BaseItem +from scrapy.item import _BaseItem from scrapy.core.spidermw import SpiderMiddlewareManager @@ -191,7 +191,7 @@ def _process_spidermw_output(self, output, request, response, spider): """ if isinstance(output, Request): self.crawler.engine.crawl(request=output, spider=spider) - elif isinstance(output, (BaseItem, dict)): + elif isinstance(output, (_BaseItem, dict)): self.slot.itemproc_size += 1 dfd = self.itemproc.process_item(output, spider) dfd.addBoth(self._itemproc_finished, output, response, spider) diff --git a/scrapy/exporters.py b/scrapy/exporters.py index 0cb6cef9875..4731b925a86 100644 --- a/scrapy/exporters.py +++ b/scrapy/exporters.py @@ -12,7 +12,7 @@ from scrapy.utils.serialize import ScrapyJSONEncoder from scrapy.utils.python import to_bytes, to_unicode, is_listlike -from scrapy.item import BaseItem +from scrapy.item import _BaseItem from scrapy.exceptions import 
ScrapyDeprecationWarning @@ -312,7 +312,7 @@ def serialize_field(self, field, name, value): return serializer(value) def _serialize_value(self, value): - if isinstance(value, BaseItem): + if isinstance(value, _BaseItem): return self.export_item(value) if isinstance(value, dict): return dict(self._serialize_dict(value)) diff --git a/scrapy/item.py b/scrapy/item.py index 46d20d0172f..f468ff86f76 100644 --- a/scrapy/item.py +++ b/scrapy/item.py @@ -14,7 +14,23 @@ from scrapy.utils.trackref import object_ref -class BaseItem(object_ref): +class _BaseItem(object_ref): + """ + Temporary class used internally to avoid the deprecation + warning raised by isinstance checks using BaseItem. + """ + pass + + +class _BaseItemMeta(ABCMeta): + def __instancecheck__(cls, instance): + if cls is BaseItem: + warn('scrapy.item.BaseItem is deprecated, please use scrapy.item.Item instead', + ScrapyDeprecationWarning, stacklevel=2) + return super().__instancecheck__(instance) + + +class BaseItem(_BaseItem, metaclass=_BaseItemMeta): """ Deprecated, please use :class:`scrapy.item.Item` instead """ @@ -30,7 +46,7 @@ class Field(dict): """Container of field metadata""" -class ItemMeta(ABCMeta): +class ItemMeta(_BaseItemMeta): """Metaclass_ of :class:`Item` that handles field definitions. .. 
_metaclass: https://realpython.com/python-metaclasses diff --git a/scrapy/shell.py b/scrapy/shell.py index 08ce8948128..83afb74c94b 100644 --- a/scrapy/shell.py +++ b/scrapy/shell.py @@ -13,7 +13,7 @@ from scrapy.crawler import Crawler from scrapy.exceptions import IgnoreRequest from scrapy.http import Request, Response -from scrapy.item import BaseItem +from scrapy.item import _BaseItem from scrapy.settings import Settings from scrapy.spiders import Spider from scrapy.utils.console import start_python_console @@ -26,8 +26,7 @@ class Shell: - relevant_classes = (Crawler, Spider, Request, Response, BaseItem, - Settings) + relevant_classes = (Crawler, Spider, Request, Response, _BaseItem, Settings) def __init__(self, crawler, update_vars=None, code=None): self.crawler = crawler diff --git a/scrapy/utils/misc.py b/scrapy/utils/misc.py index 52cfba208b0..bfe3ccd4028 100644 --- a/scrapy/utils/misc.py +++ b/scrapy/utils/misc.py @@ -14,10 +14,10 @@ from scrapy.utils.datatypes import LocalWeakReferencedCache from scrapy.utils.python import flatten, to_unicode -from scrapy.item import BaseItem +from scrapy.item import _BaseItem -_ITERABLE_SINGLE_VALUES = dict, BaseItem, str, bytes +_ITERABLE_SINGLE_VALUES = dict, _BaseItem, str, bytes def arg_to_iter(arg): diff --git a/scrapy/utils/serialize.py b/scrapy/utils/serialize.py index 9dd72ea7124..bf73dfa18a0 100644 --- a/scrapy/utils/serialize.py +++ b/scrapy/utils/serialize.py @@ -5,7 +5,7 @@ from twisted.internet import defer from scrapy.http import Request, Response -from scrapy.item import BaseItem +from scrapy.item import _BaseItem class ScrapyJSONEncoder(json.JSONEncoder): @@ -26,7 +26,7 @@ def default(self, o): return str(o) elif isinstance(o, defer.Deferred): return str(o) - elif isinstance(o, BaseItem): + elif isinstance(o, _BaseItem): return dict(o) elif isinstance(o, Request): return "<%s %s %s>" % (type(o).__name__, o.method, o.url) diff --git a/tests/test_item.py b/tests/test_item.py index f35a2b9f9ac..6fdd7e302e7 
100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -4,7 +4,7 @@ from warnings import catch_warnings from scrapy.exceptions import ScrapyDeprecationWarning -from scrapy.item import ABCMeta, BaseItem, DictItem, Field, Item, ItemMeta +from scrapy.item import ABCMeta, _BaseItem, BaseItem, DictItem, Field, Item, ItemMeta PY36_PLUS = (sys.version_info.major >= 3) and (sys.version_info.minor >= 6) @@ -334,29 +334,73 @@ class SubclassedDictItem(DictItem): class BaseItemTest(unittest.TestCase): + def test_isinstance_check(self): + + class SubclassedBaseItem(BaseItem): + pass + + class SubclassedItem(Item): + pass + + self.assertTrue(isinstance(BaseItem(), BaseItem)) + self.assertTrue(isinstance(SubclassedBaseItem(), BaseItem)) + self.assertTrue(isinstance(Item(), BaseItem)) + self.assertTrue(isinstance(SubclassedItem(), BaseItem)) + + # make sure internal checks using private _BaseItem class succeed + self.assertTrue(isinstance(BaseItem(), _BaseItem)) + self.assertTrue(isinstance(SubclassedBaseItem(), _BaseItem)) + self.assertTrue(isinstance(Item(), _BaseItem)) + self.assertTrue(isinstance(SubclassedItem(), _BaseItem)) + def test_deprecation_warning(self): + """ + Make sure deprecation warnings are logged whenever BaseItem is used, + either instantiated or in an isinstance check + """ with catch_warnings(record=True) as warnings: BaseItem() self.assertEqual(len(warnings), 1) self.assertEqual(warnings[0].category, ScrapyDeprecationWarning) + with catch_warnings(record=True) as warnings: + class SubclassedBaseItem(BaseItem): pass + SubclassedBaseItem() self.assertEqual(len(warnings), 1) self.assertEqual(warnings[0].category, ScrapyDeprecationWarning) + with catch_warnings(record=True) as warnings: + self.assertFalse(isinstance("foo", BaseItem)) + self.assertEqual(len(warnings), 1) + self.assertEqual(warnings[0].category, ScrapyDeprecationWarning) + + with catch_warnings(record=True) as warnings: + self.assertTrue(isinstance(BaseItem(), BaseItem)) + 
self.assertEqual(len(warnings), 1) + self.assertEqual(warnings[0].category, ScrapyDeprecationWarning) -class ItemNoDeprecationWarningTest(unittest.TestCase): +class ItemNoDeprecationWarningTest(unittest.TestCase): def test_no_deprecation_warning(self): + """ + Make sure deprecation warnings are NOT logged whenever BaseItem subclasses are used. + """ + class SubclassedItem(Item): + pass + with catch_warnings(record=True) as warnings: Item() - self.assertEqual(len(warnings), 0) - with catch_warnings(record=True) as warnings: - class SubclassedItem(Item): - pass SubclassedItem() + _BaseItem() + self.assertFalse(isinstance("foo", _BaseItem)) + self.assertFalse(isinstance("foo", Item)) + self.assertFalse(isinstance("foo", SubclassedItem)) + self.assertTrue(isinstance(_BaseItem(), _BaseItem)) + self.assertTrue(isinstance(Item(), Item)) + self.assertTrue(isinstance(SubclassedItem(), SubclassedItem)) self.assertEqual(len(warnings), 0) From f689e917bf2caa2c48def7399317dd019264663d Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Fri, 8 May 2020 19:44:05 -0300 Subject: [PATCH 6/7] Update Item docstring --- scrapy/item.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scrapy/item.py b/scrapy/item.py index f468ff86f76..b9a0acd3dd9 100644 --- a/scrapy/item.py +++ b/scrapy/item.py @@ -79,8 +79,7 @@ class DictItem(MutableMapping, BaseItem): def __new__(cls, *args, **kwargs): if issubclass(cls, DictItem) and not issubclass(cls, Item): - warn('scrapy.item.DictItem is deprecated, please use ' - 'scrapy.item.Item instead', + warn('scrapy.item.DictItem is deprecated, please use scrapy.item.Item instead', ScrapyDeprecationWarning, stacklevel=2) return super(DictItem, cls).__new__(cls, *args, **kwargs) @@ -147,6 +146,11 @@ class Item(DictItem, metaclass=ItemMeta): If you need instances of a custom class to be considered items by Scrapy, you must inherit from either :class:`Item` or :class:`dict`. 
+ Items offer the ability to declare :class:`Field` attributes, which can be + used to define metadata and control the way data is processed internally. + Please refer to the :ref:`documentation about fields <topics-items-fields>` + for additional information. + Unlike instances of :class:`dict`, instances of :class:`Item` may be :ref:`tracked <topics-leaks-trackrefs>` to debug memory leaks. """ From abfdc1b5425997f1aa69b29465c6ab2324f37fd0 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 11 May 2020 13:20:06 -0300 Subject: [PATCH 7/7] Update docstring for Item class --- scrapy/item.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scrapy/item.py b/scrapy/item.py index b9a0acd3dd9..97dfed9766e 100644 --- a/scrapy/item.py +++ b/scrapy/item.py @@ -138,18 +138,20 @@ class Item(DictItem, metaclass=ItemMeta): """ Base class for scraped items. - In Scrapy, an object is considered an *item* if it is an instance of either - :class:`Item` or :class:`dict`. For example, when the output of a + In Scrapy, an object is considered an ``item`` if it is an instance of either + :class:`Item` or :class:`dict`, or any subclass. For example, when the output of a spider callback is evaluated, only instances of :class:`Item` or :class:`dict` are passed to :ref:`item pipelines <topics-item-pipeline>`. If you need instances of a custom class to be considered items by Scrapy, you must inherit from either :class:`Item` or :class:`dict`. - Items offer the ability to declare :class:`Field` attributes, which can be - used to define metadata and control the way data is processed internally. - Please refer to the :ref:`documentation about fields <topics-items-fields>` - for additional information. + Items must declare :class:`Field` attributes, which are processed and stored + in the ``fields`` attribute. This restricts the set of allowed field names + and prevents typos, raising ``KeyError`` when referring to undefined fields. + Additionally, fields can be used to define metadata and control the way + data is processed internally. 
Please refer to the :ref:`documentation + about fields <topics-items-fields>` for additional information. Unlike instances of :class:`dict`, instances of :class:`Item` may be :ref:`tracked <topics-leaks-trackrefs>` to debug memory leaks.