From f6ed9598f4cd978ec8dec8f9d40056382bca4bda Mon Sep 17 00:00:00 2001
From: Ihor
Date: Mon, 24 Mar 2025 11:50:38 +0200
Subject: [PATCH 1/2] Skip empty JSON-LD scripts by trimming and skipping empty input

---
 extruct/jsonld.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/extruct/jsonld.py b/extruct/jsonld.py
index 989ac04a..b1ffbe27 100644
--- a/extruct/jsonld.py
+++ b/extruct/jsonld.py
@@ -33,7 +33,8 @@ def extract_items(self, document, base_url=None):
         ]
 
     def _extract_items(self, node):
-        script = node.xpath("string()")
+        script = node.xpath("string()").strip()
+        if not script: return
         try:
             # TODO: `strict=False` can be configurable if needed
             data = json.loads(script, strict=False)

From f02cf2397d27ae55e2d99932d8465b37bc9ddcd9 Mon Sep 17 00:00:00 2001
From: adnim
Date: Mon, 24 Mar 2025 11:15:52 +0100
Subject: [PATCH 2/2] Add test for empty JSON-LD scripts and fix formatting

---
 extruct/jsonld.py    | 3 ++-
 tests/test_jsonld.py | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/extruct/jsonld.py b/extruct/jsonld.py
index b1ffbe27..d25a4183 100644
--- a/extruct/jsonld.py
+++ b/extruct/jsonld.py
@@ -34,7 +34,8 @@ def extract_items(self, document, base_url=None):
 
     def _extract_items(self, node):
         script = node.xpath("string()").strip()
-        if not script: return
+        if not script:
+            return
         try:
             # TODO: `strict=False` can be configurable if needed
             data = json.loads(script, strict=False)
diff --git a/tests/test_jsonld.py b/tests/test_jsonld.py
index d274b3d1..178b3229 100644
--- a/tests/test_jsonld.py
+++ b/tests/test_jsonld.py
@@ -63,3 +63,9 @@ def test_null(self):
         jsonlde = JsonLdExtractor()
         data = jsonlde.extract(body)
         self.assertEqual(data, expected)
+
+    def test_empty_jsonld_script(self):
+        jsonlde = JsonLdExtractor()
+        body = ''
+        data = jsonlde.extract(body)
+        self.assertEqual(data, [])