Skip to content

Commit

Permalink
Merge pull request #96 from scrapinghub/fix-jsonld-return-none-bug
Browse files Browse the repository at this point in the history
fix bug
  • Loading branch information
kmike committed Nov 1, 2018
2 parents b459dbf + fd7d67c commit 18f41f1
Show file tree
Hide file tree
Showing 4 changed files with 1,740 additions and 6 deletions.
12 changes: 7 additions & 5 deletions extruct/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ def extract(self, htmlstring, base_url=None, encoding="UTF-8"):
return self.extract_items(tree, base_url=base_url)

def extract_items(self, document, base_url=None):
- return [item for items in map(self._extract_items,
- self._xp_jsonld(document))
- for item in items
- if item]
+ return [
+ item
+ for items in map(self._extract_items, self._xp_jsonld(document))
+ if items for item in items if item
+ ]

def _extract_items(self, node):
script = node.xpath('string()')
Expand All @@ -33,7 +34,8 @@ def _extract_items(self, node):
data = json.loads(script, strict=False)
except ValueError:
# sometimes JSON-decoding errors are due to leading HTML or JavaScript comments
- data = json.loads(HTML_OR_JS_COMMENTLINE.sub('', script), strict=False)
+ data = json.loads(
+ HTML_OR_JS_COMMENTLINE.sub('', script), strict=False)
if isinstance(data, list):
return data
elif isinstance(data, dict):
Expand Down

0 comments on commit 18f41f1

Please sign in to comment.