Skip to content

Commit

Permalink
Merge pull request #56 from mikhuang/master
Browse files Browse the repository at this point in the history
Account for if jsonld is null
  • Loading branch information
wRAR committed Mar 5, 2021
2 parents c53d43c + a44d45d commit 057919d
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 3 deletions.
7 changes: 4 additions & 3 deletions extruct/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ def _extract_items(self, node):
data = json.loads(script, strict=False)
except ValueError:
# sometimes JSON-decoding errors are due to leading HTML or JavaScript comments
data = jstyleson.loads(HTML_OR_JS_COMMENTLINE.sub('', script),strict=False)
data = jstyleson.loads(HTML_OR_JS_COMMENTLINE.sub('', script), strict=False)
if isinstance(data, list):
return data
for item in data:
yield item
elif isinstance(data, dict):
return [data]
yield data
1 change: 1 addition & 0 deletions tests/samples/misc/null_ld_mock.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<script type="application/ld+json">null</script><script type="application/ld+json">null</script><script type="application/ld+json">null</script><script type="application/ld+json">{"\u0040context":"http:\/\/schema.org","\u0040type":"LocalBusiness","name":"Some Name Goes Here","address":{"\u0040type":"PostalAddress","streetAddress":"123 Munroe Hwy","addressLocality":"Tacoma, Georgia","addressRegion":"Georgia","postalCode":"52342"},"aggregateRating":{"\u0040type":"AggregateRating","ratingValue":5,"ratingCount":280}}</script><script type="application/ld+json">null</script><script type="application/ld+json">null</script><script type="application/ld+json">{"\u0040context":"http:\/\/schema.org","\u0040type":"Review","name":"","reviewBody":"Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?"}</script><script type="application/ld+json">null</script><script type="application/ld+json">null</script><script type="application/ld+json">null</script><script type="application/ld+json">null</script><script type="application/ld+json">null</script>
25 changes: 25 additions & 0 deletions tests/samples/misc/null_ld_mock.jsonld
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[
{
"@context": "http://schema.org",
"address": {
"addressLocality": "Tacoma, Georgia",
"addressRegion": "Georgia",
"streetAddress": "123 Munroe Hwy",
"postalCode": "52342",
"@type": "PostalAddress"
},
"aggregateRating": {
"ratingCount": 280,
"@type": "AggregateRating",
"ratingValue": 5
},
"@type": "LocalBusiness",
"name": "Some Name Goes Here"
},
{
"@context": "http://schema.org",
"reviewBody": "Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?",
"@type": "Review",
"name": ""
}
]
9 changes: 9 additions & 0 deletions tests/test_jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,12 @@ def _check_jsonld(self, body, expected):
jsonlde = JsonLdExtractor()
data = jsonlde.extract(body)
self.assertEqual(data, expected)

def test_null(self):
    """Check that JSON-LD script blocks containing ``null`` yield no items.

    The ``null_ld_mock`` sample mixes ``application/ld+json`` scripts whose
    payload is the literal ``null`` with two real JSON-LD objects; the
    expected output contains only those two objects.
    """
    page = "null_ld_mock"
    body = get_testdata('misc', '{}.html'.format(page))
    expected = json.loads(get_testdata('misc', '{}.jsonld'.format(page)).decode('UTF-8'))

    # Reuse the shared helper rather than repeating the
    # extract-and-compare steps it already implements.
    self._check_jsonld(body, expected)

0 comments on commit 057919d

Please sign in to comment.