-
Notifications
You must be signed in to change notification settings - Fork 114
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #81 from scrapinghub/add-option-to-return-html-nod…
…e-during-extraction [MRG+1] Add option to return html node during extraction
- Loading branch information
Showing
11 changed files
with
228 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
107 changes: 107 additions & 0 deletions
107
tests/samples/schema.org/CreativeWork_flat_with_node_id.001.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
{ | ||
"microdata": [ | ||
{ | ||
"_nodeId_": "book1", | ||
"@type": "Book", | ||
"@context": "http://schema.org", | ||
"id": "http://worldcat.org/entity/work/id/2292573321", | ||
"name": "Rouge et le noir", | ||
"inLanguage": "fr", | ||
"workTranslation": { | ||
"_nodeId_": "creativeWork1", | ||
"@type": "CreativeWork", | ||
"id": "http://worldcat.org/entity/work/id/460647", | ||
"value": "Red and Black : A New Translation, Backgrounds and Sources, Criticism" | ||
} | ||
}, | ||
{ | ||
"_nodeId_": "author1", | ||
"@type": "Person", | ||
"@context": "http://schema.org", | ||
"id": "http://viaf.org/viaf/17823", | ||
"value": "Stendhal" | ||
}, | ||
{ | ||
"_nodeId_": "book2", | ||
"@type": "Book", | ||
"@context": "http://schema.org", | ||
"id": "http://worldcat.org/entity/work/id/460647", | ||
"name": "Red and Black : A New Translation, Backgrounds and Sources, Criticism", | ||
"author": { | ||
"_nodeId_": "author2", | ||
"@type": "Person", | ||
"id": "http://viaf.org/viaf/17823", | ||
"value": "Stendhal" | ||
}, | ||
"inLanguage": "en", | ||
"about": "Psychological fiction, French", | ||
"translationOfWork": { | ||
"_nodeId_": "creativeWork2", | ||
"@type": "CreativeWork", | ||
"id": "http://worldcat.org/entity/work/id/2292573321", | ||
"value": "Rouge et le noir" | ||
} | ||
}, | ||
{ | ||
"_nodeId_": "translator2", | ||
"@type": "Person", | ||
"@context": "http://schema.org", | ||
"id": "http://viaf.org/viaf/8453420", | ||
"value": "Robert Martin Adams" | ||
} | ||
], | ||
"json-ld": [ | ||
{ | ||
"@context": "http://schema.org", | ||
"@type": "WebPage", | ||
"breadcrumb": "Books > Literature & Fiction > Classics", | ||
"mainEntity": { | ||
"@type": "Book", | ||
"author": "/author/jd_salinger.html", | ||
"bookFormat": "http://schema.org/Paperback", | ||
"datePublished": "1991-05-01", | ||
"image": "catcher-in-the-rye-book-cover.jpg", | ||
"inLanguage": "English", | ||
"isbn": "0316769487", | ||
"name": "The Catcher in the Rye", | ||
"numberOfPages": "224", | ||
"offers": { | ||
"@type": "Offer", | ||
"availability": "http://schema.org/InStock", | ||
"price": "6.99", | ||
"priceCurrency": "USD" | ||
}, | ||
"publisher": "Little, Brown, and Company", | ||
"aggregateRating": { | ||
"@type": "AggregateRating", | ||
"ratingValue": "4", | ||
"reviewCount": "3077" | ||
}, | ||
"review": [ | ||
{ | ||
"@type": "Review", | ||
"author": "John Doe", | ||
"datePublished": "2006-05-04", | ||
"name": "A masterpiece of literature", | ||
"reviewBody": "I really enjoyed this book. It captures the essential challenge people face as they try make sense of their lives and grow to adulthood.", | ||
"reviewRating": { | ||
"@type": "Rating", | ||
"ratingValue": "5" | ||
} | ||
}, | ||
{ | ||
"@type": "Review", | ||
"author": "Bob Smith", | ||
"datePublished": "2006-06-15", | ||
"name": "A good read.", | ||
"reviewBody": "Catcher in the Rye is a fun book. It's a good book to read.", | ||
"reviewRating": "4" | ||
} | ||
] | ||
} | ||
} | ||
], | ||
"opengraph": [], | ||
"microformat": [], | ||
"rdfa": [] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
23 changes: 23 additions & 0 deletions
23
tests/samples/schema.org/product_custom_url_and_node_id.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
[{"type": "http://schema.org/Product", | ||
"_nodeId_": "product", | ||
"properties": {"brand": "ACME", | ||
"name": "Executive Anvil", | ||
"image": "http://some-example.com/anvil_executive.jpg", | ||
"description": "Sleeker than ACME's Classic Anvil, the\n Executive Anvil is perfect for the business traveler\n looking for something to drop from a height.", | ||
"mpn": "925872", | ||
"aggregateRating": {"type": "http://schema.org/AggregateRating", | ||
"_nodeId_": "aggregateRating", | ||
"properties": {"ratingValue": "4.4", | ||
"reviewCount": "89"}}, | ||
"offers": {"type": "http://schema.org/Offer", | ||
"_nodeId_": "offer", | ||
"properties": {"priceCurrency": "USD", | ||
"price": "119.99", | ||
"priceValidUntil": "2020-11-05", | ||
"seller": {"type": "http://schema.org/Organization", | ||
"_nodeId_": "organization", | ||
"properties":{"name": "Executive Objects"}}, | ||
"itemCondition": "http://schema.org/UsedCondition", | ||
"availability": "http://schema.org/InStock"}} | ||
} | ||
}] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.