From 1c96c8f539c45bc0a5088318d5fc5a5a5e3c89e8 Mon Sep 17 00:00:00 2001
From: granitosaurus <tinarg@protonmail.com>
Date: Fri, 25 Jan 2019 06:57:09 +0000
Subject: [PATCH 1/8] change jsonldextractor to extract raw

---
 extruct/jsonld.py | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/extruct/jsonld.py b/extruct/jsonld.py
index f11580eb..49f94e50 100644
--- a/extruct/jsonld.py
+++ b/extruct/jsonld.py
@@ -16,26 +16,24 @@
 class JsonLdExtractor(object):
     _xp_jsonld = lxml.etree.XPath('descendant-or-self::script[@type="application/ld+json"]')
 
-    def extract(self, htmlstring, base_url=None, encoding="UTF-8"):
+    def extract(self, htmlstring, base_url=None, encoding="UTF-8", as_json=False):
         tree = parse_html(htmlstring, encoding=encoding)
-        return self.extract_items(tree, base_url=base_url)
+        return self.extract_items(tree, base_url=base_url, as_json=as_json)
 
-    def extract_items(self, document, base_url=None):
+    def extract_items(self, document, base_url=None, as_json=False):
         return [
             item
-            for items in map(self._extract_items, self._xp_jsonld(document))
+            for items in map(self._extract_items_raw if as_json else self.extract_items, self._xp_jsonld(document))
             if items for item in items if item
         ]
 
+    def _extract_items_raw(self, node):
+        return HTML_OR_JS_COMMENTLINE.sub('', node.xpath('string()'))
+
     def _extract_items(self, node):
-        script = node.xpath('string()')
-        try:
-            # TODO: `strict=False` can be configurable if needed
-            data = json.loads(script, strict=False)
-        except ValueError:
-            # sometimes JSON-decoding errors are due to leading HTML or JavaScript comments
-            data = json.loads(
-                HTML_OR_JS_COMMENTLINE.sub('', script), strict=False)
+        script = self._extract_items_raw(node)
+        # TODO: `strict=False` can be configurable if needed
+        data = json.loads(script, strict=False)
         if isinstance(data, list):
             return data
         elif isinstance(data, dict):

From efe64cbf72ec349b95a1c66aa971a5fda9213cef Mon Sep 17 00:00:00 2001
From: granitosaurus <tinarg@protonmail.com>
Date: Fri, 25 Jan 2019 07:07:46 +0000
Subject: [PATCH 2/8] fix typo

---
 extruct/jsonld.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extruct/jsonld.py b/extruct/jsonld.py
index 49f94e50..8b8c16d4 100644
--- a/extruct/jsonld.py
+++ b/extruct/jsonld.py
@@ -23,7 +23,7 @@ def extract(self, htmlstring, base_url=None, encoding="UTF-8", as_json=False):
     def extract_items(self, document, base_url=None, as_json=False):
         return [
             item
-            for items in map(self._extract_items_raw if as_json else self.extract_items, self._xp_jsonld(document))
+            for items in map(self._extract_items_raw if as_json else self._extract_items, self._xp_jsonld(document))
             if items for item in items if item
         ]
 

From 7fa3a47fcf44b3181e130308f62d40343d8cce9b Mon Sep 17 00:00:00 2001
From: granitosaurus <tinarg@protonmail.com>
Date: Mon, 28 Jan 2019 09:23:30 +0000
Subject: [PATCH 3/8] rename kwarg as_json to parse_json

---
 extruct/jsonld.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/extruct/jsonld.py b/extruct/jsonld.py
index 8b8c16d4..dcc1f1e8 100644
--- a/extruct/jsonld.py
+++ b/extruct/jsonld.py
@@ -16,14 +16,15 @@
 class JsonLdExtractor(object):
     _xp_jsonld = lxml.etree.XPath('descendant-or-self::script[@type="application/ld+json"]')
 
-    def extract(self, htmlstring, base_url=None, encoding="UTF-8", as_json=False):
+    def extract(self, htmlstring, base_url=None, encoding="UTF-8", parse_json=True):
         tree = parse_html(htmlstring, encoding=encoding)
-        return self.extract_items(tree, base_url=base_url, as_json=as_json)
+        return self.extract_items(tree, base_url=base_url, parse_json=parse_json)
 
-    def extract_items(self, document, base_url=None, as_json=False):
+    def extract_items(self, document, base_url=None, parse_json=True):
         return [
             item
-            for items in map(self._extract_items_raw if as_json else self._extract_items, self._xp_jsonld(document))
+            for items in map(self._extract_items_raw if not parse_json
+                             else self._extract_items, self._xp_jsonld(document))
             if items for item in items if item
         ]
 

From 2f6fe4fbe83c04eb8f739e5d41d5b5c568d0b37c Mon Sep 17 00:00:00 2001
From: granitosaurus <tinarg@protonmail.com>
Date: Mon, 28 Jan 2019 10:20:19 +0000
Subject: [PATCH 4/8] revert to try/except for loading json with comments

---
 extruct/jsonld.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/extruct/jsonld.py b/extruct/jsonld.py
index dcc1f1e8..dcc985ef 100644
--- a/extruct/jsonld.py
+++ b/extruct/jsonld.py
@@ -29,12 +29,15 @@ def extract_items(self, document, base_url=None, parse_json=True):
         ]
 
     def _extract_items_raw(self, node):
-        return HTML_OR_JS_COMMENTLINE.sub('', node.xpath('string()'))
+        return node.xpath('string()')
 
     def _extract_items(self, node):
         script = self._extract_items_raw(node)
         # TODO: `strict=False` can be configurable if needed
-        data = json.loads(script, strict=False)
+        try:
+            data = json.loads(script, strict=False)
+        except ValueError:
+            data = json.loads(HTML_OR_JS_COMMENTLINE.sub('', script), strict=False)
         if isinstance(data, list):
             return data
         elif isinstance(data, dict):

From d9564d0974ca7d245755574d055fc7726d5e79ab Mon Sep 17 00:00:00 2001
From: granitosaurus <tinarg@protonmail.com>
Date: Tue, 29 Jan 2019 01:39:39 +0000
Subject: [PATCH 5/8] adjust comments

---
 extruct/jsonld.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/extruct/jsonld.py b/extruct/jsonld.py
index dcc985ef..cb57e290 100644
--- a/extruct/jsonld.py
+++ b/extruct/jsonld.py
@@ -33,10 +33,11 @@ def _extract_items_raw(self, node):
 
     def _extract_items(self, node):
         script = self._extract_items_raw(node)
-        # TODO: `strict=False` can be configurable if needed
         try:
+            # TODO: `strict=False` can be configurable if needed
             data = json.loads(script, strict=False)
         except ValueError:
+            # sometimes JSON-decoding errors are due to leading HTML or JavaScript comments
             data = json.loads(HTML_OR_JS_COMMENTLINE.sub('', script), strict=False)
         if isinstance(data, list):
             return data

From 08a43db83a68b492f551e1c1574b50b16110ea9c Mon Sep 17 00:00:00 2001
From: granitosaurus <tinarg@protonmail.com>
Date: Tue, 29 Jan 2019 01:46:29 +0000
Subject: [PATCH 6/8] add parse_json argument to rdfa extractor; update readme
 with parse_json examples for jsonld and rdfa extractors

---
 README.rst      |  4 ++++
 extruct/rdfa.py | 10 ++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/README.rst b/README.rst
index 1c71c690..c96c9f98 100644
--- a/README.rst
+++ b/README.rst
@@ -393,6 +393,8 @@ JSON-LD extraction
     'jobTitle': 'Graduate research assistant',
     'name': 'John Doe',
     'url': 'http://www.example.com'}]
+ >>> # pass parse_json=False to get each script's raw (unparsed) JSON string instead:
+ >>> raw_json = jslde.extract(html, parse_json=False)
 
 
 RDFa extraction (experimental)
@@ -437,6 +439,8 @@ RDFa extraction (experimental)
                                                  'photos than I do:\n'
                                                  '      '}],
     'http://schema.org/creator': [{'@id': 'http://www.example.com/index.html#me'}]}]
+ >>> # pass parse_json=False to get the serialized JSON-LD string instead of parsed data:
+ >>> raw_json = rdfae.extract(html, parse_json=False)
 
 You'll get a list of expanded JSON-LD nodes.
 
diff --git a/extruct/rdfa.py b/extruct/rdfa.py
index e5ab06bd..81c7bc41 100644
--- a/extruct/rdfa.py
+++ b/extruct/rdfa.py
@@ -30,11 +30,11 @@
 class RDFaExtractor(object):
 
     def extract(self, htmlstring, base_url=None, encoding="UTF-8",
-                expanded=True):
+                expanded=True, parse_json=True):
         tree = parse_xmldom_html(htmlstring, encoding=encoding)
-        return self.extract_items(tree, base_url=base_url, expanded=expanded)
+        return self.extract_items(tree, base_url=base_url, expanded=expanded, parse_json=parse_json)
 
-    def extract_items(self, document, base_url=None, expanded=True):
+    def extract_items(self, document, base_url=None, expanded=True, parse_json=True):
         options = Options(output_processor_graph=True,
                           embedded_rdf=False,
                           space_preserve=True,
@@ -46,4 +46,6 @@ def extract_items(self, document, base_url=None, expanded=True):
 
         g = PyRdfa(options, base=base_url).graph_from_DOM(document, graph=Graph(), pgraph=Graph())
         jsonld_string = g.serialize(format='json-ld', auto_compact=not expanded).decode('utf-8')
-        return json.loads(jsonld_string)
+        if parse_json:
+            return json.loads(jsonld_string)
+        return jsonld_string

From 7f1b66f37793e6a0e97c08fab3831a77a547993a Mon Sep 17 00:00:00 2001
From: granitosaurus <tinarg@protonmail.com>
Date: Sat, 4 Jan 2020 08:27:09 +0000
Subject: [PATCH 7/8] fix json-ld raw extraction separating characters
 unnecessarily

---
 extruct/jsonld.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/extruct/jsonld.py b/extruct/jsonld.py
index cb57e290..62268fa2 100644
--- a/extruct/jsonld.py
+++ b/extruct/jsonld.py
@@ -21,10 +21,11 @@ def extract(self, htmlstring, base_url=None, encoding="UTF-8", parse_json=True):
         return self.extract_items(tree, base_url=base_url, parse_json=parse_json)
 
     def extract_items(self, document, base_url=None, parse_json=True):
+        if not parse_json:
+            return [self._extract_items_raw(item) for item in self._xp_jsonld(document)]
         return [
             item
-            for items in map(self._extract_items_raw if not parse_json
-                             else self._extract_items, self._xp_jsonld(document))
+            for items in map(self._extract_items, self._xp_jsonld(document))
             if items for item in items if item
         ]
 

From ff22c727f1e2e70db0e003a10291d349b36c4e8d Mon Sep 17 00:00:00 2001
From: granitosaurus <tinarg@protonmail.com>
Date: Sat, 4 Jan 2020 08:27:52 +0000
Subject: [PATCH 8/8] add tests for parse_json=False flag for rdfa and jsonld
 extractors

---
 .../misc/Portfolio_Niels_Lubberman.json       | 147 +++++++++++-------
 ...e Owl Music Parlor, 31 Oct 2015.raw.jsonld |   1 +
 tests/test_jsonld.py                          |  11 +-
 tests/test_rdfa.py                            |  14 +-
 4 files changed, 112 insertions(+), 61 deletions(-)
 create mode 100644 tests/samples/songkick/Elysian Fields Brooklyn Tickets, The Owl Music Parlor, 31 Oct 2015.raw.jsonld

diff --git a/tests/samples/misc/Portfolio_Niels_Lubberman.json b/tests/samples/misc/Portfolio_Niels_Lubberman.json
index fb4364fd..490268e0 100644
--- a/tests/samples/misc/Portfolio_Niels_Lubberman.json
+++ b/tests/samples/misc/Portfolio_Niels_Lubberman.json
@@ -1,42 +1,24 @@
 [
   {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_poster_jobs.png?itok=QUE2ZKFT&sc=259cbe26e2b9c2489443d05fdcd3f824"
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/images/headlines/headline_download.png",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
   },
   {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_kanaal.jpg?itok=cV8u1cxa&sc=b496d2d94d76a056d4e6efde1cfb2abe"
-  },
-  {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_asppolgs_final.png?itok=apZpSYdS&sc=7ade4b4c9baeea7a86bad48589f9649d"
-  },
-  {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_poster_tim_berners_lee.png?itok=DghJBlqt&sc=259cbe26e2b9c2489443d05fdcd3f824"
-  },
-  {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_poster_roemenie.png?itok=pScuIyeN&sc=363eea0a2ddd62c554241fc1fed1f3bc"
-  },
-  {
-    "http://www.w3.org/1999/xhtml/vocab#icon": [
-      {
-        "@id": "http://nielslubberman.nl/drupal/misc/favicon.ico"
-      }
-    ],
+    "@id": "http://nielslubberman.nl/drupal/",
     "http://purl.org/rss/1.0/modules/content/encoded": [
       {
-        "@value": "<p xml:lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:og=\"http://ogp.me/ns#\" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\" xmlns:sioc=\"http://rdfs.org/sioc/ns#\" xmlns:sioct=\"http://rdfs.org/sioc/types#\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\">Op deze vernieuwde website kunt u enkele van mijn projecten vinden, tevens kunt u lessen downloaden die ik heb gemaakt.</p>\n\n",
-        "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"
+        "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
+        "@value": "<p xmlns:content=\"http://purl.org/rss/1.0/modules/content/\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:og=\"http://ogp.me/ns#\" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\" xmlns:sioc=\"http://rdfs.org/sioc/ns#\" xmlns:sioct=\"http://rdfs.org/sioc/types#\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\" xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">Voeg mij nu toe aan uw professionele netwerk op LinkedIn.</p>\n\n"
       },
       {
-        "@value": "<p xml:lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:og=\"http://ogp.me/ns#\" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\" xmlns:sioc=\"http://rdfs.org/sioc/ns#\" xmlns:sioct=\"http://rdfs.org/sioc/types#\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\">Voeg mij nu toe aan uw professionele netwerk op LinkedIn.</p>\n\n",
-        "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"
+        "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
+        "@value": "<p xmlns:content=\"http://purl.org/rss/1.0/modules/content/\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:og=\"http://ogp.me/ns#\" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\" xmlns:sioc=\"http://rdfs.org/sioc/ns#\" xmlns:sioct=\"http://rdfs.org/sioc/types#\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\" xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">Op deze vernieuwde website kunt u enkele van mijn projecten vinden, tevens kunt u lessen downloaden die ik heb gemaakt.</p>\n\n"
       },
       {
-        "@value": "<p xml:lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:og=\"http://ogp.me/ns#\" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\" xmlns:sioc=\"http://rdfs.org/sioc/ns#\" xmlns:sioct=\"http://rdfs.org/sioc/types#\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\">Met behulp van de pijl hieronder kunt u mijn CV downloaden.</p>\n\n",
-        "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"
+        "@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral",
+        "@value": "<p xmlns:content=\"http://purl.org/rss/1.0/modules/content/\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:og=\"http://ogp.me/ns#\" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\" xmlns:sioc=\"http://rdfs.org/sioc/ns#\" xmlns:sioct=\"http://rdfs.org/sioc/types#\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\" xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">Met behulp van de pijl hieronder kunt u mijn CV downloaden.</p>\n\n"
       }
     ],
     "http://www.w3.org/1999/xhtml/vocab#alternate": [
@@ -44,63 +26,112 @@
         "@id": "http://nielslubberman.nl/drupal/?q=rss.xml"
       }
     ],
-    "@id": "http://nielslubberman.nl/drupal/"
+    "http://www.w3.org/1999/xhtml/vocab#icon": [
+      {
+        "@id": "http://nielslubberman.nl/drupal/misc/favicon.ico"
+      }
+    ]
   },
   {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/images/headlines/headline_linkedin.png"
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_poster_turing.png?itok=anlTc5N6&sc=259cbe26e2b9c2489443d05fdcd3f824",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
   },
   {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/images/headlines/headline_download.png"
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_poster_jobs.png?itok=QUE2ZKFT&sc=259cbe26e2b9c2489443d05fdcd3f824",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
   },
   {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_poster_turing.png?itok=anlTc5N6&sc=259cbe26e2b9c2489443d05fdcd3f824"
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/images/projects/grey_img_2.png",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
   },
   {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_ru_secure.png?itok=2-xDWirb&sc=67c0f518676aaf034a9215a0ec7e9e1e"
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_poster_roemenie.png?itok=pScuIyeN&sc=363eea0a2ddd62c554241fc1fed1f3bc",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
   },
   {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/1979%20Nicolae%20si%20Nicu%20Ceausescu%20la%20Canal.JPG?itok=CYcBbx1w&sc=3e5afd5e3e8746f6db8fb6f52a325372"
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_kanaal.jpg?itok=cV8u1cxa&sc=b496d2d94d76a056d4e6efde1cfb2abe",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
   },
   {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/images/projects/grey_img_1.png"
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_poster_gates.png?itok=sIwGJEG3&sc=259cbe26e2b9c2489443d05fdcd3f824",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
   },
   {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_poster_gates.png?itok=sIwGJEG3&sc=259cbe26e2b9c2489443d05fdcd3f824"
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/1979%20Nicolae%20si%20Nicu%20Ceausescu%20la%20Canal.JPG?itok=CYcBbx1w&sc=3e5afd5e3e8746f6db8fb6f52a325372",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
   },
   {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/images/projects/grey_img_3.png"
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_poster_tim_berners_lee.png?itok=DghJBlqt&sc=259cbe26e2b9c2489443d05fdcd3f824",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
   },
   {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/images/projects/grey_img_2.png"
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/images/headlines/headline_linkedin.png",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
   },
   {
-    "http://www.w3.org/2004/02/skos/core#prefLabel": [
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/images/projects/grey_img_1.png",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
+  },
+  {
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/images/projects/grey_img_4.png",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
+  },
+  {
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/images/projects/grey_img_3.png",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
+  },
+  {
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_ru_secure.png?itok=2-xDWirb&sc=67c0f518676aaf034a9215a0ec7e9e1e",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
+  },
+  {
+    "@id": "http://nielslubberman.nl/drupal/sites/default/files/styles/smallcrop/public/projects/pictures/pro_asppolgs_final.png?itok=apZpSYdS&sc=7ade4b4c9baeea7a86bad48589f9649d",
+    "@type": [
+      "http://xmlns.com/foaf/0.1/Image"
+    ]
+  },
+  {
+    "@id": "http://nielslubberman.nl/drupal/?q=taxonomy/term/1",
+    "@type": [
+      "http://www.w3.org/2004/02/skos/core#Concept"
+    ],
+    "http://www.w3.org/2000/01/rdf-schema#label": [
       {
         "@language": "en",
         "@value": "Geschiedenis"
       }
     ],
-    "http://www.w3.org/2000/01/rdf-schema#label": [
+    "http://www.w3.org/2004/02/skos/core#prefLabel": [
       {
         "@language": "en",
         "@value": "Geschiedenis"
       }
-    ],
-    "@type": ["http://www.w3.org/2004/02/skos/core#Concept"],
-    "@id": "http://nielslubberman.nl/drupal/?q=taxonomy/term/1"
-  },
-  {
-    "@type": ["http://xmlns.com/foaf/0.1/Image"],
-    "@id": "http://nielslubberman.nl/drupal/sites/default/files/images/projects/grey_img_4.png"
+    ]
   }
 ]
-
diff --git a/tests/samples/songkick/Elysian Fields Brooklyn Tickets, The Owl Music Parlor, 31 Oct 2015.raw.jsonld b/tests/samples/songkick/Elysian Fields Brooklyn Tickets, The Owl Music Parlor, 31 Oct 2015.raw.jsonld
new file mode 100644
index 00000000..bb0ef0dd
--- /dev/null
+++ b/tests/samples/songkick/Elysian Fields Brooklyn Tickets, The Owl Music Parlor, 31 Oct 2015.raw.jsonld	
@@ -0,0 +1 @@
+[{"@context":"http://schema.org","@type":"MusicEvent","name":"Elysian Fields","url":"http://www.songkick.com/concerts/25248299-elysian-fields-at-owl-music-parlor?utm_medium=organic\u0026utm_source=microformat","location":{"@type":"Place","address":{"@type":"PostalAddress","addressLocality":"Brooklyn","addressCountry":"US","addressRegion":"NY","streetAddress":"497 Rogers Ave","postalCode":"11225"},"name":"The Owl Music Parlor","sameAs":"http://www.theowl.nyc","geo":{"@type":"GeoCoordinates","latitude":40.660109,"longitude":-73.953193}},"startDate":"2015-10-31T19:30:00-0400","performer":[{"@type":"MusicGroup","name":"Elysian Fields","sameAs":"http://www.songkick.com/artists/236156-elysian-fields?utm_medium=organic\u0026utm_source=microformat"}]}]
\ No newline at end of file
diff --git a/tests/test_jsonld.py b/tests/test_jsonld.py
index 47309ee9..63699908 100644
--- a/tests/test_jsonld.py
+++ b/tests/test_jsonld.py
@@ -41,6 +41,13 @@ def test_jsonld_with_control_characters_comment(self):
             folder='custom.invalid',
             page='JSONLD_with_control_characters_comment')
 
+    def test_jsonld_raw_json(self):
+        folder = 'songkick'
+        page = 'Elysian Fields Brooklyn Tickets, The Owl Music Parlor, 31 Oct 2015'
+        body = get_testdata(folder, '{}.html'.format(page))
+        expected = get_testdata(folder, '{}.raw.jsonld'.format(page)).decode('utf8')
+        self._check_jsonld(body, [expected], parse_json=False)
+
     def assertJsonLdCorrect(self, folder, page):
         body, expected = self._get_body_expected(folder, page)
         self._check_jsonld(body, expected)
@@ -50,7 +57,7 @@ def _get_body_expected(self, folder, page):
         expected = get_testdata(folder, '{}.jsonld'.format(page))
         return body, json.loads(expected.decode('utf8'))
 
-    def _check_jsonld(self, body, expected):
+    def _check_jsonld(self, body, expected, **extract_kwargs):
         jsonlde = JsonLdExtractor()
-        data = jsonlde.extract(body)
+        data = jsonlde.extract(body, **extract_kwargs)
         self.assertEqual(data, expected)
diff --git a/tests/test_rdfa.py b/tests/test_rdfa.py
index 98da33d2..59f25232 100644
--- a/tests/test_rdfa.py
+++ b/tests/test_rdfa.py
@@ -90,6 +90,19 @@ def test_wikipedia_xhtml_rdfa(self):
 
         self.assertJsonLDEqual(data, expected)
 
+    def test_wikipedia_xhtml_rdfa_raw(self):
+        """
+        Test whether the raw JSON-LD string is returned properly when the
+        parse_json=False keyword argument is passed to the extraction method.
+        """
+        fileprefix = 'xhtml+rdfa'
+        body = get_testdata('wikipedia', fileprefix + '.html')
+        expected = get_testdata('wikipedia', fileprefix + '.expanded.json').decode('UTF-8').strip()
+        data = RDFaExtractor().extract(
+            body, base_url='http://www.example.com/index.html', parse_json=False
+        ).strip()
+        self.assertEquals(self.normalize_bnode_ids(data), self.normalize_bnode_ids(expected))
+
     def test_wikipedia_xhtml_rdfa_no_prefix(self):
         body = get_testdata('misc', 'Portfolio_Niels_Lubberman.html')
         expected = json.loads(
@@ -98,5 +111,4 @@ def test_wikipedia_xhtml_rdfa_no_prefix(self):
 
         rdfae = RDFaExtractor()
         data = rdfae.extract(body, base_url='http://nielslubberman.nl/drupal/')
-
         self.assertJsonLDEqual(data, expected)