From ba568a32410ce1f1be98288e3aa6b1f421ae991b Mon Sep 17 00:00:00 2001
From: Joaquin <joaquingc123@gmail.com>
Date: Thu, 13 Dec 2018 17:51:15 -0500
Subject: [PATCH 01/11] Add dublincore schema

---
 extruct/_extruct.py                     |   8 +-
 extruct/dublincore.py                   | 156 ++++++++++++++++++++++++
 tests/samples/misc/dublincore_test.html |  21 ++++
 tests/samples/misc/dublincore_test.json |  22 ++++
 tests/tests_dublincore.py               |  19 +++
 5 files changed, 225 insertions(+), 1 deletion(-)
 create mode 100644 extruct/dublincore.py
 create mode 100644 tests/samples/misc/dublincore_test.html
 create mode 100644 tests/samples/misc/dublincore_test.json
 create mode 100644 tests/tests_dublincore.py

diff --git a/extruct/_extruct.py b/extruct/_extruct.py
index ba35a6fa..c6fd0c74 100644
--- a/extruct/_extruct.py
+++ b/extruct/_extruct.py
@@ -6,11 +6,12 @@
 from extruct.w3cmicrodata import MicrodataExtractor
 from extruct.opengraph import OpenGraphExtractor
 from extruct.microformat import MicroformatExtractor
+from extruct.dublincore import DublinCoreExtractor
 from extruct.uniform import _umicrodata_microformat, _uopengraph
 from extruct.utils import parse_xmldom_html
 
 logger = logging.getLogger(__name__)
-SYNTAXES = ['microdata', 'opengraph', 'json-ld', 'microformat', 'rdfa']
+SYNTAXES = ['microdata', 'opengraph', 'json-ld', 'microformat', 'rdfa', 'dublincore']
 
 
 def extract(htmlstring,
@@ -95,6 +96,11 @@ def extract(htmlstring,
             ('rdfa', RDFaExtractor().extract_items,
              tree,
              ))
+    if 'dublincore' in syntaxes:
+        processors.append(
+            ('dublincore', DublinCoreExtractor().extract_items,
+             tree,
+             ))
     output = {}
     for syntax, extract, document in processors:
         try:
diff --git a/extruct/dublincore.py b/extruct/dublincore.py
new file mode 100644
index 00000000..bce9c857
--- /dev/null
+++ b/extruct/dublincore.py
@@ -0,0 +1,156 @@
+import re
+
+from extruct.utils import parse_html
+
+_DC_ELEMENTS = {  # lower-cased DCMES 1.1 element name -> element URI: http://dublincore.org/documents/dces/
+    'contributor': 'http://purl.org/dc/elements/1.1/contributor',
+    'coverage': 'http://purl.org/dc/elements/1.1/coverage',
+    'creator': 'http://purl.org/dc/elements/1.1/creator',
+    'date': 'http://purl.org/dc/elements/1.1/date',
+    'description': 'http://purl.org/dc/elements/1.1/description',
+    'format': 'http://purl.org/dc/elements/1.1/format',
+    'identifier': 'http://purl.org/dc/elements/1.1/identifier',
+    'language': 'http://purl.org/dc/elements/1.1/language',
+    'publisher': 'http://purl.org/dc/elements/1.1/publisher',
+    'relation': 'http://purl.org/dc/elements/1.1/relation',
+    'rights': 'http://purl.org/dc/elements/1.1/rights',
+    'source': 'http://purl.org/dc/elements/1.1/source',
+    'subject': 'http://purl.org/dc/elements/1.1/subject',
+    'title': 'http://purl.org/dc/elements/1.1/title',
+    'type': 'http://purl.org/dc/elements/1.1/type',
+}
+
+_DC_TERMS = {  # lower-cased DCMI term name -> term URI: http://dublincore.org/documents/2008/01/14/dcmi-terms/
+    'abstract': 'http://purl.org/dc/terms/abstract',
+    'description': 'http://purl.org/dc/terms/description',
+    'accessrights': 'http://purl.org/dc/terms/accessRights',
+    'rights': 'http://purl.org/dc/terms/rights',
+    'rightsstatement': 'http://purl.org/dc/terms/RightsStatement',
+    'accrualmethod': 'http://purl.org/dc/terms/accrualMethod',
+    'collection': 'http://purl.org/dc/terms/Collection',
+    'methodofaccrual': 'http://purl.org/dc/terms/MethodOfAccrual',
+    'accrualperiodicity': 'http://purl.org/dc/terms/accrualPeriodicity',
+    'frequency': 'http://purl.org/dc/terms/Frequency',
+    'accrualpolicy': 'http://purl.org/dc/terms/accrualPolicy',
+    'policy': 'http://purl.org/dc/terms/Policy',
+    'alternative': 'http://purl.org/dc/terms/alternative',
+    'title': 'http://purl.org/dc/terms/title',
+    'audience': 'http://purl.org/dc/terms/audience',
+    'agentclass': 'http://purl.org/dc/terms/AgentClass',
+    'available': 'http://purl.org/dc/terms/available',
+    'date': 'http://purl.org/dc/terms/date',
+    'bibliographiccitation': 'http://purl.org/dc/terms/bibliographicCitation',
+    'identifier': 'http://purl.org/dc/terms/identifier',
+    'bibliographicresource': 'http://purl.org/dc/terms/BibliographicResource',
+    'conformsto': 'http://purl.org/dc/terms/conformsTo',
+    'relation': 'http://purl.org/dc/terms/relation',
+    'standard': 'http://purl.org/dc/terms/Standard',
+    'contributor': 'http://purl.org/dc/terms/contributor',
+    'agent': 'http://purl.org/dc/terms/Agent',
+    'coverage': 'http://purl.org/dc/terms/coverage',
+    'locationperiodorjurisdiction': 'http://purl.org/dc/terms/LocationPeriodOrJurisdiction',
+    'created': 'http://purl.org/dc/terms/created',
+    'creator': 'http://purl.org/dc/terms/creator',
+    'dateaccepted': 'http://purl.org/dc/terms/dateAccepted',
+    'datecopyrighted': 'http://purl.org/dc/terms/dateCopyrighted',
+    'datesubmitted': 'http://purl.org/dc/terms/dateSubmitted',
+    'educationlevel': 'http://purl.org/dc/terms/educationLevel',
+    'extent': 'http://purl.org/dc/terms/extent',
+    'format': 'http://purl.org/dc/terms/format',
+    'sizeorduration': 'http://purl.org/dc/terms/SizeOrDuration',
+    'mediatypeorextent': 'http://purl.org/dc/terms/MediaTypeOrExtent',
+    'hasformat': 'http://purl.org/dc/terms/hasFormat',
+    'haspart': 'http://purl.org/dc/terms/hasPart',
+    'hasversion': 'http://purl.org/dc/terms/hasVersion',
+    'instructionalmethod': 'http://purl.org/dc/terms/instructionalMethod',
+    'methodofinstruction': 'http://purl.org/dc/terms/MethodOfInstruction',
+    'isformatof': 'http://purl.org/dc/terms/isFormatOf',
+    'ispartof': 'http://purl.org/dc/terms/isPartOf',
+    'isreferencedby': 'http://purl.org/dc/terms/isReferencedBy',
+    'isreplacedby': 'http://purl.org/dc/terms/isReplacedBy',
+    'isrequiredby': 'http://purl.org/dc/terms/isRequiredBy',
+    'issued': 'http://purl.org/dc/terms/issued',
+    'isversionof': 'http://purl.org/dc/terms/isVersionOf',
+    'language': 'http://purl.org/dc/terms/language',
+    'linguisticsystem': 'http://purl.org/dc/terms/LinguisticSystem',
+    'license': 'http://purl.org/dc/terms/license',
+    'licensedocument': 'http://purl.org/dc/terms/LicenseDocument',
+    'mediator': 'http://purl.org/dc/terms/mediator',
+    'medium': 'http://purl.org/dc/terms/medium',
+    'physicalresource': 'http://purl.org/dc/terms/PhysicalResource',
+    'physicalmedium': 'http://purl.org/dc/terms/PhysicalMedium',
+    'modified': 'http://purl.org/dc/terms/modified',
+    'provenance': 'http://purl.org/dc/terms/provenance',
+    'provenancestatement': 'http://purl.org/dc/terms/ProvenanceStatement',
+    'publisher': 'http://purl.org/dc/terms/publisher',
+    'references': 'http://purl.org/dc/terms/references',
+    'replaces': 'http://purl.org/dc/terms/replaces',
+    'requires': 'http://purl.org/dc/terms/requires',
+    'rightsholder': 'http://purl.org/dc/terms/rightsHolder',
+    'source': 'http://purl.org/dc/terms/source',
+    'spatial': 'http://purl.org/dc/terms/spatial',
+    'location': 'http://purl.org/dc/terms/Location',
+    'subject': 'http://purl.org/dc/terms/subject',
+    'tableofcontents': 'http://purl.org/dc/terms/tableOfContents',
+    'temporal': 'http://purl.org/dc/terms/temporal',
+    'periodoftime': 'http://purl.org/dc/terms/PeriodOfTime',
+    'type': 'http://purl.org/dc/terms/type',
+    'valid': 'http://purl.org/dc/terms/valid',
+}
+
+_URL_NAMESPACES = ['http://purl.org/dc/terms/', 'http://purl.org/dc/elements/1.1/']
+
+
+class DublinCoreExtractor(object):
+    """DublinCore extractor following extruct API."""
+
+    def extract(self, htmlstring, base_url=None, encoding='UTF-8'):
+        tree = parse_html(htmlstring, encoding=encoding)
+        return list(self.extract_items(tree, base_url=base_url))
+
+    def extract_items(self, document, base_url=None):
+        elements = []
+        terms = []
+
+        def get_lower_attrib(name):
+            # get attribute to compare against _DC_TERMS or _DC_ELEMENTS
+            return re.sub(".*\.", "", name).lower()
+
+        def attrib_to_dict(attribs):
+            # convert _attrib type to dict
+            node_dict = {}
+            for attrib, value in attribs.items():
+                node_dict.update({attrib: value})
+            return node_dict
+
+        def populate_results(node, main_attrib):
+            # fill list with DC Elements or DC Terms
+            node_attrib = node.attrib
+            if main_attrib not in node_attrib:
+                return
+
+            name = node.attrib[main_attrib]
+            lower_name = get_lower_attrib(name)
+            if lower_name in _DC_ELEMENTS:
+                node.attrib.update({'URI': _DC_ELEMENTS[lower_name]})
+                elements.append(attrib_to_dict(node.attrib))
+
+            elif lower_name in _DC_TERMS:
+                node.attrib.update({'URI': _DC_TERMS[lower_name]})
+                terms.append(attrib_to_dict(node.attrib))
+
+        namespaces_nodes = document.xpath('//link[contains(@rel,"schema")]')
+        namespaces = {}
+        for i in namespaces_nodes:
+            if i.attrib['href'] in _URL_NAMESPACES:
+                namespaces.update({re.sub("schema\.", "", i.attrib['rel']): i.attrib['href']})
+
+        list_meta_node = document.xpath('//meta')
+        for meta_node in list_meta_node:
+            populate_results(meta_node, 'name')
+
+        list_link_node = document.xpath('//link')
+        for link_node in list_link_node:
+            populate_results(link_node, 'rel')
+
+        yield {'namespaces': namespaces, 'elements': elements, 'terms': terms}
diff --git a/tests/samples/misc/dublincore_test.html b/tests/samples/misc/dublincore_test.html
new file mode 100644
index 00000000..44a192ce
--- /dev/null
+++ b/tests/samples/misc/dublincore_test.html
@@ -0,0 +1,21 @@
+<head profile="http://dublincore.org/documents/dcq-html/">
+<title>Expressing Dublin Core in HTML/XHTML meta and link elements</title>
+<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
+<link rel="schema.DCTERMS" href="http://purl.org/dc/terms/" />
+
+
+<meta name="DC.title" lang="en" content="Expressing Dublin Core
+in HTML/XHTML meta and link elements" />
+<meta name="DC.creator" content="Andy Powell, UKOLN, University of Bath" />
+<meta name="DCTERMS.issued" scheme="DCTERMS.W3CDTF" content="2003-11-01" />
+<meta name="DC.identifier" scheme="DCTERMS.URI"
+content="http://dublincore.org/documents/dcq-html/" />
+<link rel="DCTERMS.replaces" hreflang="en"
+href="http://dublincore.org/documents/2000/08/15/dcq-html/" />
+<meta name="DCTERMS.abstract" content="This document describes how
+qualified Dublin Core metadata can be encoded
+in HTML/XHTML &lt;meta&gt; elements" />
+<meta name="DC.format" scheme="DCTERMS.IMT" content="text/html" />
+<meta name="DC.type" scheme="DCTERMS.DCMIType" content="Text" />
+<meta name="DC.Date.modified" content="2001-07-18" />
+<meta name="DCTERMS.modified" content="2001-07-18" />
\ No newline at end of file
diff --git a/tests/samples/misc/dublincore_test.json b/tests/samples/misc/dublincore_test.json
new file mode 100644
index 00000000..7dbb9e5e
--- /dev/null
+++ b/tests/samples/misc/dublincore_test.json
@@ -0,0 +1,22 @@
+[
+  {
+    "namespaces": {
+      "DC": "http://purl.org/dc/elements/1.1/",
+      "DCTERMS": "http://purl.org/dc/terms/"
+    },
+  "elements": [
+    {"name": "DC.title", "lang": "en", "content": "Expressing Dublin Core\nin HTML/XHTML meta and link elements", "URI": "http://purl.org/dc/elements/1.1/title"},
+    {"name": "DC.creator", "content": "Andy Powell, UKOLN, University of Bath", "URI": "http://purl.org/dc/elements/1.1/creator"},
+    {"name": "DC.identifier", "scheme": "DCTERMS.URI", "content": "http://dublincore.org/documents/dcq-html/", "URI": "http://purl.org/dc/elements/1.1/identifier"},
+    {"name": "DC.format", "scheme": "DCTERMS.IMT", "content": "text/html", "URI": "http://purl.org/dc/elements/1.1/format"},
+    {"name": "DC.type", "scheme": "DCTERMS.DCMIType", "content": "Text", "URI": "http://purl.org/dc/elements/1.1/type"}
+  ],
+  "terms": [
+    {"name": "DCTERMS.issued", "scheme": "DCTERMS.W3CDTF", "content": "2003-11-01", "URI": "http://purl.org/dc/terms/issued"},
+    {"name": "DCTERMS.abstract", "content": "This document describes how\nqualified Dublin Core metadata can be encoded\nin HTML/XHTML <meta> elements", "URI": "http://purl.org/dc/terms/abstract"},
+    {"name": "DC.Date.modified", "content": "2001-07-18", "URI": "http://purl.org/dc/terms/modified"},
+    {"name": "DCTERMS.modified", "content": "2001-07-18", "URI": "http://purl.org/dc/terms/modified"},
+    {"rel": "DCTERMS.replaces", "hreflang": "en", "href": "http://dublincore.org/documents/2000/08/15/dcq-html/", "URI": "http://purl.org/dc/terms/replaces"}
+  ]
+  }
+]
diff --git a/tests/tests_dublincore.py b/tests/tests_dublincore.py
new file mode 100644
index 00000000..a11ce603
--- /dev/null
+++ b/tests/tests_dublincore.py
@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+import json
+import unittest
+
+from extruct.dublincore import DublinCoreExtractor
+from tests import get_testdata, jsonize_dict
+
+
+class TestDublincore(unittest.TestCase):
+
+    maxDiff = None
+
+    def test_dublincore(self):
+        body = get_testdata('misc', 'dublincore_test.html')
+        expected = json.loads(get_testdata('misc', 'dublincore_test.json').decode('UTF-8'))
+
+        dublincorext = DublinCoreExtractor()
+        data = dublincorext.extract(body)
+        self.assertEqual(jsonize_dict(data), expected)

From edc1f64483a90474b6158cae9ad362e88a698c24 Mon Sep 17 00:00:00 2001
From: Joaquin <joaquingc123@gmail.com>
Date: Mon, 17 Dec 2018 23:10:44 -0500
Subject: [PATCH 02/11] Update tests and change to raw strings

---
 extruct/dublincore.py                     |  4 ++--
 tests/samples/songkick/elysianfields.json | 16 +++++++++++++++-
 tests/samples/songkick/tovestyrke.json    | 15 ++++++++++++++-
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/extruct/dublincore.py b/extruct/dublincore.py
index bce9c857..3c9a358d 100644
--- a/extruct/dublincore.py
+++ b/extruct/dublincore.py
@@ -114,7 +114,7 @@ def extract_items(self, document, base_url=None):
 
         def get_lower_attrib(name):
             # get attribute to compare against _DC_TERMS or _DC_ELEMENTS
-            return re.sub(".*\.", "", name).lower()
+            return re.sub(r".*\.", "", name).lower()
 
         def attrib_to_dict(attribs):
             # convert _attrib type to dict
@@ -143,7 +143,7 @@ def populate_results(node, main_attrib):
         namespaces = {}
         for i in namespaces_nodes:
             if i.attrib['href'] in _URL_NAMESPACES:
-                namespaces.update({re.sub("schema\.", "", i.attrib['rel']): i.attrib['href']})
+                namespaces.update({re.sub(r"schema\.", "", i.attrib['rel']): i.attrib['href']})
 
         list_meta_node = document.xpath('//meta')
         for meta_node in list_meta_node:
diff --git a/tests/samples/songkick/elysianfields.json b/tests/samples/songkick/elysianfields.json
index 0e67e302..9ad1d6f5 100644
--- a/tests/samples/songkick/elysianfields.json
+++ b/tests/samples/songkick/elysianfields.json
@@ -261,5 +261,19 @@
                 }
             ]
         }
-    ]
+    ],
+    "dublincore": [
+        {
+            "namespaces": {
+        },
+            "elements": [
+                {
+                    "name": "description",
+                    "content": "Buy tickets for an upcoming Elysian Fields concert near you. List of all Elysian Fields tickets and tour dates for 2017.",
+                    "URI": "http://purl.org/dc/elements/1.1/description"
+                }
+            ],
+   "terms": [
+
+   ]}]
 }
\ No newline at end of file
diff --git a/tests/samples/songkick/tovestyrke.json b/tests/samples/songkick/tovestyrke.json
index 7e17abdd..4e47acce 100644
--- a/tests/samples/songkick/tovestyrke.json
+++ b/tests/samples/songkick/tovestyrke.json
@@ -188,5 +188,18 @@
             ]
         }
     ],
-    "microformat": []
+    "microformat": [],
+     "dublincore": [
+         {
+             "namespaces": {
+
+             },
+   "elements": [
+       {
+           "name": "description",
+           "content": "Past concert. Tove Styrke concert with Geowulf at Hoxton Square Bar & Kitchen in London on 12 Jun 2017.",
+           "URI": "http://purl.org/dc/elements/1.1/description"
+       }],
+   "terms": [
+   ]}]
 }
\ No newline at end of file

From cd01c5ff27e76f37df6ee295b3874557984bfaa3 Mon Sep 17 00:00:00 2001
From: Joaquin <joaquingc123@gmail.com>
Date: Mon, 17 Dec 2018 23:42:04 -0500
Subject: [PATCH 03/11] Fix file typo

---
 tests/{tests_dublincore.py => test_dublincore.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/{tests_dublincore.py => test_dublincore.py} (100%)

diff --git a/tests/tests_dublincore.py b/tests/test_dublincore.py
similarity index 100%
rename from tests/tests_dublincore.py
rename to tests/test_dublincore.py

From 3d4bf5d6258bf3d3ecec45c3646585772881b6aa Mon Sep 17 00:00:00 2001
From: Joaquin <joaquingc123@gmail.com>
Date: Sun, 23 Dec 2018 02:15:09 -0500
Subject: [PATCH 04/11] Add uniform option

---
 extruct/_extruct.py   | 12 ++++++++++--
 extruct/dublincore.py |  9 +++++----
 extruct/uniform.py    | 17 +++++++++++++++++
 tests/test_uniform.py | 41 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/extruct/_extruct.py b/extruct/_extruct.py
index c6fd0c74..81a951f6 100644
--- a/extruct/_extruct.py
+++ b/extruct/_extruct.py
@@ -7,7 +7,7 @@
 from extruct.opengraph import OpenGraphExtractor
 from extruct.microformat import MicroformatExtractor
 from extruct.dublincore import DublinCoreExtractor
-from extruct.uniform import _umicrodata_microformat, _uopengraph
+from extruct.uniform import _umicrodata_microformat, _uopengraph, _udublincore
 from extruct.utils import parse_xmldom_html
 
 logger = logging.getLogger(__name__)
@@ -137,9 +137,17 @@ def extract(htmlstring,
                  output['opengraph'],
                  None,
                  ))
+        if 'dublincore' in syntaxes:
+            uniform_processors.append(
+                ('dublincore',
+                 _udublincore,
+                 output['dublincore'],
+                 None,
+                 ))
+
         for syntax, uniform, raw, schema_context in uniform_processors:
             try:
-                if syntax == 'opengraph':
+                if syntax in ['opengraph', 'dublincore']:
                     output[syntax] = uniform(raw)
                 else:
                     output[syntax] = uniform(raw, schema_context)
diff --git a/extruct/dublincore.py b/extruct/dublincore.py
index 3c9a358d..14261e75 100644
--- a/extruct/dublincore.py
+++ b/extruct/dublincore.py
@@ -101,6 +101,11 @@
 _URL_NAMESPACES = ['http://purl.org/dc/terms/', 'http://purl.org/dc/elements/1.1/']
 
 
+def get_lower_attrib(name):
+    # get attribute to compare against _DC_TERMS or _DC_ELEMENTS
+    return re.sub(r".*\.", "", name).lower()
+
+
 class DublinCoreExtractor(object):
     """DublinCore extractor following extruct API."""
 
@@ -112,10 +117,6 @@ def extract_items(self, document, base_url=None):
         elements = []
         terms = []
 
-        def get_lower_attrib(name):
-            # get attribute to compare against _DC_TERMS or _DC_ELEMENTS
-            return re.sub(r".*\.", "", name).lower()
-
         def attrib_to_dict(attribs):
             # convert _attrib type to dict
             node_dict = {}
diff --git a/extruct/uniform.py b/extruct/uniform.py
index 9a530b53..824f03ba 100644
--- a/extruct/uniform.py
+++ b/extruct/uniform.py
@@ -1,4 +1,5 @@
 from six.moves.urllib.parse import urlparse, urljoin
+from extruct.dublincore import get_lower_attrib
 
 
 def _uopengraph(extracted):
@@ -23,6 +24,22 @@ def _umicrodata_microformat(extracted, schema_context):
     return res
 
 
+def _udublincore(extracted):
+    """Uniform Dublin Core items in place: 'namespaces' -> '@context', type element -> '@type'."""
+    out = []
+    for obj in extracted:
+        obj['@context'] = obj.pop('namespaces', None)
+        elements = obj['elements']
+        for element in list(elements):  # iterate a copy: remove() on the live list skips the next item
+            for value in element.values():
+                if get_lower_attrib(value) == 'type' and 'content' in element:  # no content, nothing to lift
+                    obj['@type'] = element['content']
+                    elements.remove(element)
+                    break
+        out.append(obj)
+    return out
+
+
 def _flatten(element, schema_context):
     if isinstance(element, dict):
         element = flatten_dict(element, schema_context, False)
diff --git a/tests/test_uniform.py b/tests/test_uniform.py
index db178f51..185ca4b0 100644
--- a/tests/test_uniform.py
+++ b/tests/test_uniform.py
@@ -84,6 +84,47 @@ def test_umicrodata(self):
         data = extruct.extract(body, syntaxes=['microdata'], uniform=True)
         self.assertEqual(data['microdata'], expected)
 
+    def test_udublincore(self):
+        expected = [{'elements': [{'name': 'DC.title',
+                     'lang': 'en',
+                     'content': 'Expressing Dublin Core\nin HTML/XHTML meta and link elements',
+                     'URI': 'http://purl.org/dc/elements/1.1/title'},
+                    {'name': 'DC.creator',
+                     'content': 'Andy Powell, UKOLN, University of Bath',
+                     'URI': 'http://purl.org/dc/elements/1.1/creator'},
+                    {'name': 'DC.identifier',
+                     'scheme': 'DCTERMS.URI',
+                     'content': 'http://dublincore.org/documents/dcq-html/',
+                     'URI': 'http://purl.org/dc/elements/1.1/identifier'},
+                    {'name': 'DC.format',
+                     'scheme': 'DCTERMS.IMT',
+                     'content': 'text/html',
+                     'URI': 'http://purl.org/dc/elements/1.1/format'}],
+                   'terms': [{'name': 'DCTERMS.issued',
+                     'scheme': 'DCTERMS.W3CDTF',
+                     'content': '2003-11-01',
+                     'URI': 'http://purl.org/dc/terms/issued'},
+                    {'name': 'DCTERMS.abstract',
+                     'content': 'This document describes how\nqualified Dublin Core metadata can be encoded\nin HTML/XHTML <meta> elements',
+                     'URI': 'http://purl.org/dc/terms/abstract'},
+                    {'name': 'DC.Date.modified',
+                     'content': '2001-07-18',
+                     'URI': 'http://purl.org/dc/terms/modified'},
+                    {'name': 'DCTERMS.modified',
+                     'content': '2001-07-18',
+                     'URI': 'http://purl.org/dc/terms/modified'},
+                    {'rel': 'DCTERMS.replaces',
+                     'hreflang': 'en',
+                     'href': 'http://dublincore.org/documents/2000/08/15/dcq-html/',
+                     'URI': 'http://purl.org/dc/terms/replaces'}],
+                   '@context': {'DC': 'http://purl.org/dc/elements/1.1/',
+                    'DCTERMS': 'http://purl.org/dc/terms/'},
+                   '@type': 'Text'}]
+        body = get_testdata('misc', 'dublincore_test.html')
+        data = extruct.extract(body, syntaxes=['dublincore'], uniform=True)
+        self.assertEqual(data['dublincore'], expected)
+
+
 
     def test_infer_context(self):
         context = 'http://schema.org/UsedCondition'

From 031427f0feaca4fb39875211e266951a5ef2ecbb Mon Sep 17 00:00:00 2001
From: Joaquin Garmendia Cabrera <joaquingc123@gmail.com>
Date: Mon, 14 Jan 2019 16:04:48 -0500
Subject: [PATCH 05/11] Update Readme with DublinCore Options

---
 README.rst | 303 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 201 insertions(+), 102 deletions(-)

diff --git a/README.rst b/README.rst
index 1c71c690..9f7ec438 100644
--- a/README.rst
+++ b/README.rst
@@ -26,6 +26,7 @@ Currently, *extruct* supports:
 - `Microformat`_ via `mf2py`_
 - `Facebook's Open Graph`_
 - (experimental) `RDFa`_ via `rdflib`_
+- `Dublin Core Metadata`_
 
 .. _W3C's HTML Microdata: http://www.w3.org/TR/microdata/
 .. _embedded JSON-LD: http://www.w3.org/TR/json-ld/#embedding-json-ld-in-html-documents
@@ -34,6 +35,7 @@ Currently, *extruct* supports:
 .. _Microformat: http://microformats.org/wiki/Main_Page
 .. _mf2py: https://github.com/microformats/mf2py
 .. _Facebook's Open Graph: http://ogp.me/
+.. _Dublin Core Metadata: http://dublincore.org/documents/dcq-html/
 
 The microdata algorithm is a revisit of `this Scrapinghub blog post`_ showing how to use EXSLT extensions.
 
@@ -62,110 +64,132 @@ Let's try this on a webpage that uses all the syntaxes supported (RDFa with `ogp
 
 First fetch the HTML using python-requests and then feed the response body to ``extruct``::
 
-  >>> import extruct
-  >>> import requests
-  >>> import pprint
-  >>> from w3lib.html import get_base_url
-  >>>
-  >>> pp = pprint.PrettyPrinter(indent=2)
-  >>> r = requests.get('https://www.optimizesmart.com/how-to-use-open-graph-protocol/')
-  >>> base_url = get_base_url(r.text, r.url)
-  >>> data = extruct.extract(r.text, base_url=base_url)
-  >>>
-  >>> pp.pprint(data)
-  { 'json-ld': [ { '@context': 'https://schema.org',
-                   '@id': '#organization',
-                   '@type': 'Organization',
-                   'logo': 'https://www.optimizesmart.com/wp-content/uploads/2016/03/optimize-smart-Twitter-logo.jpg',
-                   'name': 'Optimize Smart',
-                   'sameAs': [ 'https://www.facebook.com/optimizesmart/',
-                               'https://uk.linkedin.com/in/analyticsnerd',
-                               'https://www.youtube.com/user/optimizesmart',
-                               'https://twitter.com/analyticsnerd'],
-                   'url': 'https://www.optimizesmart.com/'}],
-    'microdata': [ { 'properties': {'headline': ''},
-                     'type': 'http://schema.org/WPHeader'}],
-    'microformat': [ { 'children': [ { 'properties': { 'category': [ 'specialized-tracking'],
-                                                       'name': [ 'Open Graph '
-                                                                 'Protocol for '
-                                                                 'Facebook '
-                                                                 'explained with '
-                                                                 'examples\n'
-                                                                 '\n'
-                                                                 'Specialized '
-                                                                 'Tracking\n'
-                                                                 '\n'
-                                                                 '\n'
-                                                                 (...)
-                                                                 'Follow '
-                                                                 '@analyticsnerd\n'
-                                                                 '!function(d,s,id){var '
-                                                                 "js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, "
-                                                                 "'script', "
-                                                                 "'twitter-wjs');"]},
-                                       'type': ['h-entry']}],
-                       'properties': { 'name': [ 'Open Graph Protocol for '
-                                                 'Facebook explained with '
-                                                 'examples\n'
-                                                 (...)
-                                                 'Follow @analyticsnerd\n'
-                                                 '!function(d,s,id){var '
-                                                 "js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, "
-                                                 "'script', 'twitter-wjs');"]},
-                       'type': ['h-feed']}],
-    'opengraph': [ { 'namespace': {'og': 'http://ogp.me/ns#'},
-                     'properties': [ ('og:locale', 'en_US'),
-                                     ('og:type', 'article'),
-                                     ( 'og:title',
-                                       'Open Graph Protocol for Facebook '
-                                       'explained with examples'),
-                                     ( 'og:description',
-                                       'What is Open Graph Protocol and why you '
-                                       'need it? Learn to implement Open Graph '
-                                       'Protocol for Facebook on your website. '
-                                       'Open Graph Protocol Meta Tags.'),
-                                     ( 'og:url',
-                                       'https://www.optimizesmart.com/how-to-use-open-graph-protocol/'),
-                                     ('og:site_name', 'Optimize Smart'),
-                                     ( 'og:updated_time',
-                                       '2018-03-09T16:26:35+00:00'),
-                                     ( 'og:image',
-                                       'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'),
-                                     ( 'og:image:secure_url',
-                                       'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg')]}],
-    'rdfa': [ { '@id': 'https://www.optimizesmart.com/how-to-use-open-graph-protocol/#header',
-                'http://www.w3.org/1999/xhtml/vocab#role': [ { '@id': 'http://www.w3.org/1999/xhtml/vocab#banner'}]},
-              { '@id': 'https://www.optimizesmart.com/how-to-use-open-graph-protocol/',
-                'article:modified_time': [ { '@value': '2018-03-09T16:26:35+00:00'}],
-                'article:published_time': [ { '@value': '2010-07-02T18:57:23+00:00'}],
-                'article:publisher': [ { '@value': 'https://www.facebook.com/optimizesmart/'}],
-                'article:section': [{'@value': 'Specialized Tracking'}],
-                'http://ogp.me/ns#description': [ { '@value': 'What is Open '
-                                                              'Graph Protocol '
-                                                              'and why you need '
-                                                              'it? Learn to '
-                                                              'implement Open '
-                                                              'Graph Protocol '
-                                                              'for Facebook on '
-                                                              'your website. '
-                                                              'Open Graph '
-                                                              'Protocol Meta '
-                                                              'Tags.'}],
-                'http://ogp.me/ns#image': [ { '@value': 'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'}],
-                'http://ogp.me/ns#image:secure_url': [ { '@value': 'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'}],
-                'http://ogp.me/ns#locale': [{'@value': 'en_US'}],
-                'http://ogp.me/ns#site_name': [{'@value': 'Optimize Smart'}],
-                'http://ogp.me/ns#title': [ { '@value': 'Open Graph Protocol for '
-                                                        'Facebook explained with '
-                                                        'examples'}],
-                'http://ogp.me/ns#type': [{'@value': 'article'}],
-                'http://ogp.me/ns#updated_time': [ { '@value': '2018-03-09T16:26:35+00:00'}],
-                'http://ogp.me/ns#url': [ { '@value': 'https://www.optimizesmart.com/how-to-use-open-graph-protocol/'}],
-                'https://api.w.org/': [ { '@id': 'https://www.optimizesmart.com/wp-json/'}]}]}
+    >>> import extruct
+    >>> import requests
+    >>> import pprint
+    >>> from w3lib.html import get_base_url
+    >>> pp = pprint.PrettyPrinter(indent=2)
+    >>> r = requests.get('https://www.optimizesmart.com/how-to-use-open-graph-protocol/')
+    >>> base_url = get_base_url(r.text, r.url)
+    >>> data = extruct.extract(r.text, base_url=base_url)
+    >>> pp.pprint(data)
+    { 'dublincore': [ { 'elements': [ { 'URI': 'http://purl.org/dc/elements/1.1/description',
+                                        'content': 'What is Open Graph Protocol '
+                                                   'and why you need it? Learn to '
+                                                   'implement Open Graph Protocol '
+                                                   'for Facebook on your website. '
+                                                   'Open Graph Protocol Meta Tags.',
+                                        'name': 'description'}],
+                        'namespaces': {},
+                        'terms': []}],
+      'json-ld': [ { '@context': 'https://schema.org',
+                     '@id': 'https://www.optimizesmart.com/#organization',
+                     '@type': 'Organization',
+                     'logo': 'https://www.optimizesmart.com/wp-content/uploads/2016/03/optimize-smart-Twitter-logo.jpg',
+                     'name': 'Optimize Smart',
+                     'sameAs': [ 'https://www.facebook.com/optimizesmart/',
+                                 'https://uk.linkedin.com/in/analyticsnerd',
+                                 'https://www.youtube.com/user/optimizesmart',
+                                 'https://twitter.com/analyticsnerd'],
+                     'url': 'https://www.optimizesmart.com/'},
+                   { '@context': 'http://schema.org',
+                     '@id': '',
+                     '@type': 'ProfessionalService',
+                     'address': { '@type': 'PostalAddress',
+                                  'addressCountry': 'GB',
+                                  'addressLocality': 'Southampton',
+                                  'postalCode': '',
+                                  'streetAddress': ''},
+                     'image': 'https://www.optimizesmart.com/wp-content/themes/Sept17OptimizeSmartDEV/images/logo-small.png',
+                     'name': 'Optimize Smart',
+                     'openingHoursSpecification': { '@type': 'OpeningHoursSpecification',
+                                                    'closes': '23:59',
+                                                    'dayOfWeek': [ 'Monday',
+                                                                   'Tuesday',
+                                                                   'Wednesday',
+                                                                   'Thursday',
+                                                                   'Friday',
+                                                                   'Saturday',
+                                                                   'Sunday'],
+                                                    'opens': '00:00'},
+                     'sameAs': [ 'https://www.facebook.com/optimizesmart/',
+                                 'https://twitter.com/OptimizeSmart',
+                                 'https://www.youtube.com/user/optimizesmart',
+                                 'https://www.linkedin.com/in/analyticsnerd/'],
+                     'telephone': '',
+                     'url': 'https://www.optimizesmart.com'}],
+      'microdata': [ { 'properties': {'headline': ''},
+                       'type': 'http://schema.org/WPHeader'}],
+      'microformat': [ { 'children': [ { 'properties': { 'category': [ 'facebook-tracking',
+                                                                       'specialized-tracking']},
+                                         'type': ['h-entry']}],
+                         'properties': {},
+                         'type': ['h-feed']}],
+      'opengraph': [ { 'namespace': { 'article': 'http://ogp.me/ns/article#',
+                                      'og': 'http://ogp.me/ns#'},
+                       'properties': [ ('og:locale', 'en_US'),
+                                       ('og:type', 'article'),
+                                       ( 'og:title',
+                                         'Open Graph Protocol for Facebook '
+                                         'explained with examples'),
+                                       ( 'og:description',
+                                         'What is Open Graph Protocol and why you '
+                                         'need it? Learn to implement Open Graph '
+                                         'Protocol for Facebook on your website. '
+                                         'Open Graph Protocol Meta Tags.'),
+                                       ( 'og:url',
+                                         'https://www.optimizesmart.com/how-to-use-open-graph-protocol/'),
+                                       ('og:site_name', 'Optimize Smart'),
+                                       ( 'article:publisher',
+                                         'https://www.facebook.com/optimizesmart/'),
+                                       ('article:section', 'Facebook Tracking'),
+                                       ( 'article:published_time',
+                                         '2017-02-02T18:57:23+00:00'),
+                                       ( 'article:modified_time',
+                                         '2019-01-11T10:49:01+00:00'),
+                                       ( 'og:updated_time',
+                                         '2019-01-11T10:49:01+00:00'),
+                                       ( 'og:image',
+                                         'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'),
+                                       ( 'og:image:secure_url',
+                                         'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'),
+                                       ('og:image:width', '711'),
+                                       ('og:image:height', '309')]}],
+      'rdfa': [ { '@id': 'https://www.optimizesmart.com/how-to-use-open-graph-protocol/#header',
+                  'http://www.w3.org/1999/xhtml/vocab#role': [ { '@id': 'http://www.w3.org/1999/xhtml/vocab#banner'}]},
+                { '@id': 'https://www.optimizesmart.com/how-to-use-open-graph-protocol/',
+                  'article:modified_time': [ { '@value': '2019-01-11T10:49:01+00:00'}],
+                  'article:published_time': [ { '@value': '2017-02-02T18:57:23+00:00'}],
+                  'article:publisher': [ { '@value': 'https://www.facebook.com/optimizesmart/'}],
+                  'article:section': [{'@value': 'Facebook Tracking'}],
+                  'http://ogp.me/ns#description': [ { '@value': 'What is Open '
+                                                                'Graph Protocol '
+                                                                'and why you need '
+                                                                'it? Learn to '
+                                                                'implement Open '
+                                                                'Graph Protocol '
+                                                                'for Facebook on '
+                                                                'your website. '
+                                                                'Open Graph '
+                                                                'Protocol Meta '
+                                                                'Tags.'}],
+                  'http://ogp.me/ns#image': [ { '@value': 'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'}],
+                  'http://ogp.me/ns#image:height': [{'@value': '309'}],
+                  'http://ogp.me/ns#image:secure_url': [ { '@value': 'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'}],
+                  'http://ogp.me/ns#image:width': [{'@value': '711'}],
+                  'http://ogp.me/ns#locale': [{'@value': 'en_US'}],
+                  'http://ogp.me/ns#site_name': [{'@value': 'Optimize Smart'}],
+                  'http://ogp.me/ns#title': [ { '@value': 'Open Graph Protocol for '
+                                                          'Facebook explained with '
+                                                          'examples'}],
+                  'http://ogp.me/ns#type': [{'@value': 'article'}],
+                  'http://ogp.me/ns#updated_time': [ { '@value': '2019-01-11T10:49:01+00:00'}],
+                  'http://ogp.me/ns#url': [ { '@value': 'https://www.optimizesmart.com/how-to-use-open-graph-protocol/'}],
+                  'https://api.w.org/': [ { '@id': 'https://www.optimizesmart.com/wp-json/'}]}]}
 
 Select syntaxes
 +++++++++++++++
-It is possible to select which syntaxes to extract by passing a list with the desired ones to extract. Valid values: 'microdata', 'json-ld', 'opengraph', 'microformat', 'rdfa'. If no list is passed all syntaxes will be extracted and returned::
+It is possible to select which syntaxes to extract by passing a list of the desired ones. Valid values: 'microdata', 'json-ld', 'opengraph', 'microformat', 'rdfa', 'dublincore'. If no list is passed, all syntaxes will be extracted and returned::
 
   >>> r = requests.get('http://www.songkick.com/artists/236156-elysian-fields')
   >>> base_url = get_base_url(r.text, r.url)
@@ -209,7 +233,7 @@ It is possible to select which syntaxes to extract by passing a list with the de
 
 Uniform
 +++++++
-Another option is to uniform the output of microformat, opengraph, microdata and json-ld syntaxes to the following structure: ::
+Another option is to make the output of the microformat, opengraph, microdata, dublincore and json-ld syntaxes uniform, using the following structure: ::
 
     {'@context': 'http://example.com', 
                  '@type': 'example_type',
@@ -585,6 +609,81 @@ Microformat extraction
         ]
       }
    }]
+   
+DublinCore extraction
+++++++++++++++++++++++++++++++
+::
+
+    >>> import pprint
+    >>> pp = pprint.PrettyPrinter(indent=2)
+    >>> from extruct.dublincore import DublinCoreExtractor
+    >>> html = '''<head profile="http://dublincore.org/documents/dcq-html/">
+    ... <title>Expressing Dublin Core in HTML/XHTML meta and link elements</title>
+    ... <link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
+    ... <link rel="schema.DCTERMS" href="http://purl.org/dc/terms/" />
+    ... 
+    ... 
+    ... <meta name="DC.title" lang="en" content="Expressing Dublin Core
+    ... in HTML/XHTML meta and link elements" />
+    ... <meta name="DC.creator" content="Andy Powell, UKOLN, University of Bath" />
+    ... <meta name="DCTERMS.issued" scheme="DCTERMS.W3CDTF" content="2003-11-01" />
+    ... <meta name="DC.identifier" scheme="DCTERMS.URI"
+    ... content="http://dublincore.org/documents/dcq-html/" />
+    ... <link rel="DCTERMS.replaces" hreflang="en"
+    ... href="http://dublincore.org/documents/2000/08/15/dcq-html/" />
+    ... <meta name="DCTERMS.abstract" content="This document describes how
+    ... qualified Dublin Core metadata can be encoded
+    ... in HTML/XHTML &lt;meta&gt; elements" />
+    ... <meta name="DC.format" scheme="DCTERMS.IMT" content="text/html" />
+    ... <meta name="DC.type" scheme="DCTERMS.DCMIType" content="Text" />
+    ... <meta name="DC.Date.modified" content="2001-07-18" />
+    ... <meta name="DCTERMS.modified" content="2001-07-18" />'''
+    >>> dublinlde = DublinCoreExtractor()
+    >>> data = dublinlde.extract(html)
+    >>> pp.pprint(data)
+    [ { 'elements': [ { 'URI': 'http://purl.org/dc/elements/1.1/title',
+                        'content': 'Expressing Dublin Core\n'
+                                   'in HTML/XHTML meta and link elements',
+                        'lang': 'en',
+                        'name': 'DC.title'},
+                      { 'URI': 'http://purl.org/dc/elements/1.1/creator',
+                        'content': 'Andy Powell, UKOLN, University of Bath',
+                        'name': 'DC.creator'},
+                      { 'URI': 'http://purl.org/dc/elements/1.1/identifier',
+                        'content': 'http://dublincore.org/documents/dcq-html/',
+                        'name': 'DC.identifier',
+                        'scheme': 'DCTERMS.URI'},
+                      { 'URI': 'http://purl.org/dc/elements/1.1/format',
+                        'content': 'text/html',
+                        'name': 'DC.format',
+                        'scheme': 'DCTERMS.IMT'},
+                      { 'URI': 'http://purl.org/dc/elements/1.1/type',
+                        'content': 'Text',
+                        'name': 'DC.type',
+                        'scheme': 'DCTERMS.DCMIType'}],
+        'namespaces': { 'DC': 'http://purl.org/dc/elements/1.1/',
+                        'DCTERMS': 'http://purl.org/dc/terms/'},
+        'terms': [ { 'URI': 'http://purl.org/dc/terms/issued',
+                     'content': '2003-11-01',
+                     'name': 'DCTERMS.issued',
+                     'scheme': 'DCTERMS.W3CDTF'},
+                   { 'URI': 'http://purl.org/dc/terms/abstract',
+                     'content': 'This document describes how\n'
+                                'qualified Dublin Core metadata can be encoded\n'
+                                'in HTML/XHTML <meta> elements',
+                     'name': 'DCTERMS.abstract'},
+                   { 'URI': 'http://purl.org/dc/terms/modified',
+                     'content': '2001-07-18',
+                     'name': 'DC.Date.modified'},
+                   { 'URI': 'http://purl.org/dc/terms/modified',
+                     'content': '2001-07-18',
+                     'name': 'DCTERMS.modified'},
+                   { 'URI': 'http://purl.org/dc/terms/replaces',
+                     'href': 'http://dublincore.org/documents/2000/08/15/dcq-html/',
+                     'hreflang': 'en',
+                     'rel': 'DCTERMS.replaces'}]}]
+
+
 
 REST API service
 ----------------

From 8cc838509351ec8fb0d5581bd3ec97557d6523f4 Mon Sep 17 00:00:00 2001
From: Joaquin <joaquingc123@gmail.com>
Date: Mon, 14 Jan 2019 19:23:05 -0500
Subject: [PATCH 06/11] Fix list iteration

---
 extruct/uniform.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/extruct/uniform.py b/extruct/uniform.py
index 824f03ba..aefbcc7b 100644
--- a/extruct/uniform.py
+++ b/extruct/uniform.py
@@ -30,11 +30,11 @@ def _udublincore(extracted):
         context = obj.pop('namespaces', None)
         obj['@context'] = context
         elements = obj['elements']
-        for element in elements:
+        for element in list(elements):
             for key, value in element.items():
                 if get_lower_attrib(value) == 'type':
                     obj['@type'] = element['content']
-                    elements.remove(element)
+                    obj['elements'].remove(element)
                     break
         out.append(obj)
     return out

From ac2bdfce89a8fdff98d809570d76018f40b8b3ba Mon Sep 17 00:00:00 2001
From: Joaquin <joaquingc123@gmail.com>
Date: Thu, 17 Jan 2019 16:55:47 -0500
Subject: [PATCH 07/11] Make requested changes

---
 extruct/dublincore.py                     | 10 ++++------
 extruct/uniform.py                        |  2 +-
 tests/samples/songkick/elysianfields.json |  9 +++++----
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/extruct/dublincore.py b/extruct/dublincore.py
index 14261e75..fed54ab4 100644
--- a/extruct/dublincore.py
+++ b/extruct/dublincore.py
@@ -1,6 +1,7 @@
 import re
 
 from extruct.utils import parse_html
+from w3lib.html import strip_html5_whitespace
 
 _DC_ELEMENTS = {  # Defined according DCMES(DCM Version 1.1): http://dublincore.org/documents/dces/
     'contributor': 'http://purl.org/dc/elements/1.1/contributor',
@@ -119,10 +120,7 @@ def extract_items(self, document, base_url=None):
 
         def attrib_to_dict(attribs):
             # convert _attrib type to dict
-            node_dict = {}
-            for attrib, value in attribs.items():
-                node_dict.update({attrib: value})
-            return node_dict
+            return dict(attribs.items())
 
         def populate_results(node, main_attrib):
             # fill list with DC Elements or DC Terms
@@ -143,8 +141,8 @@ def populate_results(node, main_attrib):
         namespaces_nodes = document.xpath('//link[contains(@rel,"schema")]')
         namespaces = {}
         for i in namespaces_nodes:
-            if i.attrib['href'] in _URL_NAMESPACES:
-                namespaces.update({re.sub(r"schema\.", "", i.attrib['rel']): i.attrib['href']})
+            if strip_html5_whitespace(i.attrib['href']) in _URL_NAMESPACES:
+                namespaces.update({re.sub(r"schema\.", "", i.attrib['rel']): strip_html5_whitespace(i.attrib['href'])})
 
         list_meta_node = document.xpath('//meta')
         for meta_node in list_meta_node:
diff --git a/extruct/uniform.py b/extruct/uniform.py
index aefbcc7b..cf97717c 100644
--- a/extruct/uniform.py
+++ b/extruct/uniform.py
@@ -26,7 +26,7 @@ def _umicrodata_microformat(extracted, schema_context):
 
 def _udublincore(extracted):
     out = []
-    for obj in extracted:
+    for obj in list(extracted):
         context = obj.pop('namespaces', None)
         obj['@context'] = context
         elements = obj['elements']
diff --git a/tests/samples/songkick/elysianfields.json b/tests/samples/songkick/elysianfields.json
index 9ad1d6f5..f7c10169 100644
--- a/tests/samples/songkick/elysianfields.json
+++ b/tests/samples/songkick/elysianfields.json
@@ -265,7 +265,7 @@
     "dublincore": [
         {
             "namespaces": {
-        },
+            },
             "elements": [
                 {
                     "name": "description",
@@ -273,7 +273,8 @@
                     "URI": "http://purl.org/dc/elements/1.1/description"
                 }
             ],
-   "terms": [
-
-   ]}]
+            "terms": [
+            ]
+        }
+    ]
 }
\ No newline at end of file

From 32e416b80dbbf248dc5fb568bb282b37ec4876bb Mon Sep 17 00:00:00 2001
From: Joaquin <joaquingc123@gmail.com>
Date: Fri, 18 Jan 2019 10:10:46 -0500
Subject: [PATCH 08/11] Add local variable to improve legibility

---
 extruct/dublincore.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/extruct/dublincore.py b/extruct/dublincore.py
index fed54ab4..3e05209e 100644
--- a/extruct/dublincore.py
+++ b/extruct/dublincore.py
@@ -141,8 +141,9 @@ def populate_results(node, main_attrib):
         namespaces_nodes = document.xpath('//link[contains(@rel,"schema")]')
         namespaces = {}
         for i in namespaces_nodes:
-            if strip_html5_whitespace(i.attrib['href']) in _URL_NAMESPACES:
-                namespaces.update({re.sub(r"schema\.", "", i.attrib['rel']): strip_html5_whitespace(i.attrib['href'])})
+            url = strip_html5_whitespace(i.attrib['href'])
+            if url in _URL_NAMESPACES:
+                namespaces.update({re.sub(r"schema\.", "", i.attrib['rel']): url})
 
         list_meta_node = document.xpath('//meta')
         for meta_node in list_meta_node:

From bd1448fd917737f79cc39f707c725fe0d2988e67 Mon Sep 17 00:00:00 2001
From: Joaquin <joaquingc123@gmail.com>
Date: Sun, 20 Sep 2020 17:19:13 -0500
Subject: [PATCH 09/11] Change shallow cpy to deep cpy, update extruct, readme.

---
 README.rst          | 344 ++++++++++++++------------------------------
 extruct/_extruct.py |   4 +-
 extruct/uniform.py  |   6 +-
 3 files changed, 116 insertions(+), 238 deletions(-)

diff --git a/README.rst b/README.rst
index 9f7ec438..1cfaca65 100644
--- a/README.rst
+++ b/README.rst
@@ -17,8 +17,6 @@ extruct
 
 *extruct* is a library for extracting embedded metadata from HTML markup.
 
-It also has a built-in HTTP server to test its output as JSON.
-
 Currently, *extruct* supports:
 
 - `W3C's HTML Microdata`_
@@ -64,16 +62,18 @@ Let's try this on a webpage that uses all the syntaxes supported (RDFa with `ogp
 
 First fetch the HTML using python-requests and then feed the response body to ``extruct``::
 
-    >>> import extruct
-    >>> import requests
-    >>> import pprint
-    >>> from w3lib.html import get_base_url
-    >>> pp = pprint.PrettyPrinter(indent=2)
-    >>> r = requests.get('https://www.optimizesmart.com/how-to-use-open-graph-protocol/')
-    >>> base_url = get_base_url(r.text, r.url)
-    >>> data = extruct.extract(r.text, base_url=base_url)
-    >>> pp.pprint(data)
-    { 'dublincore': [ { 'elements': [ { 'URI': 'http://purl.org/dc/elements/1.1/description',
+  >>> import extruct
+  >>> import requests
+  >>> import pprint
+  >>> from w3lib.html import get_base_url
+  >>>
+  >>> pp = pprint.PrettyPrinter(indent=2)
+  >>> r = requests.get('https://www.optimizesmart.com/how-to-use-open-graph-protocol/')
+  >>> base_url = get_base_url(r.text, r.url)
+  >>> data = extruct.extract(r.text, base_url=base_url)
+  >>>
+  >>> pp.pprint(data)
+  { 'dublincore': [ { 'elements': [ { 'URI': 'http://purl.org/dc/elements/1.1/description',
                                         'content': 'What is Open Graph Protocol '
                                                    'and why you need it? Learn to '
                                                    'implement Open Graph Protocol '
@@ -82,114 +82,100 @@ First fetch the HTML using python-requests and then feed the response body to ``
                                         'name': 'description'}],
                         'namespaces': {},
                         'terms': []}],
-      'json-ld': [ { '@context': 'https://schema.org',
-                     '@id': 'https://www.optimizesmart.com/#organization',
-                     '@type': 'Organization',
-                     'logo': 'https://www.optimizesmart.com/wp-content/uploads/2016/03/optimize-smart-Twitter-logo.jpg',
-                     'name': 'Optimize Smart',
-                     'sameAs': [ 'https://www.facebook.com/optimizesmart/',
-                                 'https://uk.linkedin.com/in/analyticsnerd',
-                                 'https://www.youtube.com/user/optimizesmart',
-                                 'https://twitter.com/analyticsnerd'],
-                     'url': 'https://www.optimizesmart.com/'},
-                   { '@context': 'http://schema.org',
-                     '@id': '',
-                     '@type': 'ProfessionalService',
-                     'address': { '@type': 'PostalAddress',
-                                  'addressCountry': 'GB',
-                                  'addressLocality': 'Southampton',
-                                  'postalCode': '',
-                                  'streetAddress': ''},
-                     'image': 'https://www.optimizesmart.com/wp-content/themes/Sept17OptimizeSmartDEV/images/logo-small.png',
-                     'name': 'Optimize Smart',
-                     'openingHoursSpecification': { '@type': 'OpeningHoursSpecification',
-                                                    'closes': '23:59',
-                                                    'dayOfWeek': [ 'Monday',
-                                                                   'Tuesday',
-                                                                   'Wednesday',
-                                                                   'Thursday',
-                                                                   'Friday',
-                                                                   'Saturday',
-                                                                   'Sunday'],
-                                                    'opens': '00:00'},
-                     'sameAs': [ 'https://www.facebook.com/optimizesmart/',
-                                 'https://twitter.com/OptimizeSmart',
-                                 'https://www.youtube.com/user/optimizesmart',
-                                 'https://www.linkedin.com/in/analyticsnerd/'],
-                     'telephone': '',
-                     'url': 'https://www.optimizesmart.com'}],
-      'microdata': [ { 'properties': {'headline': ''},
-                       'type': 'http://schema.org/WPHeader'}],
-      'microformat': [ { 'children': [ { 'properties': { 'category': [ 'facebook-tracking',
-                                                                       'specialized-tracking']},
-                                         'type': ['h-entry']}],
-                         'properties': {},
-                         'type': ['h-feed']}],
-      'opengraph': [ { 'namespace': { 'article': 'http://ogp.me/ns/article#',
-                                      'og': 'http://ogp.me/ns#'},
-                       'properties': [ ('og:locale', 'en_US'),
-                                       ('og:type', 'article'),
-                                       ( 'og:title',
-                                         'Open Graph Protocol for Facebook '
-                                         'explained with examples'),
-                                       ( 'og:description',
-                                         'What is Open Graph Protocol and why you '
-                                         'need it? Learn to implement Open Graph '
-                                         'Protocol for Facebook on your website. '
-                                         'Open Graph Protocol Meta Tags.'),
-                                       ( 'og:url',
-                                         'https://www.optimizesmart.com/how-to-use-open-graph-protocol/'),
-                                       ('og:site_name', 'Optimize Smart'),
-                                       ( 'article:publisher',
-                                         'https://www.facebook.com/optimizesmart/'),
-                                       ('article:section', 'Facebook Tracking'),
-                                       ( 'article:published_time',
-                                         '2017-02-02T18:57:23+00:00'),
-                                       ( 'article:modified_time',
-                                         '2019-01-11T10:49:01+00:00'),
-                                       ( 'og:updated_time',
-                                         '2019-01-11T10:49:01+00:00'),
-                                       ( 'og:image',
-                                         'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'),
-                                       ( 'og:image:secure_url',
-                                         'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'),
-                                       ('og:image:width', '711'),
-                                       ('og:image:height', '309')]}],
-      'rdfa': [ { '@id': 'https://www.optimizesmart.com/how-to-use-open-graph-protocol/#header',
-                  'http://www.w3.org/1999/xhtml/vocab#role': [ { '@id': 'http://www.w3.org/1999/xhtml/vocab#banner'}]},
-                { '@id': 'https://www.optimizesmart.com/how-to-use-open-graph-protocol/',
-                  'article:modified_time': [ { '@value': '2019-01-11T10:49:01+00:00'}],
-                  'article:published_time': [ { '@value': '2017-02-02T18:57:23+00:00'}],
-                  'article:publisher': [ { '@value': 'https://www.facebook.com/optimizesmart/'}],
-                  'article:section': [{'@value': 'Facebook Tracking'}],
-                  'http://ogp.me/ns#description': [ { '@value': 'What is Open '
-                                                                'Graph Protocol '
-                                                                'and why you need '
-                                                                'it? Learn to '
-                                                                'implement Open '
-                                                                'Graph Protocol '
-                                                                'for Facebook on '
-                                                                'your website. '
-                                                                'Open Graph '
-                                                                'Protocol Meta '
-                                                                'Tags.'}],
-                  'http://ogp.me/ns#image': [ { '@value': 'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'}],
-                  'http://ogp.me/ns#image:height': [{'@value': '309'}],
-                  'http://ogp.me/ns#image:secure_url': [ { '@value': 'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'}],
-                  'http://ogp.me/ns#image:width': [{'@value': '711'}],
-                  'http://ogp.me/ns#locale': [{'@value': 'en_US'}],
-                  'http://ogp.me/ns#site_name': [{'@value': 'Optimize Smart'}],
-                  'http://ogp.me/ns#title': [ { '@value': 'Open Graph Protocol for '
-                                                          'Facebook explained with '
-                                                          'examples'}],
-                  'http://ogp.me/ns#type': [{'@value': 'article'}],
-                  'http://ogp.me/ns#updated_time': [ { '@value': '2019-01-11T10:49:01+00:00'}],
-                  'http://ogp.me/ns#url': [ { '@value': 'https://www.optimizesmart.com/how-to-use-open-graph-protocol/'}],
-                  'https://api.w.org/': [ { '@id': 'https://www.optimizesmart.com/wp-json/'}]}]}
+
+  'json-ld': [ { '@context': 'https://schema.org',
+                   '@id': '#organization',
+                   '@type': 'Organization',
+                   'logo': 'https://www.optimizesmart.com/wp-content/uploads/2016/03/optimize-smart-Twitter-logo.jpg',
+                   'name': 'Optimize Smart',
+                   'sameAs': [ 'https://www.facebook.com/optimizesmart/',
+                               'https://uk.linkedin.com/in/analyticsnerd',
+                               'https://www.youtube.com/user/optimizesmart',
+                               'https://twitter.com/analyticsnerd'],
+                   'url': 'https://www.optimizesmart.com/'}],
+    'microdata': [ { 'properties': {'headline': ''},
+                     'type': 'http://schema.org/WPHeader'}],
+    'microformat': [ { 'children': [ { 'properties': { 'category': [ 'specialized-tracking'],
+                                                       'name': [ 'Open Graph '
+                                                                 'Protocol for '
+                                                                 'Facebook '
+                                                                 'explained with '
+                                                                 'examples\n'
+                                                                 '\n'
+                                                                 'Specialized '
+                                                                 'Tracking\n'
+                                                                 '\n'
+                                                                 '\n'
+                                                                 (...)
+                                                                 'Follow '
+                                                                 '@analyticsnerd\n'
+                                                                 '!function(d,s,id){var '
+                                                                 "js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, "
+                                                                 "'script', "
+                                                                 "'twitter-wjs');"]},
+                                       'type': ['h-entry']}],
+                       'properties': { 'name': [ 'Open Graph Protocol for '
+                                                 'Facebook explained with '
+                                                 'examples\n'
+                                                 (...)
+                                                 'Follow @analyticsnerd\n'
+                                                 '!function(d,s,id){var '
+                                                 "js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, "
+                                                 "'script', 'twitter-wjs');"]},
+                       'type': ['h-feed']}],
+    'opengraph': [ { 'namespace': {'og': 'http://ogp.me/ns#'},
+                     'properties': [ ('og:locale', 'en_US'),
+                                     ('og:type', 'article'),
+                                     ( 'og:title',
+                                       'Open Graph Protocol for Facebook '
+                                       'explained with examples'),
+                                     ( 'og:description',
+                                       'What is Open Graph Protocol and why you '
+                                       'need it? Learn to implement Open Graph '
+                                       'Protocol for Facebook on your website. '
+                                       'Open Graph Protocol Meta Tags.'),
+                                     ( 'og:url',
+                                       'https://www.optimizesmart.com/how-to-use-open-graph-protocol/'),
+                                     ('og:site_name', 'Optimize Smart'),
+                                     ( 'og:updated_time',
+                                       '2018-03-09T16:26:35+00:00'),
+                                     ( 'og:image',
+                                       'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'),
+                                     ( 'og:image:secure_url',
+                                       'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg')]}],
+    'rdfa': [ { '@id': 'https://www.optimizesmart.com/how-to-use-open-graph-protocol/#header',
+                'http://www.w3.org/1999/xhtml/vocab#role': [ { '@id': 'http://www.w3.org/1999/xhtml/vocab#banner'}]},
+              { '@id': 'https://www.optimizesmart.com/how-to-use-open-graph-protocol/',
+                'article:modified_time': [ { '@value': '2018-03-09T16:26:35+00:00'}],
+                'article:published_time': [ { '@value': '2010-07-02T18:57:23+00:00'}],
+                'article:publisher': [ { '@value': 'https://www.facebook.com/optimizesmart/'}],
+                'article:section': [{'@value': 'Specialized Tracking'}],
+                'http://ogp.me/ns#description': [ { '@value': 'What is Open '
+                                                              'Graph Protocol '
+                                                              'and why you need '
+                                                              'it? Learn to '
+                                                              'implement Open '
+                                                              'Graph Protocol '
+                                                              'for Facebook on '
+                                                              'your website. '
+                                                              'Open Graph '
+                                                              'Protocol Meta '
+                                                              'Tags.'}],
+                'http://ogp.me/ns#image': [ { '@value': 'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'}],
+                'http://ogp.me/ns#image:secure_url': [ { '@value': 'https://www.optimizesmart.com/wp-content/uploads/2010/07/open-graph-protocol.jpg'}],
+                'http://ogp.me/ns#locale': [{'@value': 'en_US'}],
+                'http://ogp.me/ns#site_name': [{'@value': 'Optimize Smart'}],
+                'http://ogp.me/ns#title': [ { '@value': 'Open Graph Protocol for '
+                                                        'Facebook explained with '
+                                                        'examples'}],
+                'http://ogp.me/ns#type': [{'@value': 'article'}],
+                'http://ogp.me/ns#updated_time': [ { '@value': '2018-03-09T16:26:35+00:00'}],
+                'http://ogp.me/ns#url': [ { '@value': 'https://www.optimizesmart.com/how-to-use-open-graph-protocol/'}],
+                'https://api.w.org/': [ { '@id': 'https://www.optimizesmart.com/wp-json/'}]}]}
 
 Select syntaxes
 +++++++++++++++
-It is possible to select which syntaxes to extract by passing a list with the desired ones to extract. Valid values: 'microdata', 'json-ld', 'opengraph', 'microformat', 'rdfa', 'dublincore'. If no list is passed all syntaxes will be extracted and returned::
+It is possible to select which syntaxes to extract by passing a list of the desired ones. Valid values: 'microdata', 'json-ld', 'opengraph', 'microformat', 'rdfa' and 'dublincore'. If no list is passed, all syntaxes will be extracted and returned::
 
   >>> r = requests.get('http://www.songkick.com/artists/236156-elysian-fields')
   >>> base_url = get_base_url(r.text, r.url)
@@ -235,7 +221,7 @@ Uniform
 +++++++
 Another option is to uniform the output of microformat, opengraph, microdata, dublincore and json-ld syntaxes to the following structure: ::
 
-    {'@context': 'http://example.com', 
+    {'@context': 'http://example.com',
                  '@type': 'example_type',
                  /* All other the properties in keys here */
                  }
@@ -609,7 +595,7 @@ Microformat extraction
         ]
       }
    }]
-   
+
 DublinCore extraction
 ++++++++++++++++++++++++++++++
 ::
@@ -621,8 +607,8 @@ DublinCore extraction
     ... <title>Expressing Dublin Core in HTML/XHTML meta and link elements</title>
     ... <link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
     ... <link rel="schema.DCTERMS" href="http://purl.org/dc/terms/" />
-    ... 
-    ... 
+    ...
+    ...
     ... <meta name="DC.title" lang="en" content="Expressing Dublin Core
     ... in HTML/XHTML meta and link elements" />
     ... <meta name="DC.creator" content="Andy Powell, UKOLN, University of Bath" />
@@ -684,106 +670,6 @@ DublinCore extraction
                      'rel': 'DCTERMS.replaces'}]}]
 
 
-
-REST API service
-----------------
-
-*extruct* also ships with a REST API service to test its output from URLs.
-
-Dependencies
-++++++++++++
-
-* bottle_ (Web framework)
-* gevent_ (Aysnc framework)
-* requests_
-
-.. _bottle: https://pypi.python.org/pypi/bottle
-.. _gevent: http://www.gevent.org/
-.. _requests: http://docs.python-requests.org/
-
-Usage
-+++++
-
-::
-
-    python -m extruct.service
-
-launches an HTTP server listening on port 10005.
-
-Methods supported
-+++++++++++++++++
-
-::
-
-    /extruct/<URL>
-    method = GET
-
-
-    /extruct/batch
-    method = POST
-    params:
-        urls - a list of URLs separted by newlines
-        urlsfile - a file with one URL per line
-
-E.g. http://localhost:10005/extruct/http://www.sarenza.com/i-love-shoes-susket-s767163-p0000119412
-
-will output something like this:
-
->>>
-{ 'json-ld': [ { '@context': 'http://schema.org',
-                 '@id': 'FP',
-                 '@type': 'Product',
-                 'brand': { '@type': 'Brand',
-                            'url': 'https://www.sarenza.com/i-love-shoes'},
-                 'color': ['Lava', 'Black', 'Lt grey'],
-                 'image': [ 'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_09.jpg?201509221045&v=20180313113923',
-                            'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_02.jpg?201509291747&v=20180313113923',
-                            'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_03.jpg?201509221045&v=20180313113923',
-                            'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_04.jpg?201509221045&v=20180313113923',
-                            'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_05.jpg?201509221045&v=20180313113923',
-                            'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_06.jpg?201509221045&v=20180313113923',
-                            'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_07.jpg?201509221045&v=20180313113923',
-                            'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_08.jpg?201509221045&v=20180313113923'],
-                 'name': 'Susket',
-                 'offers': { '@type': 'AggregateOffer',
-                             'availability': 'InStock',
-                             'highPrice': '49.00',
-                             'lowPrice': '0.00',
-                             'price': '0.00',
-                             'priceCurrency': 'EUR'}}],
-  'microdata': [ { 'properties': { 'average': '4.7',
-                                   'best': '5',
-                                   'itemreviewed': 'Sarenza',
-                                   'rating': '4.7 / 5\n\t\t  (4 066 avis)',
-                                   'votes': '4 066'},
-                   'type': 'http://data-vocabulary.org/Review-aggregate'}],
-  'microformat': [],
-  'opengraph': [ { 'namespace': {'og': 'http://ogp.me/ns#'},
-                   'properties': [ ( 'og:title',
-                                     'I Love Shoes Susket @sarenza.com'),
-                                   ( 'og:image',
-                                     'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_09.jpg?201509221045&v=20180313113923'),
-                                   ('og:site_name', 'sarenza.com'),
-                                   ('og:type', 'product'),
-                                   ('og:description', '...'),
-                                   ( 'og:url',
-                                     'https://www.sarenza.com/i-love-shoes-susket-s767163-p0000119412'),
-                                   ('og:country-name', 'FRA')]}],
-  'rdfa': [ { '@id': 'https://www.sarenza.com/i-love-shoes-susket-s767163-p0000119412',
-              'http://ogp.me/ns#country-name': [{'@value': 'FRA'}],
-              'http://ogp.me/ns#description': [{'@value': '...'}],
-              'http://ogp.me/ns#image': [ { '@value': 'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_09.jpg?201509221045&v=20180313113923'}],
-              'http://ogp.me/ns#site_name': [{'@value': 'sarenza.com'}],
-              'http://ogp.me/ns#title': [ { '@value': 'I Love Shoes Susket '
-                                                      '@sarenza.com'}],
-              'http://ogp.me/ns#type': [{'@value': 'product'}],
-              'http://ogp.me/ns#url': [ { '@value': 'https://www.sarenza.com/i-love-shoes-susket-s767163-p0000119412'}],
-              'http://ogp.me/ns/fb#admins': [{'@value': '100001934697625'}],
-              'http://ogp.me/ns/fb#app_id': [{'@value': '148128758532914'}]},
-            { '@id': '_:Ncf1962068aa142b29000813372db7841',
-              'http://www.w3.org/1999/xhtml/vocab#role': [ { '@id': 'http://www.w3.org/1999/xhtml/vocab#navigation'}]}]}
-
-
 Command Line Tool
 -----------------
 
@@ -821,7 +707,7 @@ those, you can pass their individual names collected in a list through 'syntaxes
 For example, this command extracts only Microdata and JSON-LD metadata from
 "http://example.com"::
 
-    extruct "http://example.com" --syntaxes microdata json-ld 
+    extruct "http://example.com" --syntaxes microdata json-ld
 
 NB syntaxes names passed must correspond to these: microdata, json-ld, rdfa, opengraph, microformat
 
@@ -847,16 +733,4 @@ Use tox_ to run tests with different Python versions::
     tox
 
 
-.. _tox: https://testrun.org/tox/latest/
-
-
-Versioning
-----------
-
-Use bumpversion_ to conveniently change project version::
-
-    bumpversion patch  # 0.0.0 -> 0.0.1
-    bumpversion minor  # 0.0.1 -> 0.1.0
-    bumpversion major  # 0.1.0 -> 1.0.0
-
-.. _bumpversion: https://pypi.python.org/pypi/bumpversion
+.. _tox: https://testrun.org/tox/latest/
\ No newline at end of file
diff --git a/extruct/_extruct.py b/extruct/_extruct.py
index 81a951f6..d5016901 100644
--- a/extruct/_extruct.py
+++ b/extruct/_extruct.py
@@ -147,7 +147,9 @@ def extract(htmlstring,
 
         for syntax, uniform, raw, schema_context in uniform_processors:
             try:
-                if syntax in ['opengraph', 'dublincore']:
+                if syntax == 'opengraph':
+                    output[syntax] = uniform(raw, with_og_array=with_og_array)
+                elif syntax == 'dublincore':
                     output[syntax] = uniform(raw)
                 else:
                     output[syntax] = uniform(raw, schema_context)
diff --git a/extruct/uniform.py b/extruct/uniform.py
index cf97717c..74f16587 100644
--- a/extruct/uniform.py
+++ b/extruct/uniform.py
@@ -1,3 +1,4 @@
+import copy
 from six.moves.urllib.parse import urlparse, urljoin
 from extruct.dublincore import get_lower_attrib
 
@@ -26,11 +27,12 @@ def _umicrodata_microformat(extracted, schema_context):
 
 def _udublincore(extracted):
     out = []
-    for obj in list(extracted):
+    extracted_cpy = copy.deepcopy(extracted)
+    for obj in extracted_cpy:
         context = obj.pop('namespaces', None)
         obj['@context'] = context
         elements = obj['elements']
-        for element in list(elements):
+        for element in elements:
             for key, value in element.items():
                 if get_lower_attrib(value) == 'type':
                     obj['@type'] = element['content']

From dee37e6dd649d2d6447da87b6b1824b28fdce63e Mon Sep 17 00:00:00 2001
From: Joaquin <joaquingc123@gmail.com>
Date: Thu, 1 Oct 2020 23:07:05 -0500
Subject: [PATCH 10/11] update README.rst, normalize indentation

---
 README.rst                             |  2 +-
 tests/samples/songkick/tovestyrke.json | 25 +++++++++++++------------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/README.rst b/README.rst
index 45d1f607..ee0ee2de 100644
--- a/README.rst
+++ b/README.rst
@@ -33,7 +33,7 @@ Currently, *extruct* supports:
 .. _Microformat: http://microformats.org/wiki/Main_Page
 .. _mf2py: https://github.com/microformats/mf2py
 .. _Facebook's Open Graph: http://ogp.me/
-.. _Dublin Core Metadata: http://dublincore.org/documents/dcq-html/
+.. _Dublin Core Metadata: https://www.dublincore.org/specifications/dublin-core/dcq-html/
 
 The microdata algorithm is a revisit of `this Scrapinghub blog post`_ showing how to use EXSLT extensions.
 
diff --git a/tests/samples/songkick/tovestyrke.json b/tests/samples/songkick/tovestyrke.json
index 4e47acce..068df67a 100644
--- a/tests/samples/songkick/tovestyrke.json
+++ b/tests/samples/songkick/tovestyrke.json
@@ -189,17 +189,18 @@
         }
     ],
     "microformat": [],
-     "dublincore": [
+    "dublincore": [
          {
-             "namespaces": {
-
-             },
-   "elements": [
-       {
-           "name": "description",
-           "content": "Past concert. Tove Styrke concert with Geowulf at Hoxton Square Bar & Kitchen in London on 12 Jun 2017.",
-           "URI": "http://purl.org/dc/elements/1.1/description"
-       }],
-   "terms": [
-   ]}]
+             "namespaces": {},
+             "elements": [
+                 {
+                     "name": "description",
+                     "content": "Past concert. Tove Styrke concert with Geowulf at Hoxton Square Bar & Kitchen in London on 12 Jun 2017.",
+                     "URI": "http://purl.org/dc/elements/1.1/description"
+                 }
+             ],
+             "terms": [
+             ]
+         }
+    ]
 }
\ No newline at end of file

From 043a4795235ea4ba79b39467297f8a298a5d5606 Mon Sep 17 00:00:00 2001
From: Joaquin <joaquingc123@gmail.com>
Date: Sun, 4 Oct 2020 16:41:05 -0500
Subject: [PATCH 11/11] Specify DC version, update link.

---
 README.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index ee0ee2de..5cbc809a 100644
--- a/README.rst
+++ b/README.rst
@@ -24,7 +24,7 @@ Currently, *extruct* supports:
 - `Microformat`_ via `mf2py`_
 - `Facebook's Open Graph`_
 - (experimental) `RDFa`_ via `rdflib`_
-- `Dublin Core Metadata`_
+- `Dublin Core Metadata (DC-HTML-2003)`_
 
 .. _W3C's HTML Microdata: http://www.w3.org/TR/microdata/
 .. _embedded JSON-LD: http://www.w3.org/TR/json-ld/#embedding-json-ld-in-html-documents
@@ -33,7 +33,7 @@ Currently, *extruct* supports:
 .. _Microformat: http://microformats.org/wiki/Main_Page
 .. _mf2py: https://github.com/microformats/mf2py
 .. _Facebook's Open Graph: http://ogp.me/
-.. _Dublin Core Metadata: https://www.dublincore.org/specifications/dublin-core/dcq-html/
+.. _Dublin Core Metadata (DC-HTML-2003): https://www.dublincore.org/specifications/dublin-core/dcq-html/2003-11-30/
 
 The microdata algorithm is a revisit of `this Scrapinghub blog post`_ showing how to use EXSLT extensions.