Skip to content

Commit

Permalink
Merge pull request #161 from openculinary/dependencies/rdflib-5.0.0
Browse files Browse the repository at this point in the history
Add support for rdflib 5.0.0
  • Loading branch information
lopuhin committed Dec 12, 2020
2 parents c1e4803 + 42fe3c0 commit bf103f5
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 9 deletions.
7 changes: 3 additions & 4 deletions extruct/rdfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,14 @@
rdflib_logger.setLevel(logging.ERROR)

from rdflib import Graph, logger as rdflib_logger
from rdflib.plugins.parsers.pyRdfa import pyRdfa as PyRdfa, Options, logger as pyrdfa_logger
from rdflib.plugins.parsers.pyRdfa.initialcontext import initial_context
from pyRdfa import pyRdfa as PyRdfa, Options
from pyRdfa.initialcontext import initial_context

from extruct.utils import parse_xmldom_html


# silence rdflib/PyRdfa INFO logs
# silence rdflib INFO logs
rdflib_logger.setLevel(logging.ERROR)
pyrdfa_logger.setLevel(logging.ERROR)

initial_context["http://www.w3.org/2011/rdfa-context/rdfa-1.1"].ns.update({
"twitter": "https://dev.twitter.com/cards#",
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
# pip install -r requirements.txt
lxml
requests
rdflib<5.0.0
rdflib
rdflib-jsonld
pyrdfa3
mf2py>=1.1.0
six>=1.11
w3lib
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ def get_version():
packages=find_packages(exclude=['tests',]),
package_data={'extruct': ['VERSION']},
install_requires=['lxml',
'rdflib<5.0.0',
'rdflib',
'rdflib-jsonld',
'pyrdfa3',
'mf2py',
'w3lib',
'html-text>=0.5.1',
Expand Down
6 changes: 3 additions & 3 deletions tests/samples/misc/Portfolio_Niels_Lubberman.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@
],
"http://purl.org/rss/1.0/modules/content/encoded": [
{
"@value": "<p xml:lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:og=\"http://ogp.me/ns#\" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\" xmlns:sioc=\"http://rdfs.org/sioc/ns#\" xmlns:sioct=\"http://rdfs.org/sioc/types#\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\">Op deze vernieuwde website kunt u enkele van mijn projecten vinden, tevens kunt u lessen downloaden die ik heb gemaakt.</p>\n\n",
"@value": "<p xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:og=\"http://ogp.me/ns#\" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\" xmlns:sioc=\"http://rdfs.org/sioc/ns#\" xmlns:sioct=\"http://rdfs.org/sioc/types#\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\">Op deze vernieuwde website kunt u enkele van mijn projecten vinden, tevens kunt u lessen downloaden die ik heb gemaakt.</p>\n\n",
"@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"
},
{
"@value": "<p xml:lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:og=\"http://ogp.me/ns#\" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\" xmlns:sioc=\"http://rdfs.org/sioc/ns#\" xmlns:sioct=\"http://rdfs.org/sioc/types#\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\">Voeg mij nu toe aan uw professionele netwerk op LinkedIn.</p>\n\n",
"@value": "<p xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:og=\"http://ogp.me/ns#\" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\" xmlns:sioc=\"http://rdfs.org/sioc/ns#\" xmlns:sioct=\"http://rdfs.org/sioc/types#\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\">Voeg mij nu toe aan uw professionele netwerk op LinkedIn.</p>\n\n",
"@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"
},
{
"@value": "<p xml:lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:og=\"http://ogp.me/ns#\" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\" xmlns:sioc=\"http://rdfs.org/sioc/ns#\" xmlns:sioct=\"http://rdfs.org/sioc/types#\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\">Met behulp van de pijl hieronder kunt u mijn CV downloaden.</p>\n\n",
"@value": "<p xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\" xmlns:og=\"http://ogp.me/ns#\" xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\" xmlns:sioc=\"http://rdfs.org/sioc/ns#\" xmlns:sioct=\"http://rdfs.org/sioc/types#\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\">Met behulp van de pijl hieronder kunt u mijn CV downloaden.</p>\n\n",
"@type": "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"
}
],
Expand Down
5 changes: 5 additions & 0 deletions tests/test_rdfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,17 @@
import unittest

from extruct.rdfa import RDFaExtractor
from lxml.etree import XML, canonicalize
from tests import get_testdata

def tupleize(d):
    """Convert nested JSON-like data into an order-insensitive, comparable form.

    Lists become sorted lists of their converted elements, and dicts become
    sorted lists of ``(key, converted_value)`` pairs. Any other value is
    returned unchanged.

    For dicts whose ``@type`` is the RDF ``XMLLiteral`` datatype, the
    ``@value`` string is canonicalized before comparison so that attribute
    re-ordering in the serialized XML is ignored.

    The input is never modified: the canonical XML is used only in the
    returned structure, so fixtures passed in can be reused afterwards.
    """
    if isinstance(d, list):
        return sorted(tupleize(e) for e in d)
    if isinstance(d, dict):
        # Workaround: canonicalize XML so that attribute re-ordering is ignored
        # See: https://github.com/scrapinghub/extruct/pull/161
        is_xml_literal = (
            d.get('@type') == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral'
        )
        pairs = []
        for k, v in d.items():
            if is_xml_literal and k == '@value':
                # Canonicalize into a local value instead of writing back
                # into the caller's dict.
                v = canonicalize(XML(v))
            pairs.append((k, tupleize(v)))
        return sorted(pairs)
    return d

Expand Down

0 comments on commit bf103f5

Please sign in to comment.