Skip to content

Commit

Permalink
Merge pull request #140 from ShivinDass/issue31
Browse files Browse the repository at this point in the history
Support for expanded opengraph metadata
  • Loading branch information
lopuhin committed Jun 4, 2020
2 parents 0648f1a + d9f7c8e commit f66c825
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 3 deletions.
11 changes: 8 additions & 3 deletions extruct/rdfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@

# Register additional prefixes in the RDFa 1.1 initial context so that
# properties such as "og:title", "article:section" or "music:duration"
# resolve to full IRIs even when the page does not declare the prefix itself.
# Each prefix appears exactly once: a dict literal keeps only the last value
# for a repeated key, so a duplicate entry would be silently dropped.
initial_context["http://www.w3.org/2011/rdfa-context/rdfa-1.1"].ns.update({
    "twitter": "https://dev.twitter.com/cards#",
    "fb": "http://ogp.me/ns/fb#",
    "og": "http://ogp.me/ns#",
    "music": "http://ogp.me/ns/music#",
    "video": "http://ogp.me/ns/video#",
    "article": "http://ogp.me/ns/article#",
    "book": "http://ogp.me/ns/book#",
    "profile": "http://ogp.me/ns/profile#"
})


Expand All @@ -43,7 +49,6 @@ def extract_items(self, document, base_url=None, expanded=True):
vocab_cache_report=False,
refresh_vocab_cache=False,
check_lite=False)

g = PyRdfa(options, base=base_url).graph_from_DOM(document, graph=Graph(), pgraph=Graph())
jsonld_string = g.serialize(format='json-ld', auto_compact=not expanded).decode('utf-8')
return json.loads(jsonld_string)
return json.loads(jsonld_string)
40 changes: 40 additions & 0 deletions tests/samples/misc/expanded_OG_support_test.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="https://www.w3.org/1999/xhtml" xmlns:og="https://ogp.me/ns#" xmlns:fb="https://www.facebook.com/2008/fbml">
<head>
<title>Himanshu's Open Graph Protocol</title>
<meta http-equiv="Content-Type" content="text/html;charset=WINDOWS-1252" />
<meta http-equiv="Content-Language" content="en-us" />
<link rel="stylesheet" type="text/css" href="event-education.css" />
<meta name="verify-v1" content="so4y/3aLT7/7bUUB9f6iVXN0tv8upRwaccek7JKB1gs=" >
<meta property="og:image" content="https://www.eventeducation.com/images/982336_wedding_dayandouan_th.jpg"/>
<meta property="fb:admins" content="himanshu160"/>
<meta property="og:site_name" content="Event Education"/>

<meta property="og:url" content="http://www.nytimes.com/2016/12/15/arts/music/from-steet-theater-to-wagner-on-the-opera-stage.html" />
<meta property="og:type" content="article" />
<meta property="og:title" content="From Street Theater to Wagner on the Opera Stage" />
<meta property="og:description" content="which he set in Bangladesh instead of Norway. The production opens in Madrid on Saturday." />
<meta property="article:published" itemprop="datePublished" content="2016-12-15T05:55:55-05:00" />
<meta property="article:modified" itemprop="dateModified" content="2016-12-15T06:19:30-05:00" />
<meta property="article:section" itemprop="articleSection" content="Music" />
<meta property="article:section-taxonomy-id" itemprop="articleSection" content="C5BFA7D5-359C-427B-90E6-6B7245A6CDD8" />
<meta property="article:section_url" content="http://www.nytimes.com/section/arts" />
<meta property="article:top-level-section" content="arts" />
<meta property="fb:app_id" content="9869919170" />
<meta property="music:duration" content="60" />
<meta property="video:tag" content="Exhilerating" />
<meta property="book:release_date" content="2016-12-15T06:19:30-05:00" />
<meta property="profile:first_name" content="John" />
<meta property="profile:last_name" content="Lennon" />
</head>
<body>
<div id="fb-root"></div>
<script>(function(d, s, id) {
var js, fjs = d.getElementsByTagName(s)[0];
if (d.getElementById(id)) return;
js = d.createElement(s); js.id = id;
js.src = "//connect.facebook.net/en_US/all.js#xfbml=1&appId=501839739845103";
fjs.parentNode.insertBefore(js, fjs);
}(document, 'script', 'facebook-jssdk'));</script>
</body>
</html>
100 changes: 100 additions & 0 deletions tests/samples/misc/expanded_OG_support_test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
[
{
"https://ogp.me/ns#url": [
{
"@value": "http://www.nytimes.com/2016/12/15/arts/music/from-steet-theater-to-wagner-on-the-opera-stage.html"
}
],
"http://ogp.me/ns/profile#first_name": [
{
"@value": "John"
}
],
"https://ogp.me/ns#type": [
{
"@value": "article"
}
],
"http://ogp.me/ns/article#section": [
{
"@value": "Music"
}
],
"http://ogp.me/ns/music#duration": [
{
"@value": "60"
}
],
"http://ogp.me/ns/article#modified": [
{
"@value": "2016-12-15T06:19:30-05:00"
}
],
"http://ogp.me/ns/video#tag": [
{
"@value": "Exhilerating"
}
],
"https://ogp.me/ns#site_name": [
{
"@value": "Event Education"
}
],
"http://ogp.me/ns/profile#last_name": [
{
"@value": "Lennon"
}
],
"https://www.facebook.com/2008/fbmladmins": [
{
"@value": "himanshu160"
}
],
"http://ogp.me/ns/article#section_url": [
{
"@value": "http://www.nytimes.com/section/arts"
}
],
"https://ogp.me/ns#title": [
{
"@value": "From Street Theater to Wagner on the Opera Stage"
}
],
"https://www.facebook.com/2008/fbmlapp_id": [
{
"@value": "9869919170"
}
],
"https://ogp.me/ns#image": [
{
"@value": "https://www.eventeducation.com/images/982336_wedding_dayandouan_th.jpg"
}
],
"http://ogp.me/ns/book#release_date": [
{
"@value": "2016-12-15T06:19:30-05:00"
}
],
"http://ogp.me/ns/article#section-taxonomy-id": [
{
"@value": "C5BFA7D5-359C-427B-90E6-6B7245A6CDD8"
}
],
"http://ogp.me/ns/article#published": [
{
"@value": "2016-12-15T05:55:55-05:00"
}
],
"https://ogp.me/ns#description": [
{
"@value": "which he set in Bangladesh instead of Norway. The production opens in Madrid on Saturday."
}
],
"@id": "http://www.example.com/index.html",
"http://ogp.me/ns/article#top-level-section": [
{
"@value": "arts"
}
]
}
]
11 changes: 11 additions & 0 deletions tests/test_rdfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,14 @@ def test_wikipedia_xhtml_rdfa_no_prefix(self):
data = rdfae.extract(body, base_url='http://nielslubberman.nl/drupal/')

self.assertJsonLDEqual(data, expected)

def test_expanded_opengraph_support(self):
    """Check RDFa extraction of a page using Open Graph style prefixes.

    Extracts the ``expanded_OG_support_test.html`` sample and compares the
    result against the JSON-LD stored in ``expanded_OG_support_test.json``.
    """
    html = get_testdata('misc', 'expanded_OG_support_test.html')
    # The expected fixture is read as bytes, so decode before parsing.
    raw_expected = get_testdata('misc', 'expanded_OG_support_test.json')
    expected = json.loads(raw_expected.decode('UTF-8'))

    data = RDFaExtractor().extract(
        html, base_url='http://www.example.com/index.html')

    self.assertJsonLDEqual(data, expected)

0 comments on commit f66c825

Please sign in to comment.