Skip to content

Commit

Permalink
Merge pull request #121 from scrapinghub/skip_empty_on_flattening_og
Browse files Browse the repository at this point in the history
OpenGraph skipping empty for properties with values on flattening
  • Loading branch information
ivanprado committed Jul 26, 2019
2 parents 6df8e19 + 5fa98cb commit f6ee940
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
8 changes: 7 additions & 1 deletion extruct/uniform.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@
def _uopengraph(extracted):
out = []
for obj in extracted:
flattened = dict(reversed(obj['properties']))
# Reversed so that, once flattened into a dict (where later pairs
# overwrite earlier ones), the first appearance in the page wins
properties = list(reversed(obj['properties']))
# Ensure that an empty value is never returned when a duplicated
# property also has a non-empty value
non_empty_props = {k for k, v in properties if v and v.strip()}
flattened = {k: v for k, v in properties
if k not in non_empty_props or (v and v.strip())}
t = flattened.pop('og:type', None)
if t:
flattened['@type'] = t
Expand Down
22 changes: 22 additions & 0 deletions tests/test_uniform.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,28 @@ def test_uopengraph_duplicated_priorities(self):
for k in range(5):
assert data[0]['prop_{}'.format(k)] == 'value_0'

# Ensures that an empty value is not returned if a property has any
# non-empty value
data = _uopengraph([{'properties':
[('prop_empty', ' '),

('prop_non_empty', ' '),
('prop_non_empty', 'value!'),

('prop_non_empty2', 'value!'),
('prop_non_empty2', ' '),

('prop_non_empty3', ' '),
('prop_non_empty3', 'value!'),
('prop_non_empty3', 'other value'),
],
'namespace': 'namespace'}])
assert data[0]['prop_empty'] == ' '
assert data[0]['prop_non_empty'] == 'value!'
assert data[0]['prop_non_empty2'] == 'value!'
assert data[0]['prop_non_empty3'] == 'value!'


def test_umicroformat(self):
expected = [ { '@context': 'http://microformats.org/wiki/',
'@type': ['h-hidden-phone', 'h-hidden-tablet'],
Expand Down

0 comments on commit f6ee940

Please sign in to comment.