Skip to content

Commit

Permalink
@rikima's contrib plus py3 patch, fixes #66
Browse files (browse the repository at this point in the history)
  • Loading branch information
siznax committed Jun 22, 2017
1 parent 6af1662 commit 89a4d26
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 18 deletions.
50 changes: 39 additions & 11 deletions tests/test_advanced.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
import random
import unittest

import titles
import tests.titles as titles
import tests.wikidata_images as wikidata_images
import wptools

LANG = ['de', 'es', 'fr', 'hi', 'it', 'ja', 'nl', 'ru', 'sv', 'vi', 'zh']
Expand Down Expand Up @@ -100,14 +101,6 @@ def test_mixed_lang(self):
p = wptools.page('Abraham Lincoln', lang='zh').get_query(False)
self.assertEqual(p.wikibase, 'Q91')

def test_complex_infobox(self):
    """
    Successfully populate complex infobox dict

    Parses the 'Aung San Suu Kyi' page and checks that the resulting
    infobox holds at least 32 entries and carries no 'errors' key.
    """
    # NOTE(review): the positional False is passed straight through to
    # get_parse(); presumably it suppresses output -- confirm in wptools.
    p = wptools.page('Aung San Suu Kyi').get_parse(False)
    self.assertGreaterEqual(len(p.infobox), 32)
    # assertNotIn reports the offending container on failure, unlike
    # assertTrue(... not in ...), which only says "False is not true".
    self.assertNotIn('errors', p.infobox)

def test_thumbnail(self):
"""
Get a thumbnail image URL
Expand Down Expand Up @@ -196,7 +189,8 @@ class WPToolsToolTest(unittest.TestCase):
WPTOOL TESTS
"""

def test_wptool(self):
@staticmethod
def test_wptool():
'''
Get random page via wptool
'''
Expand All @@ -208,10 +202,43 @@ def test_wptool(self):
main(args(**cli))


class WPToolsUtilsTest(unittest.TestCase):
    """
    Utils Tests

    Each test fetches a page through wptools and inspects the parsed
    infobox, so these tests depend on whatever wptools.page() talks to.
    """

    def test_infobox_subelements(self):
        """
        Get infobox data with sub-elements. Issue #66
        """
        p = wptools.page("ONE OK ROCK", lang='ja').get_parse()
        # The 'Genre' value is expected to keep its '<br' separated
        # sub-elements; more than five pieces must survive.
        self.assertGreater(len(p.infobox['Genre'].split('<br')), 5)

    def test_infobox_children(self):
        """
        Get infobox data with list values. Issue #62
        """
        p = wptools.page('Lewisit', lang='de').get_parse()
        # The 'Dichte' value is expected to contain at least one '*'
        # list marker, i.e. to split into more than one piece.
        self.assertGreater(len(p.infobox['Dichte'].split('*')), 1)

    def test_complex_infobox(self):
        """
        Successfully populate complex infobox dict
        """
        # NOTE(review): the positional False is passed straight through
        # to get_parse(); presumably it suppresses output -- confirm.
        p = wptools.page('Aung San Suu Kyi').get_parse(False)
        self.assertGreaterEqual(len(p.infobox), 32)
        # assertNotIn reports the offending container on failure, unlike
        # assertTrue(... not in ...), which only says "False is not true".
        self.assertNotIn('errors', p.infobox)


class WPToolsWikidataTest(unittest.TestCase):
"""
Wikidata Tests
"""

def test_wikidata_images(self):
import wikidata_images
"""
Get wikidata images from cache.
"""
page = wptools.page('test_wikidata_images')
page.cache['wikidata'] = wikidata_images.cache
page._set_wikidata()
Expand All @@ -228,6 +255,7 @@ def test_wikidata_images(self):
'rand': TestLoader().loadTestsFromTestCase(WPToolsRandomTest),
'rest': TestLoader().loadTestsFromTestCase(WPToolsRestBaseTest),
'tool': TestLoader().loadTestsFromTestCase(WPToolsToolTest),
'utils': TestLoader().loadTestsFromTestCase(WPToolsUtilsTest),
'wikidata': TestLoader().loadTestsFromTestCase(WPToolsWikidataTest),
}
suites['all'] = unittest.TestSuite(suites.values())
Expand Down
22 changes: 15 additions & 7 deletions wptools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,14 +245,22 @@ def template_to_dict(tree):

def text_with_children(node):
    """
    return text content with children (#62), sub-elements (#66)
    https://stackoverflow.com/questions/4624062/get-all-text-inside-a-tag-in-lxml

    Concatenates, in document order: the node's own leading text, each
    child serialized as markup followed by that child's tail text, and
    finally the node's own tail text. Missing (None) or empty pieces
    are skipped, so a node with no text at all yields ''.
    """
    # Children are serialized without their tail; the tail is appended
    # separately below so it shows up exactly once.
    serialize_args = {'with_tail': False}
    if sys.version_info[0] >= 3:
        # py3 needs encoding=str so the serialized child can be joined
        # with the surrounding text pieces.
        serialize_args['encoding'] = str

    parts = [node.text]
    # Iterating the node visits its direct children in order.
    for child in node:
        parts.append(tostring(child, **serialize_args))
        parts.append(child.tail)
    parts.append(node.tail)

    # Drop None / empty pieces before joining.
    return ''.join(part for part in parts if part)


def template_to_text(tmpl):
Expand Down

0 comments on commit 89a4d26

Please sign in to comment.