Skip to content

Commit

Permalink
Merge pull request #86 from scrapinghub/cleanup-jsonld-tests
Browse files (browse the repository at this point in the history)
TST cleanup TestJsonLD
  • Loading branch information
lopuhin committed Aug 9, 2018
2 parents cd56da2 + 37e813e commit 35e1d0f
Showing 1 changed file with 25 additions and 40 deletions.
65 changes: 25 additions & 40 deletions tests/test_jsonld.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,55 +9,40 @@
class TestJsonLD(unittest.TestCase):

# Check JSON-LD extraction for the 'CreativeWork.001' fixture in the
# 'schema.org' test-data folder: the data extracted from the .html file must
# equal the parsed .jsonld reference.
# NOTE(review): these lines look like a diff rendered without +/- markers; the
# inline load/extract/assert sequence and the final assertJsonLdCorrect call
# appear to be the removed and added forms of the same check -- confirm
# against the repository before relying on either.
def test_schemaorg_CreativeWork(self):
body = get_testdata('schema.org', 'CreativeWork.001.html')
# The reference is decoded as UTF-8 and parsed with json.loads.
expected = json.loads(get_testdata('schema.org', 'CreativeWork.001.jsonld').decode('UTF-8'))

jsonlde = JsonLdExtractor()
data = jsonlde.extract(body)
self.assertEqual(data, expected)
# Same folder/page pair, checked through the shared helper.
self.assertJsonLdCorrect(folder='schema.org', page='CreativeWork.001')

# Check JSON-LD extraction for the songkick event page fixture: the data
# extracted from the .html file must equal the parsed .jsonld reference.
# NOTE(review): these lines look like a diff rendered without +/- markers; the
# inline load/extract/assert sequence and the trailing assertJsonLdCorrect
# call appear to be the removed and added forms of the same check -- confirm
# against the repository.
def test_songkick(self):
# The page title doubles as the fixture file stem for both extensions.
page = "Elysian Fields Brooklyn Tickets, The Owl Music Parlor, 31 Oct 2015"
body = get_testdata('songkick', '{}.html'.format(page))
expected = json.loads(get_testdata('songkick', '{}.jsonld'.format(page)).decode('UTF-8'))

jsonlde = JsonLdExtractor()
data = jsonlde.extract(body)
self.assertEqual(data, expected)
# Same folder/page pair, checked through the shared helper.
self.assertJsonLdCorrect(
folder='songkick',
page='Elysian Fields Brooklyn Tickets, The Owl Music Parlor, 31 Oct 2015')

# Check JSON-LD extraction for the 'JoinAction.001' and 'AllocateAction.001'
# fixtures, read from the 'schema.org.invalid' and 'custom.invalid' folders.
# NOTE(review): the test name suggests these fixtures contain comments inside
# the JSON-LD payload; the fixtures are not visible here -- confirm.
# NOTE(review): these lines look like a diff rendered without +/- markers; the
# two "for prefix" loops appear to be the removed implementation and the
# "for page" loop the added one -- confirm against the repository.
def test_jsonld_with_comments(self):
# Inline check against the 'schema.org.invalid' fixtures.
for prefix in ['JoinAction.001', 'AllocateAction.001']:
body = get_testdata('schema.org.invalid', '{}.html'.format(prefix))
name = '{}.jsonld'.format(prefix)
expected = json.loads(get_testdata('schema.org.invalid', name).decode('UTF-8'))

jsonlde = JsonLdExtractor()
data = jsonlde.extract(body)
self.assertEqual(data, expected)
# Inline check against the 'custom.invalid' fixtures with the same stems.
for prefix in ['JoinAction.001',
'AllocateAction.001',
]:
body = get_testdata('custom.invalid', '{}.html'.format(prefix))
expected = json.loads(get_testdata('custom.invalid', '{}.jsonld'.format(prefix)).decode('UTF-8'))

jsonlde = JsonLdExtractor()
data = jsonlde.extract(body)
self.assertEqual(data, expected)
# Helper-based check of the 'schema.org.invalid' fixtures.
for page in ['JoinAction.001', 'AllocateAction.001']:
self.assertJsonLdCorrect(folder='schema.org.invalid', page=page)

# Check JSON-LD extraction for the 'JSONLD_with_control_characters' fixture in
# the 'custom.invalid' folder.
# NOTE(review): this span holds two definitions with the same name; it looks
# like a diff rendered without +/- markers, with the first being the removed
# inline version and the second the added helper-based version -- confirm
# against the repository.
def test_jsonld_with_control_characters(self):
page = 'JSONLD_with_control_characters'
body = get_testdata('custom.invalid', '{}.html'.format(page))
expected = json.loads(get_testdata('custom.invalid', '{}.jsonld'.format(page)).decode('UTF-8'))
# NOTE(review): this loop checks the JoinAction/AllocateAction pages in
# 'custom.invalid', mirroring the loops in test_jsonld_with_comments; it
# presumably belongs to that test rather than this one -- verify.
for page in ['JoinAction.001', 'AllocateAction.001']:
self.assertJsonLdCorrect(folder='custom.invalid', page=page)

jsonlde = JsonLdExtractor()
data = jsonlde.extract(body)
self.assertEqual(data, expected)
# Helper-based form: delegates loading and comparison to assertJsonLdCorrect.
def test_jsonld_with_control_characters(self):
self.assertJsonLdCorrect(
folder='custom.invalid',
page='JSONLD_with_control_characters')

# Check JSON-LD extraction for the 'JSONLD_with_control_characters_comment'
# fixture in the 'custom.invalid' folder.
# NOTE(review): these lines look like a diff rendered without +/- markers; the
# page/body/expected assignments appear to be removed lines (nothing in this
# span reads body or expected) and the assertJsonLdCorrect call the added
# replacement -- confirm against the repository.
def test_jsonld_with_control_characters_comment(self):
page = 'JSONLD_with_control_characters_comment'
body = get_testdata('custom.invalid', '{}.html'.format(page))
expected = json.loads(get_testdata('custom.invalid', '{}.jsonld'.format(page)).decode('UTF-8'))
# Delegates loading and comparison to the shared helper.
self.assertJsonLdCorrect(
folder='custom.invalid',
page='JSONLD_with_control_characters_comment')

def assertJsonLdCorrect(self, folder, page):
    """Assert that the JSON-LD extracted from a fixture matches its reference.

    ``folder`` and ``page`` are passed to ``_get_body_expected`` to obtain
    the page body and the expected data; the pair is then handed to
    ``_check_jsonld`` for the actual comparison.
    """
    html, reference = self._get_body_expected(folder, page)
    self._check_jsonld(html, reference)

def _get_body_expected(self, folder, page):
    """Load a fixture pair and return ``(body, expected)``.

    Both files are fetched with ``get_testdata`` from ``folder``:
    ``<page>.html`` is returned as loaded, while ``<page>.jsonld`` is
    decoded as UTF-8 and parsed with ``json.loads``.
    """
    html = get_testdata(folder, '{}.html'.format(page))
    raw_reference = get_testdata(folder, '{}.jsonld'.format(page))
    reference = json.loads(raw_reference.decode('utf8'))
    return html, reference

def _check_jsonld(self, body, expected):
    """Run a fresh ``JsonLdExtractor`` over ``body`` and compare to ``expected``.

    The comparison uses ``assertEqual``, so a mismatch fails the test.
    """
    extracted = JsonLdExtractor().extract(body)
    self.assertEqual(extracted, expected)

0 comments on commit 35e1d0f

Please sign in to comment.