Skip to content

Commit

Permalink
Merge 0e0c84a into 34dc93c
Browse files Browse the repository at this point in the history
  • Loading branch information
sawatzkylindsey committed Mar 24, 2018
2 parents 34dc93c + 0e0c84a commit 7750efd
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 1 deletion.
16 changes: 16 additions & 0 deletions tests/extract_html_format_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,19 @@ def test_text(self):
"<p>Text for section 5.1\n\n\n</p>"
)
)

def test_with_erroneous_edit(self):
    # Regression check for headings that carry a trailing
    # "<span>Edit</span>": the assertions below require that neither the
    # section title nor the rendered page text contains that stray span.
    # NOTE(review): presumably self.wiki is backed by the mocked
    # 'Test_Edit' extract in tests/mock_data.py -- confirm in setUp.
    page = self.wiki.page('Test_Edit')
    self.maxDiff = None  # report the complete diff if the text differs

    edited_section = page.section_by_title('Section with Edit')
    self.assertEqual(edited_section.title, 'Section with Edit')

    expected_text = "".join([
        "<p><b>Summary</b> text\n\n</p>\n\n",
        "<h2>Section 1</h2>\n",
        "<p>Text for section 1</p>\n\n",
        "<h3>Section with Edit</h3>\n",
        "<p>Text for section with edit\n\n\n</p>",
    ])
    self.assertEqual(page.text, expected_text)
30 changes: 30 additions & 0 deletions tests/mock_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,36 @@ def wikipedia_api_request(page, params):
}
}
},
'en:action=query&prop=extracts&titles=Test_Edit&': {
"batchcomplete": "",
"warnings": {
"extracts": {
"*": "\"exlimit\" was too large for a whole article extracts request, lowered to 1."
}
},
"query": {
"normalized": [
{
"from": "Test_Edit",
"to": "Test Edit"
}
],
"pages": {
"4": {
"pageid": 4,
"ns": 0,
"title": "Test Edit",
"extract": (
"<p><b>Summary</b> text\n\n</p>\n" +
"<h2>Section 1</h2>\n" +
"<p>Text for section 1</p>\n\n\n" +
"<h3><span id=\"s1.Edit\">Section with Edit</span><span>Edit</span></h3>\n" +
"<p>Text for section with edit\n\n\n</p>"
)
}
}
}
},
'en:action=query&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle&prop=info&titles=Test_1&': {
"batchcomplete": "",
"query": {
Expand Down
4 changes: 3 additions & 1 deletion wikipediaapi/wikipedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ class Namespace(object):
ExtractFormat.HTML: re.compile(
r'\n? *<h(\d)[^>]*?>(<span[^>]*><\/span>)? *' +
'(<span[^>]*>)? *(<span[^>]*><\/span>)? *(.*?) *' +
'(<\/span>)?<\/h\d>\n?'
'(<\/span>)?(<span>Edit<\/span>)?<\/h\d>\n?'
# ^^^^
# Example page with erroneous 'Edit' links: https://bit.ly/2ui4FWs
),
# ExtractFormat.PLAIN.value: re.compile(r'\n\n *(===*) (.*?) (===*) *\n'),
}
Expand Down

0 comments on commit 7750efd

Please sign in to comment.