Skip to content

Commit

Permalink
Merge pull request #191 from aapa/urltitle-wikipedia
Browse files Browse the repository at this point in the history
Fix wikipedia title handler
  • Loading branch information
lepinkainen committed Dec 29, 2015
2 parents f9482bc + 59b9228 commit 932e496
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions pyfibot/modules/module_urltitle.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,9 +783,9 @@ def get_content(url):
'format': 'json',
'action': 'query',
'prop': 'extracts',
- # request 5 sentences, because Wikipedia seems to think that
- # period is always indicative of end of sentence
- 'exsentences': 5,
+ # request everything before the first section, because requesting
+ # only a limited number of sentences breaks randomly
+ 'exintro': '',
'redirects': '',
'titles': clean_page_name(url)
}
Expand Down Expand Up @@ -813,9 +813,11 @@ def get_content(url):
content = re.sub(r'\(.*?\)', '', content)
# Remove " , ", which might be left behind after cleaning up
# the brackets
- content = re.sub(' +([,.])', '\\1 ', content)
+ content = re.sub('\s+([,.])', '\\1 ', content)
# Remove multiple spaces
- content = re.sub(' +', ' ', content)
+ content = re.sub('\s+', ' ', content)
+ # Strip possible trailing whitespace
+ content = content.rstrip()

# Define sentence break as something ending in a period and starting with a capital letter,
# with a whitespace or newline in between
Expand Down

0 comments on commit 932e496

Please sign in to comment.