Skip to content

Commit

Permalink
Add basic spell checking, via Wikipedia:Lists of common misspellings/…
Browse files Browse the repository at this point in the history
…For machines
  • Loading branch information
theopolisme committed Jul 23, 2013
1 parent 46b2498 commit 74a4537
Show file tree
Hide file tree
Showing 2 changed files with 4,339 additions and 2 deletions.
12 changes: 10 additions & 2 deletions gan.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from theobot import password
from theobot import bot
from theobot import spellcheck

from itertools import groupby
import collections
Expand Down Expand Up @@ -80,13 +81,20 @@ def paragraph(lines):
if not re.search(r"""(sfn|\<ref)""",p) and len(p) > 100: # only look at paragraphs longer than 100 chars
alerts.append("\n* ''(beta)'' Lacking a citation in the paragraph beginning {{{{xt|{}...}}}}".format(p[:70]))

# Check for commonly misspelled words listed at [[Wikipedia:Lists of common misspellings/For machines]]
print "Checking for misspelled words..."
sp = []
for spell_tuple in spellcheck.Misspellings(text).check():
#for spell_tuple in spellcheck.Misspellings(text).check():
sp.append("\"{0}\" (line {1})".format(spell_tuple[1],spell_tuple[0]))
if len(sp) > 0:
alerts.append("\n* Common typo(s) or misspelling(s) detected: " + ', '.join(sp))

if len(alerts) > 0:
results += ''.join(alerts)
else:
results += "\n''There are no alerts for this page.''"

# !todo misspelled words?

return results

def cleanup(text):
Expand Down
Loading

0 comments on commit 74a4537

Please sign in to comment.