Permalink
Browse files

Add splitRx

  • Loading branch information...
1 parent a690306 commit 5e39d8f5852a0d54773e3d7ee624bcdddbab9e96 @titoBouzout committed May 13, 2014
Showing with 14 additions and 2 deletions.
  1. +9 −1 WordCount.py
  2. +5 −1 readme.md
View
@@ -24,6 +24,10 @@ def load(self):
Pref.running = False
Pref.wrdRx = re.compile(s.get('word_regexp', "^[^\w]?\w+[^\w]*$"), re.U)
Pref.wrdRx = Pref.wrdRx.match
+ Pref.splitRx = s.get('word_split', None)
+ if Pref.splitRx:
+ Pref.splitRx = re.compile(Pref.splitRx, re.U)
+ Pref.splitRx = Pref.splitRx.findall
Pref.enable_live_count = s.get('enable_live_count', True)
Pref.enable_readtime = s.get('enable_readtime', False)
Pref.enable_line_word_count = s.get('enable_line_word_count', False)
@@ -209,7 +213,11 @@ def count(self, content):
#=====2
wrdRx = Pref.wrdRx
- words = len([x for x in content.replace("'", '').split() if False == x.isdigit() and wrdRx(x)])
+ splitRx = Pref.splitRx
+ if splitRx:
+ words = len([x for x in splitRx(content) if False == x.isdigit() and wrdRx(x)])
+ else:
+ words = len([x for x in content.replace("'", '').split() if False == x.isdigit() and wrdRx(x)])
#Pref.elapsed_time = end = time.time() - begin;
#print 'Benchmark: '+str(end)
View
@@ -55,7 +55,11 @@ An estimated reading time is now appended to the end of the word count.
- `word_regexp` : ""
- Word Regular expression. Defaults empty, an internal regular expression is used.
+ Regular expression used to decide whether a portion of text is a word. Defaults to empty, in which case an internal regular expression is used. If a portion of text matches this regular expression, it is counted as a word.
+
+ - `word_split` : ""
+
+ Regular expression used to split the content into portions of text that are later tested as words. Defaults to empty, in which case String.split() with no arguments is used, meaning that the content is trimmed and empty values (runs of whitespace) are discarded. If it is set to a non-empty value, the result of "re.findall" with this regular expression is used instead.
## Inspiration

0 comments on commit 5e39d8f

Please sign in to comment.