-
Notifications
You must be signed in to change notification settings - Fork 0
/
syllablecount.py
39 lines (34 loc) · 1.07 KB
/
syllablecount.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import re
from nltk.corpus import cmudict
# Pronunciation dictionary built once at import time from nltk's cmudict
# corpus. count_syllables() indexes it with the lower-cased word and reads the
# first entry of the resulting list of pronunciations.
# NOTE(review): presumably requires the cmudict corpus to be installed locally
# (e.g. via nltk.download("cmudict")) -- confirm for the deployment environment.
d = cmudict.dict()
# One match per maximal run of consecutive vowel letters (y included),
# regardless of case; each run is the baseline guess for one syllable.
VOWEL_RUNS = re.compile(r"[aeiouy]+", re.IGNORECASE)
# Word endings whose vowel run should NOT be counted as a syllable.
# Every match is subtracted from the vowel-run total.
EXCEPTIONS = re.compile(
    # Silent trailing "e", optionally followed by "s" or "d": smite, scared.
    r"[^aeiou]e[sd]?$|"
    # Adverbs ending in "-ely" where the "e" is silent: nicely.
    r"[^e]ely$",
    re.IGNORECASE,
)
# Letter sequences that carry one more syllable than the vowel-run count
# (after EXCEPTIONS has been subtracted) would give. Every match is added back.
ADDITIONAL = re.compile(
    # Endings that EXCEPTIONS subtracts although the final "e" is voiced:
    # consonant + l/r + e (table), sibilant + "es" (raises), t/d + "ed"
    # (fated). A vowel or a doubled l/r before the l/r is deliberately
    # excluded, so "smile" and "scarred" are not matched here.
    r"[^aeioulr][lr]e[sd]?$|[csgz]es$|[td]ed$|"
    # Vowel runs that actually span two syllables, and other one-offs:
    # flying, piano (but not a word-final "ian"), video, prism, fire, evaluate.
    r".y[aeiou]|ia(?!n$)|eo|ism$|[^aeiou]ire$|[^gq]ua",
    re.IGNORECASE,
)
def count_syllables(word):
    """Return the number of syllables in *word*.

    The lower-cased word is looked up in the module-level pronunciation
    dictionary ``d``. Only the first listed pronunciation is used; every
    phoneme in it whose last character is a digit is counted as one syllable
    (in CMUdict the digit is the stress marker carried by vowel phonemes).

    If the word is not in the dictionary, the regex-based estimate from
    ``weak_count_syllables`` is returned instead.
    """
    try:
        # Keep the try body to the single lookup that can raise KeyError.
        first_pronunciation = d[word.lower()][0]
    except KeyError:
        # Word not found in cmudict: fall back to the spelling heuristic.
        return weak_count_syllables(word)
    # Count only the first pronunciation instead of computing a count for
    # every pronunciation and discarding all but the first.
    return sum(1 for phoneme in first_pronunciation if phoneme[-1].isdigit())
def weak_count_syllables(word):
    """Estimate the syllable count of *word* from its spelling alone.

    Starts from the number of vowel runs (``VOWEL_RUNS``), subtracts one for
    each ``EXCEPTIONS`` match and adds one for each ``ADDITIONAL`` match.
    The result is never less than 1.
    """
    estimate = sum(1 for _ in VOWEL_RUNS.finditer(word))
    estimate -= sum(1 for _ in EXCEPTIONS.finditer(word))
    estimate += sum(1 for _ in ADDITIONAL.finditer(word))
    # Clamp so that every word is reported as having at least one syllable.
    if estimate < 1:
        return 1
    return estimate