Skip to content

Commit

Permalink
Add some unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
petercollingridge committed Mar 21, 2012
1 parent a96dc5d commit b7e69a7
Show file tree
Hide file tree
Showing 3 changed files with 261 additions and 0 deletions.
41 changes: 41 additions & 0 deletions tone_marks.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
Latin-1 Supplement - Unicode U+0080 - U+00FF - (128-255)
á = á = á = á
à = à = à = à
é = é = é = é
è = è = è = è
í = í = í = í
ì = ì = ì = ì
ó = ó = ó = ó
ò = ò = ò = ò
ú = ú = ú = ú
ù = ù = ù = ù
ü = ü = ü = ü
subtract 32 for upper case

Latin Extended-A - Unicode U+0100 - U+017F - (256-383)
ā = ā = ā
ē = ē = ē
ě = ě = ě
ī = ī = ī
ō = ō = ō
ū = ū = ū
subtract 1 for upper case

Latin Extended-B - Unicode U+0180 - U+024F - (384-591)
ǎ = ǎ = ǎ
ǐ = ǐ = ǐ
ǒ = ǒ = ǒ
ǔ = ǔ = ǔ

ǖ = ǖ = ǖ
ǘ = ǘ = ǘ
ǚ = ǚ = ǚ
ǜ = ǜ = ǜ
subtract 1 for upper case

ā á ǎ à a
ē é ě è e
ī í ǐ ì i
ō ó ǒ ò o
ū ú ǔ ù u
ǖ ǘ ǚ ǜ ü
50 changes: 50 additions & 0 deletions unit_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

""" Unit tests for pinyinizer.py """
import sys
# NOTE(review): "/.." is an absolute path (the parent of the filesystem
# root), not the directory above this file -- confirm this entry is really
# what lets `import pinyinizer` resolve.
sys.path.append("/..")

import pinyinizer

def getKnownValues(filename):
    """ Load file of known values for testing.

    The file is read line by line:

    * A line starting with '#' opens a new section. The section name is the
      text after the leading '#' characters with surrounding whitespace
      removed. Repeating a section name starts that section over.
    * Any other line is stripped and split on tab characters, and the
      resulting fields are stored as a tuple in the current section.

    Lines seen before the first section header, lines under a header whose
    name is empty, blank or whitespace-only lines, and lines of two
    characters or fewer (newline included) are ignored.

    filename -- path of the known-values file to read.

    Returns a dict mapping each section name to the list of field tuples
    found under it, in file order.
    """

    known_values = {}
    current_type = None

    with open(filename) as f:
        for line in f:
            if line.startswith('#'):
                # Remove the marker itself instead of a fixed two characters,
                # so a header written as "#Name" is not cut down to "ame".
                current_type = line.lstrip('#').strip()
                known_values[current_type] = []
                continue

            fields = line.strip()
            # A whitespace-only line would otherwise be stored as ('',),
            # which cannot be unpacked into an (input, expected) pair.
            # The len(line) > 2 filter is kept from the original loader.
            if fields and len(line) > 2 and current_type:
                known_values[current_type].append(tuple(fields.split('\t')))

    return known_values

def runTests():
    """ Run every known-value case through pinyinizer.addToneMarks.

    Cases are loaded from "unit_tests_known_values.txt" with getKnownValues.
    Each case is an (input, expected) pair; the input is passed to
    pinyinizer.addToneMarks and the result compared with the expected text.

    For every section with at least one mismatch, a header line and one line
    per failing case are printed, followed by an empty line. The total
    number of passing cases is printed at the end. Nothing is returned.

    The print calls and dict iteration are written so that they behave the
    same on Python 2 and Python 3.
    """
    known_values = getKnownValues("unit_tests_known_values.txt")

    passed = 0
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for test_type, test_set in known_values.items():
        failures = []

        for (test_in, test_out) in test_set:
            result = pinyinizer.addToneMarks(test_in)
            if result != test_out:
                failures.append(" %s -> %s (expecting: %s)" % (test_in, result, test_out))
            else:
                passed += 1

        if failures:
            # A single parenthesised string prints identically whether
            # print is a statement (Python 2) or a function (Python 3).
            print(' In "%s", %d of %d failed:' % (test_type, len(failures), len(test_set)))
            for failure in failures:
                print(failure)
            # print('') rather than print(): on Python 2 the latter would
            # print "()" instead of an empty line.
            print('')

    print("Passed %d tests" % passed)

# Run the known-value tests only when this file is executed as a script.
if __name__ == "__main__":
    runTests()
170 changes: 170 additions & 0 deletions unit_tests_known_values.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# Words that should not change
ba ba
pa6 pa6
zi zi
zha zha
web2 web2
negation1 negation1

# One initial + a
ba0 ba
pa1 pā
ma2 má
fa3 fǎ
da4 dà
ta5 ta
na0 na
la1 lā
za2 zá
ca3 cǎ
sa4 sà
ga5 ga
ka0 ka
ha1 hā
ba2 bá
pa3 pǎ
ma4 mà
fa5 fa

# One initial + o
bo0 bo
po1 pō
mo2 mó
fo3 fǒ
bo4 bò

# One initial + e
me0 me
de5 de
te1 tē
ne2 né
le3 lě
ze4 zè
ce1 cē
se2 sé
re3 rě

# One initial + i
zi0 zi
ci1 cī
si2 sí
ji3 jǐ
qi4 qì
xi5 xi
ri1 rī
bi2 bí
pi3 pǐ
mi4 mì
di5 di
ti0 ti
ni1 nī
li2 lí

# One initial + v
lv lü
nv0 nü
lv1 lǖ
nv2 nǘ
lv3 lǚ
nv4 nǜ
lv5 lü

# Two initials + vowel
zha0 zha
che1 chē
shi2 shí
zhu3 zhǔ

# Initial + an
an0 an
an1 ān
ban2 bán
dan3 dǎn
ran4 ràn
gan5 gan
zhan0 zhan
chan1 chān
shan2 shán

# Initial + en
en0 en
en2 én
pen1 pēn
den3 děn
nen4 nèn
cen5 cen
shen1 shēn
hen3 hěn

# Initial + ang
ang0 ang
ang2 áng
pang1 pāng
tang3 tǎng
cang4 càng
kang5 kang
zhang1 zhāng
chang0 chang
shang2 sháng

# Initial + eng
meng0 meng
leng1 lēng
zeng2 zéng
sheng3 shěng
reng4 rèng
geng5 geng

# Initial + ong
dong0 dong
cong1 cōng
zong2 zóng
zhong3 zhǒng
rong4 ròng
kong5 kong

# Initial + in
yin0 yin
bin1 bīn
pin2 pín
min3 mǐn
nin4 nìn
lin5 lin
jin1 jīn
qin2 qín
xin3 xǐn

# Initial + ian
bian0 bian
pian1 piān
mian2 mián
dian3 diǎn
tian4 tiàn
nian5 nian
lian1 liān
jian2 jián
qian3 qiǎn
xian4 xiàn

# Initial + uan
zhuan0 zhuan
chuan1 chuān
shuan2 shuán
juan3 juǎn
quan4 quàn
xuan5 xuan

# With r
er0 er
er2 ér
dianr3 diǎnr
shir4 shìr
huar1 huār
nar2 nár

# Compound words
ni3hao3 nǐhǎo
pin1yin1 pīnyīn

# Sentences
wo3 ai4 ni3 wǒ ài nǐ

0 comments on commit b7e69a7

Please sign in to comment.