Skip to content

Commit

Permalink
Improved tests
Browse files Browse the repository at this point in the history
Sandhisplitter, Joiner, and Splitter tests have been added.
Newly added lines in PostProcessor, util, trie, and model are now
covered.
Modified samples.txt to avoid conflicts.
Added join_cases for joiner-specific tests.
  • Loading branch information
jerinphilip committed Jul 19, 2016
1 parent 194e15a commit 027beae
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 1 deletion.
3 changes: 2 additions & 1 deletion sandhisplitter/tests/resources/samples.txt
Expand Up @@ -16,4 +16,5 @@
അകലെയാണ്=അകലെ+ആണ്|4
അകലെയുള്ള=അകലെ+ഉള്ള|4
അകാരണമായി=അകാരണം+ആയി|5
അക്കമിട്ടുപറയുന്നുണ്ട്=അക്കമിട്ടു+പറയുന്നുണ്ട്|9
എത്തുമിപ്പോള്‍=എത്തും+ഇപ്പോള്‍|5
എത്തുമക്കരെ=എത്തും+അക്കരെ|5
42 changes: 42 additions & 0 deletions sandhisplitter/tests/test_init.py
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from io import open
from sandhisplitter import Sandhisplitter
from sandhisplitter import getInstance
from sandhisplitter.model import Model
from testtools import TestCase
from sandhisplitter.util import extract
from pkg_resources import resource_filename


class TestSandhisplitter(TestCase):
    """Tests for the top-level ``Sandhisplitter`` wrapper object."""

    def setUp(self):
        """Build a fresh model and splitter and open the sample corpus."""
        super(TestSandhisplitter, self).setUp()
        self.model = Model(depth=3, skip=1)
        self.SS = Sandhisplitter()
        testcases = resource_filename("sandhisplitter.tests",
                                      "resources/samples.txt")
        self.entries = open(testcases, "r", encoding='utf-8')
        # Release the file handle when the test ends, pass or fail.
        self.addCleanup(self.entries.close)

    def test_splits(self):
        """Add every sample to the model, then check each sample's split.

        The serialized model is installed on the splitter via ``set_model``
        and every sample word is split again; the obtained parts and
        positions must equal the ones recorded in the sample line.
        """
        # Materialise a list: on Python 3 ``map`` returns a one-shot
        # iterator, so reusing it for the second loop would silently skip
        # every assertion below.
        entries = [line.strip() for line in self.entries]

        for line in entries:
            (word, splits, locs) = extract(line)
            self.model.add_entry(word, splits, locs)

        m = self.model.serialize()
        self.SS.set_model(m)

        for line in entries:
            (word, splits, locs) = extract(line)
            obtained, pos = self.SS.split(word)
            self.assertEqual(locs, pos)
            self.assertEqual(splits, obtained)

    def test_details(self):
        """The module name and info strings must match the expected text."""
        self.assertEqual(self.SS.get_module_name(), "Sandhi-Splitter")
        self.assertEqual(self.SS.get_info(),
                         "Sandhi-splitter for malayalam")

    def test_instance(self):
        """``getInstance`` must return a ``Sandhisplitter`` object."""
        self.assertIsInstance(getInstance(), Sandhisplitter)
3 changes: 3 additions & 0 deletions sandhisplitter/tests/test_model.py
Expand Up @@ -34,3 +34,6 @@ def test_load(self):
locs = list(locs)
sps = self.testModel.probable_splits(word)
self.assertEqual(sps, locs)

def test_error(self):
    """Calling ``Model("what")`` must raise ``ValueError``."""
    invalid_argument = "what"
    self.assertRaises(ValueError, Model, invalid_argument)
34 changes: 34 additions & 0 deletions sandhisplitter/tests/test_splitter.py
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
from io import open
from sandhisplitter.splitter import Splitter
from sandhisplitter.model import Model
from testtools import TestCase
from sandhisplitter.util import extract
from pkg_resources import resource_filename


class TestSplitter(TestCase):
    """Tests for ``Splitter`` built from a serialized ``Model``."""

    def setUp(self):
        """Create an empty model and open the sample corpus."""
        super(TestSplitter, self).setUp()
        self.testModel = Model(depth=3, skip=1)
        testcases = resource_filename("sandhisplitter.tests",
                                      "resources/samples.txt")
        self.entries = open(testcases, "r", encoding='utf-8')
        # Release the file handle when the test ends, pass or fail.
        self.addCleanup(self.entries.close)

    def test_load(self):
        """Add all samples to the model, then split the first sample word.

        The split positions reported by ``Splitter.splits`` for the first
        sample must equal the positions recorded in that sample line.
        """
        firstline = None
        for line in self.entries:
            # Remember the first sample; it is re-checked after loading.
            if firstline is None:
                firstline = line
            (word, splits, locs) = extract(line)
            self.testModel.add_entry(word, splits, locs)

        m = self.testModel.serialize()
        self.testModel.load(m)
        self.splitter = Splitter(m)

        # Test probable splits
        (word, splits, locs) = extract(firstline)
        locs = list(locs)
        sps = self.splitter.splits(word)
        self.assertEqual(sps, locs)
2 changes: 2 additions & 0 deletions sandhisplitter/tests/test_trie.py
Expand Up @@ -51,3 +51,5 @@ def test_smoothed_psp(self):
else:
self.assertEqual(self.testTrie.smoothed_P_sp(word, i), 0.5)
self.assertEqual(self.testTrie.smoothed_P_sp("hi", 0), 0.5)

self.assertEqual(self.testTrie.smoothed_P_sp('', 0), 0.0)
25 changes: 25 additions & 0 deletions sandhisplitter/tests/test_util.py
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
from io import open
from testtools import TestCase
from pkg_resources import resource_filename
from sandhisplitter.util import extract, compress, head_tail


class TestUtils(TestCase):
    """Tests for the helpers in ``sandhisplitter.util``."""

    def setUp(self):
        """Open the sample corpus used by the round-trip test."""
        super(TestUtils, self).setUp()
        testcases = resource_filename("sandhisplitter.tests",
                                      "resources/samples.txt")
        self.entries = open(testcases, "r", encoding='utf-8')
        # Release the file handle when the test ends, pass or fail.
        self.addCleanup(self.entries.close)

    def test_extract_compress(self):
        """``compress(*extract(line))`` must reproduce each sample line."""
        for raw in self.entries:
            inline = raw.strip()
            word, splits, locs = extract(inline)
            outline = compress(word, splits, locs)
            self.assertEqual(inline, outline)

    def test_head_tail(self):
        """``head_tail`` returns (first, rest) and rejects an empty list."""
        self.assertRaises(IndexError, head_tail, [])
        self.assertEqual(head_tail([1]), (1, []))
        self.assertEqual(head_tail([1, 2]), (1, [2]))

0 comments on commit 027beae

Please sign in to comment.