Improved tests

Sandhisplitter, Joiner and Splitter tests have been added. Lines newly added to PostProcessor, util, trie and model are now covered. Modified samples.txt to avoid conflicts. Added join_cases for joiner-specific tests.
libindic · Jul 19, 2016 · 027beae · 027beae
1 parent 194e15a
commit 027beae
Show file tree

Hide file tree

Showing 6 changed files with 108 additions and 1 deletion.
diff --git a/sandhisplitter/tests/resources/samples.txt b/sandhisplitter/tests/resources/samples.txt
@@ -16,4 +16,5 @@
 അകലെയാണ്=അകലെ+ആണ്|4
 അകലെയുള്ള=അകലെ+ഉള്ള|4
 അകാരണമായി=അകാരണം+ആയി|5
-അക്കമിട്ടുപറയുന്നുണ്ട്=അക്കമിട്ടു+പറയുന്നുണ്ട്|9
+എത്തുമിപ്പോള്‍=എത്തും+ഇപ്പോള്‍|5
+എത്തുമക്കരെ=എത്തും+അക്കരെ|5
diff --git a/sandhisplitter/tests/test_init.py b/sandhisplitter/tests/test_init.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from io import open
+from sandhisplitter import Sandhisplitter
+from sandhisplitter import getInstance
+from sandhisplitter.model import Model
+from testtools import TestCase
+from sandhisplitter.util import extract
+from pkg_resources import resource_filename
+
+
+class TestSandhisplitter(TestCase):
+    """Tests for the top-level Sandhisplitter wrapper."""
+
+    def setUp(self):
+        """Create a fresh model and splitter, and open the samples file."""
+        super(TestSandhisplitter, self).setUp()
+        self.model = Model(depth=3, skip=1)
+        self.SS = Sandhisplitter()
+        testcases = resource_filename("sandhisplitter.tests",
+                                      "resources/samples.txt")
+        self.entries = open(testcases, "r", encoding='utf-8')
+        # Close the file after each test, even if the test fails.
+        self.addCleanup(self.entries.close)
+
+    def test_splits(self):
+        """Train on every sample, then check split() reproduces each one."""
+        # Build a real list: the samples are walked twice, and on Python 3
+        # a bare map() iterator would be exhausted after the first loop,
+        # silently skipping every assertion below.
+        entries = [line.strip() for line in self.entries.readlines()]
+        for line in entries:
+            (word, splits, locs) = extract(line)
+            self.model.add_entry(word, splits, locs)
+        m = self.model.serialize()
+        self.SS.set_model(m)
+        for line in entries:
+            (word, splits, locs) = extract(line)
+            obtained, pos = self.SS.split(word)
+            self.assertEqual(locs, pos)
+            self.assertEqual(splits, obtained)
+
+    def test_details(self):
+        """Check the module name and info strings reported by the wrapper."""
+        self.assertEqual(self.SS.get_module_name(), "Sandhi-Splitter")
+        self.assertEqual(self.SS.get_info(),
+                         "Sandhi-splitter for malayalam")
+
+    def test_instance(self):
+        """Check getInstance() returns a Sandhisplitter object."""
+        self.assertIsInstance(getInstance(), Sandhisplitter)
diff --git a/sandhisplitter/tests/test_model.py b/sandhisplitter/tests/test_model.py
@@ -34,3 +34,6 @@ def test_load(self):
         locs = list(locs)
         sps = self.testModel.probable_splits(word)
         self.assertEqual(sps, locs)
+
+    def test_error(self):
+        """Check that calling Model("what") raises ValueError."""
+        self.assertRaises(ValueError, Model, "what")
diff --git a/sandhisplitter/tests/test_splitter.py b/sandhisplitter/tests/test_splitter.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+from io import open
+from sandhisplitter.splitter import Splitter
+from sandhisplitter.model import Model
+from testtools import TestCase
+from sandhisplitter.util import extract
+from pkg_resources import resource_filename
+
+
+class TestSplitter(TestCase):
+    """Tests for a Splitter built from a serialized Model."""
+
+    def setUp(self):
+        """Create an empty model and open the samples file."""
+        super(TestSplitter, self).setUp()
+        self.testModel = Model(depth=3, skip=1)
+        testcases = resource_filename("sandhisplitter.tests",
+                                      "resources/samples.txt")
+        self.entries = open(testcases, "r", encoding='utf-8')
+        # Close the file after each test, even if the test fails.
+        self.addCleanup(self.entries.close)
+
+    def test_load(self):
+        """Train on all samples and check splits() for the first one."""
+        firstline = None
+        for line in self.entries:
+            if firstline is None:
+                # Remember the first sample for the check below.
+                firstline = line
+            (word, splits, locs) = extract(line)
+            self.testModel.add_entry(word, splits, locs)
+        m = self.testModel.serialize()
+        self.testModel.load(m)
+        self.splitter = Splitter(m)
+        # Test probable splits
+        (word, splits, locs) = extract(firstline)
+        locs = list(locs)
+        sps = self.splitter.splits(word)
+        self.assertEqual(sps, locs)
diff --git a/sandhisplitter/tests/test_trie.py b/sandhisplitter/tests/test_trie.py
@@ -51,3 +51,5 @@ def test_smoothed_psp(self):
                 else:
                     self.assertEqual(self.testTrie.smoothed_P_sp(word, i), 0.5)
         self.assertEqual(self.testTrie.smoothed_P_sp("hi", 0), 0.5)
+
+        self.assertEqual(self.testTrie.smoothed_P_sp('', 0), 0.0)
diff --git a/sandhisplitter/tests/test_util.py b/sandhisplitter/tests/test_util.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+from io import open
+from testtools import TestCase
+from pkg_resources import resource_filename
+from sandhisplitter.util import extract, compress, head_tail
+
+
+class TestUtils(TestCase):
+    """Tests for the helpers in sandhisplitter.util."""
+
+    def setUp(self):
+        """Open the samples file used by the round-trip test."""
+        super(TestUtils, self).setUp()
+        testcases = resource_filename("sandhisplitter.tests",
+                                      "resources/samples.txt")
+        self.entries = open(testcases, "r", encoding='utf-8')
+        # Close the file after each test, even if the test fails.
+        self.addCleanup(self.entries.close)
+
+    def test_extract_compress(self):
+        """Check compress() rebuilds each sample line from extract()."""
+        for inline in (raw.strip() for raw in self.entries.readlines()):
+            word, splits, locs = extract(inline)
+            outline = compress(word, splits, locs)
+            self.assertEqual(inline, outline)
+
+    def test_head_tail(self):
+        """Check head_tail() on empty, one-item and two-item lists."""
+        self.assertRaises(IndexError, head_tail, [])
+        self.assertEqual(head_tail([1]), (1, []))
+        self.assertEqual(head_tail([1, 2]), (1, [2]))