Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Sandhisplitter, Joiner, and Splitter tests have been added. Newly added lines in PostProcessor, util, trie, and model are now covered. Modified samples.txt to avoid conflicts. Added join_cases for joiner-specific tests.
- Loading branch information
1 parent
194e15a
commit 027beae
Showing
6 changed files
with
108 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# -*- coding: utf-8 -*- | ||
from __future__ import unicode_literals | ||
from io import open | ||
from sandhisplitter import Sandhisplitter | ||
from sandhisplitter import getInstance | ||
from sandhisplitter.model import Model | ||
from testtools import TestCase | ||
from sandhisplitter.util import extract | ||
from pkg_resources import resource_filename | ||
|
||
|
||
class TestSandhisplitter(TestCase):
    """Tests for the ``Sandhisplitter`` class and ``getInstance``."""

    def setUp(self):
        """Build a fresh model and splitter and open the samples file."""
        super(TestSandhisplitter, self).setUp()
        self.model = Model(depth=3, skip=1)
        self.SS = Sandhisplitter()
        testcases = resource_filename("sandhisplitter.tests",
                                      "resources/samples.txt")
        self.entries = open(testcases, "r", encoding='utf-8')
        # Register the close so the handle is released even if a test fails.
        self.addCleanup(self.entries.close)

    def test_splits(self):
        """Train on every sample, then check each sample splits as recorded."""
        # Materialise the lines as a list: on Python 3 ``map`` returns a
        # one-shot iterator, which would leave the verification loop below
        # with nothing to iterate and make the test pass vacuously.
        entries = [line.strip() for line in self.entries]

        # Training pass: feed every annotated sample into the model.
        for line in entries:
            word, splits, locs = extract(line)
            self.model.add_entry(word, splits, locs)

        self.SS.set_model(self.model.serialize())

        # Verification pass: the splitter must reproduce each annotation.
        for line in entries:
            word, splits, locs = extract(line)
            obtained, pos = self.SS.split(word)
            self.assertEqual(locs, pos)
            self.assertEqual(splits, obtained)

    def test_details(self):
        """The module name and info strings match the expected values."""
        self.assertEqual(self.SS.get_module_name(), "Sandhi-Splitter")
        self.assertEqual(self.SS.get_info(),
                         "Sandhi-splitter for malayalam")

    def test_instance(self):
        """``getInstance`` returns a ``Sandhisplitter`` object."""
        self.assertIsInstance(getInstance(), Sandhisplitter)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# -*- coding: utf-8 -*- | ||
from io import open | ||
from sandhisplitter.splitter import Splitter | ||
from sandhisplitter.model import Model | ||
from testtools import TestCase | ||
from sandhisplitter.util import extract | ||
from pkg_resources import resource_filename | ||
|
||
|
||
class TestSplitter(TestCase):
    """Tests for ``Splitter`` driven by a model built from the samples file."""

    def setUp(self):
        """Create an empty model and open the samples file."""
        super(TestSplitter, self).setUp()
        self.testModel = Model(depth=3, skip=1)
        testcases = resource_filename("sandhisplitter.tests",
                                      "resources/samples.txt")
        self.entries = open(testcases, "r", encoding='utf-8')
        # Register the close so the handle is released even if a test fails.
        self.addCleanup(self.entries.close)

    def test_load(self):
        """Train, round-trip the model, and check splits for the first sample."""
        firstline = None
        for line in self.entries:
            # Remember the first sample; it is re-used for the check below.
            if firstline is None:
                firstline = line
            word, splits, locs = extract(line)
            self.testModel.add_entry(word, splits, locs)

        # Fail clearly on an empty resource instead of inside extract(None).
        self.assertIsNotNone(firstline, "samples.txt contains no entries")

        m = self.testModel.serialize()
        self.testModel.load(m)
        self.splitter = Splitter(m)

        # Test probable splits
        word, splits, locs = extract(firstline)
        locs = list(locs)
        sps = self.splitter.splits(word)
        self.assertEqual(sps, locs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# -*- coding: utf-8 -*- | ||
from io import open | ||
from testtools import TestCase | ||
from pkg_resources import resource_filename | ||
from sandhisplitter.util import extract, compress, head_tail | ||
|
||
|
||
class TestUtils(TestCase):
    """Tests for the ``extract``, ``compress`` and ``head_tail`` helpers."""

    def setUp(self):
        """Open the samples file used by the round-trip test."""
        super(TestUtils, self).setUp()
        testcases = resource_filename("sandhisplitter.tests",
                                      "resources/samples.txt")
        self.entries = open(testcases, "r", encoding='utf-8')
        # Register the close so the handle is released even if a test fails.
        self.addCleanup(self.entries.close)

    def test_extract_compress(self):
        """``compress`` applied to ``extract`` output reproduces each line."""
        entries = [line.strip() for line in self.entries]
        for inline in entries:
            word, splits, locs = extract(inline)
            outline = compress(word, splits, locs)
            self.assertEqual(inline, outline)

    def test_head_tail(self):
        """``head_tail`` returns (first, rest) and raises on an empty list."""
        self.assertRaises(IndexError, head_tail, [])
        self.assertEqual(head_tail([1]), (1, []))
        self.assertEqual(head_tail([1, 2]), (1, [2]))