Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e2ef480
commit 77e26a5
Showing
3 changed files
with
244 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
90 changes: 90 additions & 0 deletions
90
src/test/java/org/elasticsearch/index/analysis/GreeklishGeneratorTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
package org.elasticsearch.index.analysis; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import org.testng.annotations.BeforeMethod; | ||
import org.testng.annotations.BeforeClass; | ||
import org.testng.annotations.Test; | ||
import org.testng.Assert; | ||
|
||
public class GreeklishGeneratorTest { | ||
|
||
private static final int MAX_EXPANSIONS = 10; | ||
|
||
/** | ||
* a sample of greek words to generate their greeklish | ||
* counterparts. | ||
*/ | ||
private static final String[] greekWords = { "αυτοκινητο", "ομπρελα", | ||
"ξεσκεπαστοσ", }; | ||
|
||
/** | ||
* the greeklish counterparts that should be generated from the | ||
* greek words. | ||
*/ | ||
private static final String[] generatedGreeklishWords = { | ||
"autokinhto", "aftokinhto", "avtokinhto", "aytokinhto", | ||
"autokinito", "aftokinito", "avtokinito", "aytokinito", | ||
"omprela", "obrela", "kseskepastos", "xeskepastos" | ||
}; | ||
|
||
private GreeklishGenerator generator; | ||
|
||
private List<String> inputGreekList = new ArrayList<String>(); | ||
|
||
private List<StringBuilder> greeklishWords; | ||
|
||
private List<String> convertedGreeklishStrings = new ArrayList<String>(); | ||
|
||
@BeforeClass | ||
public void populateInputGreekList() { | ||
this.generator = new GreeklishGenerator(MAX_EXPANSIONS); | ||
|
||
for (String word : greekWords) { | ||
inputGreekList.add(word); | ||
} | ||
} | ||
|
||
@BeforeMethod | ||
public void setUp() { | ||
convertedGreeklishStrings.clear(); | ||
} | ||
|
||
@Test | ||
public void testGreekTokenConversionForValidWords() { | ||
for (int i = 0; i < greekWords.length; i++) { | ||
greeklishWords = generator.generateGreeklishWords(inputGreekList); | ||
|
||
populateConvertedStringsList(); | ||
|
||
Assert.assertFalse(greeklishWords.isEmpty(), | ||
"Greeklish words should be generated"); | ||
for (String greeklishWord : generatedGreeklishWords) { | ||
Assert.assertTrue( | ||
convertedGreeklishStrings.contains(greeklishWord), | ||
"It should contain the greeklish word: " | ||
+ greeklishWord); | ||
} | ||
} | ||
} | ||
|
||
@Test | ||
public void testMaxGreeklishWordGenerations() { | ||
int newMaxExpansions = 2; | ||
generator = new GreeklishGenerator(newMaxExpansions); | ||
|
||
greeklishWords = generator.generateGreeklishWords(inputGreekList); | ||
|
||
Assert.assertEquals(greeklishWords.size(), newMaxExpansions | ||
* inputGreekList.size(), | ||
"The generated words should be limited by the maxExpansions paramater"); | ||
|
||
} | ||
|
||
private final void populateConvertedStringsList() { | ||
for (StringBuilder word : greeklishWords) { | ||
convertedGreeklishStrings.add(word.toString()); | ||
} | ||
} | ||
} |
78 changes: 78 additions & 0 deletions
78
src/test/java/org/elasticsearch/index/analysis/GreeklishReverseStemmerTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
package org.elasticsearch.index.analysis; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import org.testng.annotations.BeforeMethod; | ||
import org.testng.annotations.BeforeClass; | ||
import org.testng.annotations.Test; | ||
import org.testng.Assert; | ||
|
||
public class GreeklishReverseStemmerTest { | ||
|
||
/** | ||
* Some greek words whose variations we want to produce. | ||
*/ | ||
private static final String[] greekWords = { | ||
"κουρεματοσ", "ενδυματα", "γραφειου", "πεδιου", | ||
"γραναζι", "ποδηλατα", "καλωδιων" | ||
}; | ||
|
||
/** | ||
* Words that should not match to any rule. | ||
*/ | ||
private static final String[] nonMatchingWords = { | ||
"σουτιεν", "κολλαν", "αμπαλαζ", "μακιγιαζ" | ||
}; | ||
|
||
/** | ||
* The output we expect for each of the above words. | ||
*/ | ||
private static final String[][] greekVariants = { | ||
{"κουρεμα", "κουρεματων", "κουρεματα"}, | ||
{"ενδυμα", "ενδυματων", "ενδυματα", "ενδυματοσ"}, | ||
{"γραφειο", "γραφεια", "γραφειων"}, | ||
{"πεδια", "πεδιο", "πεδιων"}, | ||
{"γραναζια", "γραναζιου", "γραναζιων"}, | ||
{"ποδηλατο", "ποδηλατου", "ποδηλατα", "ποδηλατων"}, | ||
{"καλωδιου", "καλωδια", "καλωδιο"} | ||
}; | ||
|
||
private GreekReverseStemmer reverseStemmer; | ||
|
||
private List<String> generatedGreekVariants; | ||
|
||
@BeforeClass | ||
public void setUp() { | ||
this.reverseStemmer = new GreekReverseStemmer(); | ||
this.generatedGreekVariants = new ArrayList<String>(); | ||
} | ||
|
||
@BeforeMethod | ||
public void clearThePreviousResults() { | ||
generatedGreekVariants.clear(); | ||
} | ||
|
||
@Test | ||
public void testGenerationOfGreekVariants() { | ||
for (int i = 0; i < greekWords.length; i++) { | ||
generatedGreekVariants = reverseStemmer.generateGreekVariants(greekWords[i]); | ||
|
||
Assert.assertTrue(generatedGreekVariants.size() > 1, "The reverse stemmer should produce results"); | ||
|
||
for (String greekVariant : greekVariants[i]) { | ||
Assert.assertTrue(generatedGreekVariants.contains(greekVariant), | ||
"It should contain the greek variant: " + greekVariant); | ||
} | ||
} | ||
} | ||
|
||
@Test | ||
public void testNonMatchingWords() { | ||
for (String nonMatchingWord : nonMatchingWords) { | ||
generatedGreekVariants = reverseStemmer.generateGreekVariants(nonMatchingWord); | ||
|
||
Assert.assertTrue(generatedGreekVariants.size() == 1, "The reverse stemmer should not produce more results"); | ||
} | ||
} | ||
} |