Skip to content

Commit

Permalink
Added filler rules ("-" and "en") for MosesCompoundSplitter
Browse files Browse the repository at this point in the history
A test on De-En with a 12k test set shows an improvement of 0.5% BLEU.

This commit also fixes a unit test so that it passes on Windows (line endings are normalized from "\r\n" to "\n" before the length check).
  • Loading branch information
Sasa Hasan committed Feb 3, 2017
1 parent cbb34e9 commit 16e33fd
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
4 changes: 2 additions & 2 deletions src/edu/stanford/nlp/mt/process/MosesCompoundSplitter.java
Expand Up @@ -30,7 +30,7 @@
*/
public class MosesCompoundSplitter {

private static String[] FILLERS = {"", "s", "es"};
private static String[] FILLERS = {"", "s", "es", "-", "en"};
private static final int MIN_SIZE = 3; // the minimum number of characters is actually MIN_SIZE + 1
private static final int MIN_COUNT = 5;
private static final int MAX_COUNT = 5;
Expand Down Expand Up @@ -71,7 +71,7 @@ private void loadModel(String modelFileName) throws IOException {
reader.close();
throw new IOException("Illegal input in model file, line " + reader.getLineNumber() + ": " + line);
}
int cnt = Integer.parseInt(input[2]);
long cnt = Long.parseLong(input[2]);
totalCount += cnt;
String tc = input[1];
if(cnt < minCnt || tc.length() < MIN_SIZE + 1) continue; // these will never be used for splitting anyway
Expand Down
2 changes: 1 addition & 1 deletion test/edu/stanford/nlp/mt/util/FlatNBestListTest.java
Expand Up @@ -50,7 +50,7 @@ private double getValue(Collection<FeatureValue<String>> fvs, String name) {
}

public void testToString() throws IOException {
String strRep = nbestList.toString();
String strRep = nbestList.toString().replaceAll("\r\n", "\n"); // replaceAll: fixes test on Windows platforms
assertEquals(156305, strRep.length());
}

Expand Down

0 comments on commit 16e33fd

Please sign in to comment.