From 16e33fd0413cbda18ef1892faa083c8d37547bef Mon Sep 17 00:00:00 2001 From: Sasa Hasan Date: Fri, 3 Feb 2017 12:25:02 +0100 Subject: [PATCH] added filler rules for MosesCompoundSplitter. A test on De-En w/ a 12k test set shows an improvement of 0.5% BLEU. This commit also fixes a unit test to make it pass on Windows. --- src/edu/stanford/nlp/mt/process/MosesCompoundSplitter.java | 4 ++-- test/edu/stanford/nlp/mt/util/FlatNBestListTest.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/edu/stanford/nlp/mt/process/MosesCompoundSplitter.java b/src/edu/stanford/nlp/mt/process/MosesCompoundSplitter.java index c5f78d900..45c5aa1a9 100644 --- a/src/edu/stanford/nlp/mt/process/MosesCompoundSplitter.java +++ b/src/edu/stanford/nlp/mt/process/MosesCompoundSplitter.java @@ -30,7 +30,7 @@ */ public class MosesCompoundSplitter { - private static String[] FILLERS = {"", "s", "es"}; + private static String[] FILLERS = {"", "s", "es", "-", "en"}; private static final int MIN_SIZE = 3; // the minimum number of characters is actually MIN_SIZE + 1 private static final int MIN_COUNT = 5; private static final int MAX_COUNT = 5; @@ -71,7 +71,7 @@ private void loadModel(String modelFileName) throws IOException { reader.close(); throw new IOException("Illegal input in model file, line " + reader.getLineNumber() + ": " + line); } - int cnt = Integer.parseInt(input[2]); + long cnt = Long.parseLong(input[2]); totalCount += cnt; String tc = input[1]; if(cnt < minCnt || tc.length() < MIN_SIZE + 1) continue; // these will never be used for splitting anyway diff --git a/test/edu/stanford/nlp/mt/util/FlatNBestListTest.java b/test/edu/stanford/nlp/mt/util/FlatNBestListTest.java index d06f9b54f..502a23e54 100644 --- a/test/edu/stanford/nlp/mt/util/FlatNBestListTest.java +++ b/test/edu/stanford/nlp/mt/util/FlatNBestListTest.java @@ -50,7 +50,7 @@ private double getValue(Collection> fvs, String name) { } public void testToString() throws IOException { - String strRep = 
nbestList.toString(); + String strRep = nbestList.toString().replaceAll("\r\n", "\n"); // replaceAll: fixes test on Windows platforms assertEquals(156305, strRep.length()); }