Permalink
Browse files

don't skip the reader of the huge linedocs file, skip the inputstream instead

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1336904 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
1 parent 421ee98 commit c97e49bde8f8a9e829f93568ab84af2ae6f9cbb2 @rmuir rmuir committed May 10, 2012
Showing with 8 additions and 3 deletions.
  1. +8 −3 lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java
@@ -24,6 +24,9 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.GZIPInputStream;
@@ -88,8 +91,6 @@ private synchronized void open(Random random) throws IOException {
size *= 2.8;
}
- reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), BUFFER_SIZE);
-
// Override sizes for currently "known" line files:
if (path.equals("europarl.lines.txt.gz")) {
size = 15129506L;
@@ -103,7 +104,11 @@ private synchronized void open(Random random) throws IOException {
if (LuceneTestCase.VERBOSE) {
System.out.println("TEST: LineFileDocs: seek to fp=" + seekTo + " on open");
}
- reader.skip(seekTo);
+ is.skip(seekTo);
+ CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
+ .onMalformedInput(CodingErrorAction.IGNORE)
+ .onUnmappableCharacter(CodingErrorAction.IGNORE);
+ reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE);
reader.readLine();
}
}

0 comments on commit c97e49b

Please sign in to comment.