diff --git a/src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java b/src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java
index cb1d296a94..3c908ce831 100644
--- a/src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java
+++ b/src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java
@@ -103,11 +103,27 @@ public abstract class AbstractSequenceClassifier<IN extends CoreMap> implements
   protected MaxSizeConcurrentHashSet<String> knownLCWords; // = null;
 
   private DocumentReaderAndWriter<IN> defaultReaderAndWriter;
+
+  /** This is the DocumentReaderAndWriter used for reading training and testing files.
+   *  It is the DocumentReaderAndWriter specified by the readerAndWriter flag and
+   *  defaults to {@code edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter}, which
+   *  is suitable for reading CoNLL-style TSV files.
+   *
+   *  @return The default DocumentReaderAndWriter
+   */
   public DocumentReaderAndWriter<IN> defaultReaderAndWriter() { return defaultReaderAndWriter; }
 
   private DocumentReaderAndWriter<IN> plainTextReaderAndWriter;
+
+  /** This is the default DocumentReaderAndWriter used for reading text files for runtime
+   *  classification. It is the DocumentReaderAndWriter specified by the plainTextDocumentReaderAndWriter
+   *  flag and defaults to {@code edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter}, which
+   *  is suitable for reading plain text files in languages for which a Tokenizer is available.
+   *
+   *  @return The default plain text DocumentReaderAndWriter
+   */
   public DocumentReaderAndWriter<IN> plainTextReaderAndWriter() { return plainTextReaderAndWriter; }
@@ -1011,15 +1027,11 @@ public static void outputCalibrationInfo(PrintWriter pw,
     pw.println("----------------------------------------");
   }
 
-  public void classifyStdin()
-    throws IOException
-  {
+  public void classifyStdin() throws IOException {
     classifyStdin(plainTextReaderAndWriter);
   }
 
-  public void classifyStdin(DocumentReaderAndWriter<IN> readerWriter)
-    throws IOException
-  {
+  public void classifyStdin(DocumentReaderAndWriter<IN> readerWriter) throws IOException {
     BufferedReader is = IOUtils.readerFromStdin(flags.inputEncoding);
     for (String line; (line = is.readLine()) != null; ) {
       Collection<List<IN>> documents = makeObjectBankFromString(line, readerWriter);
@@ -1039,8 +1051,9 @@ public void dumpFeatures(Collection<List<IN>> documents) {}
 
   /**
    * Load a test file, run the classifier on it, and then print the answers to
-   * stdout (with timing to stderr). This uses the value of flags.documentReader
-   * to determine testFile format.
+   * stdout (with timing to stderr). This uses the value of flags.readerAndWriter
+   * to determine the testFile format. Note: This means it is appropriate for a
+   * testFile, not for a plain text file.
    *
    * @param testFile The file to test on.
    */
@@ -1065,8 +1078,7 @@ public Triple<Double,Double,Double> classifyAndWriteAnswers(String testFile, boo
 
   /**
    * Load a test file, run the classifier on it, and then print the answers to
-   * stdout (with timing to stderr). This uses the value of flags.documentReader
-   * to determine testFile format.
+   * stdout (with timing to stderr).
    *
    * @param testFile The file to test on.
    * @param readerWriter A reader and writer to use for the output
@@ -1102,9 +1114,15 @@ public Triple<Double,Double,Double> classifyAndWriteAnswers(String baseDir, Stri
     return classifyAndWriteAnswers(documents, readerWriter, outputScores);
   }
 
-  public void classifyFilesAndWriteAnswers(Collection<File> testFiles)
+  /** Run the classifier on a collection of text files.
+   *  Uses the plainTextReaderAndWriter to process them.
+   *
+   *  @param textFiles A Collection of File objects to process.
+   *  @throws IOException For any IO error
+   */
+  public void classifyFilesAndWriteAnswers(Collection<File> textFiles)
       throws IOException {
-    classifyFilesAndWriteAnswers(testFiles, plainTextReaderAndWriter, false);
+    classifyFilesAndWriteAnswers(textFiles, plainTextReaderAndWriter, false);
   }
 
   public void classifyFilesAndWriteAnswers(Collection<File> testFiles,
@@ -1275,8 +1293,7 @@ public void classifyAndWriteAnswersKBest(ObjectBank<List<IN>> documents, int k,
    */
   public void classifyAndWriteViterbiSearchGraph(String testFile, String searchGraphPrefix, DocumentReaderAndWriter<IN> readerAndWriter) throws IOException {
     Timing timer = new Timing();
-    ObjectBank<List<IN>> documents =
-      makeObjectBankFromFile(testFile, readerAndWriter);
+    ObjectBank<List<IN>> documents = makeObjectBankFromFile(testFile, readerAndWriter);
     int numWords = 0;
     int numSentences = 0;
@@ -1286,8 +1303,9 @@ public void classifyAndWriteViterbiSearchGraph(String testFile, String searchGra
       PrintWriter latticeWriter = new PrintWriter(new FileOutputStream(searchGraphPrefix + '.' + numSentences + ".wlattice"));
       PrintWriter vsgWriter = new PrintWriter(new FileOutputStream(searchGraphPrefix + '.' + numSentences + ".lattice"));
-      if (readerAndWriter instanceof LatticeWriter)
+      if (readerAndWriter instanceof LatticeWriter) {
         ((LatticeWriter<IN, String, Integer>) readerAndWriter).printLattice(tagLattice, doc, latticeWriter);
+      }
       tagLattice.printAttFsmFormat(vsgWriter);
       latticeWriter.close();
       vsgWriter.close();
diff --git a/src/edu/stanford/nlp/ie/crf/CRFClassifier.java b/src/edu/stanford/nlp/ie/crf/CRFClassifier.java
index 3473e819c8..ae1b79b09e 100644
--- a/src/edu/stanford/nlp/ie/crf/CRFClassifier.java
+++ b/src/edu/stanford/nlp/ie/crf/CRFClassifier.java
@@ -3060,7 +3060,7 @@ public static void main(String[] args) throws Exception {
       // todo: Change testFile to call testFiles with a singleton list
       DocumentReaderAndWriter<CoreLabel> readerAndWriter = crf.defaultReaderAndWriter();
       if (crf.flags.searchGraphPrefix != null) {
-        crf.classifyAndWriteViterbiSearchGraph(testFile, crf.flags.searchGraphPrefix, crf.makeReaderAndWriter());
+        crf.classifyAndWriteViterbiSearchGraph(testFile, crf.flags.searchGraphPrefix, readerAndWriter);
       } else if (crf.flags.printFirstOrderProbs) {
         crf.printFirstOrderProbs(testFile, readerAndWriter);
       } else if (crf.flags.printFactorTable) {
@@ -3087,14 +3087,11 @@ public static void main(String[] args) throws Exception {
     }
 
     if (textFile != null) {
-      crf.classifyAndWriteAnswers(textFile);
+      crf.classifyAndWriteAnswers(textFile, crf.plainTextReaderAndWriter(), false);
     }
 
     if (textFiles != null) {
-      List<File> files = new ArrayList<>();
-      for (String filename : textFiles.split(",")) {
-        files.add(new File(filename));
-      }
+      List<File> files = Arrays.stream(textFiles.split(",")).map(File::new).collect(Collectors.toList());
       crf.classifyFilesAndWriteAnswers(files);
     }
diff --git a/src/edu/stanford/nlp/process/AbstractTokenizer.java b/src/edu/stanford/nlp/process/AbstractTokenizer.java
index 0cb808dede..15245f68d2 100644
--- a/src/edu/stanford/nlp/process/AbstractTokenizer.java
+++ b/src/edu/stanford/nlp/process/AbstractTokenizer.java
@@ -1,11 +1,12 @@
-package edu.stanford.nlp.process;
-import edu.stanford.nlp.util.logging.Redwood;
-
+package edu.stanford.nlp.process;
 
 import java.util.ArrayList;
 import java.util.List;
 import java.util.NoSuchElementException;
 
+// import edu.stanford.nlp.util.logging.Redwood;
+
+
 /**
  * An abstract tokenizer. Tokenizers extending AbstractTokenizer need only
  * implement the {@code getNext()} method. This implementation does not
@@ -18,8 +19,8 @@ public abstract class AbstractTokenizer<T> implements Tokenizer<T> {
 
-  /** A logger for this class */
-  private static Redwood.RedwoodChannels log = Redwood.channels(AbstractTokenizer.class);
+  // /** A logger for this class */
+  // private static final Redwood.RedwoodChannels log = Redwood.channels(AbstractTokenizer.class);
 
   protected T nextToken; // = null;
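
Usage note (not part of the patch): a minimal sketch of driving the reworked plain-text path, in the spirit of the updated CRFClassifier.main(). The model path and file names are placeholders; it assumes a serialized CRF model is available locally.

    import java.io.File;
    import java.util.Arrays;
    import java.util.List;
    import java.util.stream.Collectors;

    import edu.stanford.nlp.ie.crf.CRFClassifier;
    import edu.stanford.nlp.ling.CoreLabel;

    public class PlainTextClassifySketch {
      public static void main(String[] args) throws Exception {
        // Placeholder model path: any serialized CRF model would do.
        CRFClassifier<CoreLabel> crf =
            CRFClassifier.getClassifier("english.all.3class.distsim.crf.ser.gz");

        // Same idiom the patch adopts in CRFClassifier.main(): map a
        // comma-separated list of file names to File objects.
        List<File> files = Arrays.stream("doc1.txt,doc2.txt".split(","))
            .map(File::new)
            .collect(Collectors.toList());

        // Processes the files with plainTextReaderAndWriter(), per the new Javadoc.
        crf.classifyFilesAndWriteAnswers(files);

        // Single-file variant, passing the plain-text reader/writer explicitly,
        // as main() now does for the textFile flag.
        crf.classifyAndWriteAnswers("doc1.txt", crf.plainTextReaderAndWriter(), false);
      }
    }

The explicit readerWriter argument makes the testFile/textFile distinction visible at the call site, rather than relying on classifyAndWriteAnswers(String) silently using the training-format reader.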