diff --git a/src/java/opennlp/maxent/BasicEventStream.java b/src/java/opennlp/maxent/BasicEventStream.java
index 4c75a6d..1bea488 100644
--- a/src/java/opennlp/maxent/BasicEventStream.java
+++ b/src/java/opennlp/maxent/BasicEventStream.java
@@ -17,8 +17,8 @@
package opennlp.maxent;
+import opennlp.model.AbstractEventStream;
import opennlp.model.Event;
-import opennlp.model.EventStream;
/**
* A object which can deliver a stream of training events assuming
@@ -30,9 +30,9 @@
*
cp_1 cp_2 ... cp_n outcome
*
* @author Jason Baldridge
- * @version $Revision: 1.4 $, $Date: 2008/09/28 18:03:47 $
+ * @version $Revision: 1.5 $, $Date: 2008/11/06 19:59:44 $
*/
-public class BasicEventStream implements EventStream {
+public class BasicEventStream extends AbstractEventStream {
ContextGenerator cg = new BasicContextGenerator();
DataStream ds;
Event next;
@@ -48,7 +48,7 @@ public BasicEventStream (DataStream ds) {
*
* @return the Event object which is next in this EventStream
*/
- public Event nextEvent () {
+ public Event next () {
while (next == null && this.ds.hasNext())
next = createEvent((String)this.ds.nextToken());
diff --git a/src/java/opennlp/maxent/GISModel.java b/src/java/opennlp/maxent/GISModel.java
index fb3dd15..f4ab161 100644
--- a/src/java/opennlp/maxent/GISModel.java
+++ b/src/java/opennlp/maxent/GISModel.java
@@ -33,7 +33,7 @@
* Iterative Scaling procedure (implemented in GIS.java).
*
* @author Tom Morton and Jason Baldridge
- * @version $Revision: 1.23 $, $Date: 2008/09/28 18:03:50 $
+ * @version $Revision: 1.24 $, $Date: 2008/11/06 19:59:44 $
*/
public final class GISModel extends AbstractModel {
/**
@@ -77,11 +77,11 @@ public GISModel (Context[] params, String[] predLabels, String[] outcomeNames, i
* getOutcome(int i).
*/
public final double[] eval(String[] context) {
- return(eval(context,new double[evalParams.numOutcomes]));
+ return(eval(context,new double[evalParams.getNumOutcomes()]));
}
public final double[] eval(String[] context, float[] values) {
- return(eval(context,values,new double[evalParams.numOutcomes]));
+ return(eval(context,values,new double[evalParams.getNumOutcomes()]));
}
public final double[] eval(String[] context, double[] outsums) {
@@ -144,8 +144,8 @@ public static double[] eval(int[] context, double[] prior, EvalParameters model)
* getOutcome(int i).
*/
public static double[] eval(int[] context, float[] values, double[] prior, EvalParameters model) {
- Context[] params = model.params;
- int numfeats[] = new int[model.numOutcomes];
+ Context[] params = model.getParams();
+ int numfeats[] = new int[model.getNumOutcomes()];
int[] activeOutcomes;
double[] activeParameters;
double value = 1;
@@ -166,17 +166,17 @@ public static double[] eval(int[] context, float[] values, double[] prior, EvalP
}
double normal = 0.0;
- for (int oid = 0; oid < model.numOutcomes; oid++) {
- if (model.correctionParam != 0) {
- prior[oid] = Math.exp(prior[oid]*model.constantInverse+((1.0 - ((double) numfeats[oid] / model.correctionConstant)) * model.correctionParam));
+ for (int oid = 0; oid < model.getNumOutcomes(); oid++) {
+ if (model.getCorrectionParam() != 0) {
+ prior[oid] = Math.exp(prior[oid]*model.getConstantInverse()+((1.0 - ((double) numfeats[oid] / model.getCorrectionConstant())) * model.getCorrectionParam()));
}
else {
- prior[oid] = Math.exp(prior[oid]*model.constantInverse);
+ prior[oid] = Math.exp(prior[oid]*model.getConstantInverse());
}
normal += prior[oid];
}
- for (int oid = 0; oid < model.numOutcomes; oid++) {
+ for (int oid = 0; oid < model.getNumOutcomes(); oid++) {
prior[oid] /= normal;
}
return prior;
diff --git a/src/java/opennlp/maxent/GISTrainer.java b/src/java/opennlp/maxent/GISTrainer.java
index 5aff377..f573f4b 100644
--- a/src/java/opennlp/maxent/GISTrainer.java
+++ b/src/java/opennlp/maxent/GISTrainer.java
@@ -45,7 +45,7 @@
*
* @author Tom Morton
* @author Jason Baldridge
- * @version $Revision: 1.30 $, $Date: 2008/09/28 18:03:38 $
+ * @version $Revision: 1.31 $, $Date: 2008/11/06 19:59:44 $
*/
class GISTrainer {
@@ -364,7 +364,7 @@ else if (useSimpleSmoothing) {
findParameters(iterations);
/*************** Create and return the model ******************/
- return new GISModel(params, predLabels, outcomeLabels, correctionConstant, evalParams.correctionParam);
+ return new GISModel(params, predLabels, outcomeLabels, correctionConstant, evalParams.getCorrectionParam());
}
@@ -467,7 +467,7 @@ private double nextIteration() {
}
}
if (useSlackParameter)
- CFMOD += (evalParams.correctionConstant - contexts[ei].length) * numTimesEventsSeen[ei];
+ CFMOD += (evalParams.getCorrectionConstant() - contexts[ei].length) * numTimesEventsSeen[ei];
loglikelihood += Math.log(modelDistribution[outcomeList[ei]]) * numTimesEventsSeen[ei];
numEvents += numTimesEventsSeen[ei];
@@ -493,7 +493,7 @@ private double nextIteration() {
int[] activeOutcomes = params[pi].getOutcomes();
for (int aoi=0;aoi 0.0 && useSlackParameter)
- evalParams.correctionParam += (cfObservedExpect - Math.log(CFMOD));
+ evalParams.setCorrectionParam(evalParams.getCorrectionParam() + (cfObservedExpect - Math.log(CFMOD)));
display(". loglikelihood=" + loglikelihood + "\t" + ((double) numCorrect / numEvents) + "\n");
return (loglikelihood);
diff --git a/src/java/opennlp/maxent/RealBasicEventStream.java b/src/java/opennlp/maxent/RealBasicEventStream.java
index b95adc9..1b23a66 100644
--- a/src/java/opennlp/maxent/RealBasicEventStream.java
+++ b/src/java/opennlp/maxent/RealBasicEventStream.java
@@ -17,11 +17,12 @@
package opennlp.maxent;
+import opennlp.model.AbstractEventStream;
import opennlp.model.Event;
import opennlp.model.EventStream;
import opennlp.model.RealValueFileEventStream;
-public class RealBasicEventStream implements EventStream {
+public class RealBasicEventStream extends AbstractEventStream {
ContextGenerator cg = new BasicContextGenerator();
DataStream ds;
Event next;
@@ -33,7 +34,7 @@ public RealBasicEventStream(DataStream ds) {
}
- public Event nextEvent() {
+ public Event next() {
while (next == null && this.ds.hasNext())
next = createEvent((String)this.ds.nextToken());
@@ -67,7 +68,7 @@ private Event createEvent(String obs) {
public static void main(String[] args) throws java.io.IOException {
EventStream es = new RealBasicEventStream(new PlainTextByLineDataStream(new java.io.FileReader(args[0])));
while (es.hasNext()) {
- System.out.println(es.nextEvent());
+ System.out.println(es.next());
}
}
}
diff --git a/src/java/opennlp/maxent/io/BinaryGISModelReader.java b/src/java/opennlp/maxent/io/BinaryGISModelReader.java
index 5151a7b..e832063 100644
--- a/src/java/opennlp/maxent/io/BinaryGISModelReader.java
+++ b/src/java/opennlp/maxent/io/BinaryGISModelReader.java
@@ -17,17 +17,17 @@
package opennlp.maxent.io;
-import java.io.*;
-import java.util.zip.*;
+import java.io.DataInputStream;
+
+import opennlp.model.BinaryFileDataReader;
/**
* A reader for GIS models stored in binary format.
*
* @author Jason Baldridge
- * @version $Revision: 1.2 $, $Date: 2008/09/28 18:04:24 $
+ * @version $Revision: 1.3 $, $Date: 2008/11/06 19:59:44 $
*/
public class BinaryGISModelReader extends GISModelReader {
- protected DataInputStream input;
/**
* Constructor which directly instantiates the DataInputStream containing
@@ -36,38 +36,6 @@ public class BinaryGISModelReader extends GISModelReader {
* @param dis The DataInputStream containing the model information.
*/
public BinaryGISModelReader (DataInputStream dis) {
- input = dis;
- }
-
- /**
- * Constructor which takes a File and creates a reader for it. Detects
- * whether the file is gzipped or not based on whether the suffix contains
- * ".gz"
- *
- * @param f The File in which the model is stored.
- */
- public BinaryGISModelReader (File f) throws IOException {
-
- if (f.getName().endsWith(".gz")) {
- input = new DataInputStream(
- new GZIPInputStream(new FileInputStream(f)));
- }
- else {
- input = new DataInputStream(new FileInputStream(f));
- }
-
+ super(new BinaryFileDataReader(dis));
}
-
- public int readInt () throws java.io.IOException {
- return input.readInt();
- }
-
- public double readDouble () throws java.io.IOException {
- return input.readDouble();
- }
-
- public String readUTF () throws java.io.IOException {
- return input.readUTF();
- }
-
}
diff --git a/src/java/opennlp/maxent/io/GISModelReader.java b/src/java/opennlp/maxent/io/GISModelReader.java
index 0069106..4cf165e 100644
--- a/src/java/opennlp/maxent/io/GISModelReader.java
+++ b/src/java/opennlp/maxent/io/GISModelReader.java
@@ -17,18 +17,31 @@
package opennlp.maxent.io;
+import java.io.File;
+import java.io.IOException;
+
import opennlp.maxent.GISModel;
import opennlp.model.AbstractModel;
import opennlp.model.AbstractModelReader;
import opennlp.model.Context;
+import opennlp.model.DataReader;
/**
* Abstract parent class for readers of GISModels.
*
* @author Jason Baldridge
- * @version $Revision: 1.8 $, $Date: 2008/09/28 18:04:22 $
+ * @version $Revision: 1.9 $, $Date: 2008/11/06 19:59:44 $
*/
-public abstract class GISModelReader extends AbstractModelReader {
+public class GISModelReader extends AbstractModelReader {
+
+ public GISModelReader(File file) throws IOException {
+ super(file);
+ }
+
+ public GISModelReader(DataReader dataReader) {
+ super(dataReader);
+ }
+
/**
* Retrieve a model from disk. It assumes that models are saved in the
* following sequence:
@@ -51,24 +64,22 @@ public abstract class GISModelReader extends AbstractModelReader {
*
* @return The GISModel stored in the format and location specified to
* this GISModelReader (usually via its the constructor).
- */
- public AbstractModel getModel () throws java.io.IOException {
- checkModelType();
- int correctionConstant = getCorrectionConstant();
- double correctionParam = getCorrectionParameter();
- String[] outcomeLabels = getOutcomes();
- int[][] outcomePatterns = getOutcomePatterns();
- String[] predLabels = getPredicates();
- Context[] params = getParameters(outcomePatterns);
-
- return new GISModel(params,
- predLabels,
- outcomeLabels,
- correctionConstant,
- correctionParam);
-
- }
+ */
+ public AbstractModel constructModel() throws IOException {
+ int correctionConstant = getCorrectionConstant();
+ double correctionParam = getCorrectionParameter();
+ String[] outcomeLabels = getOutcomes();
+ int[][] outcomePatterns = getOutcomePatterns();
+ String[] predLabels = getPredicates();
+ Context[] params = getParameters(outcomePatterns);
+ return new GISModel(params,
+ predLabels,
+ outcomeLabels,
+ correctionConstant,
+ correctionParam);
+ }
+
public void checkModelType() throws java.io.IOException {
String modelType = readUTF();
if (!modelType.equals("GIS"))
diff --git a/src/java/opennlp/maxent/io/ObjectGISModelReader.java b/src/java/opennlp/maxent/io/ObjectGISModelReader.java
index 3e903a8..e4215bc 100644
--- a/src/java/opennlp/maxent/io/ObjectGISModelReader.java
+++ b/src/java/opennlp/maxent/io/ObjectGISModelReader.java
@@ -17,9 +17,10 @@
package opennlp.maxent.io;
-import java.io.IOException;
import java.io.ObjectInputStream;
+import opennlp.model.ObjectDataReader;
+
public class ObjectGISModelReader extends GISModelReader {
protected ObjectInputStream input;
@@ -31,21 +32,7 @@ public class ObjectGISModelReader extends GISModelReader {
* @param dis The DataInputStream containing the model information.
*/
- public ObjectGISModelReader(ObjectInputStream dis) {
- super();
- input = dis;
- }
-
- public int readInt() throws IOException {
- return input.readInt();
- }
-
- public double readDouble() throws IOException {
- return input.readDouble();
- }
-
- public String readUTF() throws IOException {
- return input.readUTF();
+ public ObjectGISModelReader(ObjectInputStream ois) {
+ super(new ObjectDataReader(ois));
}
-
}
diff --git a/src/java/opennlp/maxent/io/PlainTextGISModelReader.java b/src/java/opennlp/maxent/io/PlainTextGISModelReader.java
index cf1fe3e..75214ce 100644
--- a/src/java/opennlp/maxent/io/PlainTextGISModelReader.java
+++ b/src/java/opennlp/maxent/io/PlainTextGISModelReader.java
@@ -17,17 +17,19 @@
package opennlp.maxent.io;
-import java.io.*;
-import java.util.zip.*;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+
+import opennlp.model.PlainTextFileDataReader;
/**
* A reader for GIS models stored in plain text format.
*
* @author Jason Baldridge
- * @version $Revision: 1.2 $, $Date: 2008/09/28 18:04:30 $
+ * @version $Revision: 1.3 $, $Date: 2008/11/06 19:59:44 $
*/
public class PlainTextGISModelReader extends GISModelReader {
- private BufferedReader input;
/**
* Constructor which directly instantiates the BufferedReader containing
@@ -36,7 +38,7 @@ public class PlainTextGISModelReader extends GISModelReader {
* @param br The BufferedReader containing the model information.
*/
public PlainTextGISModelReader (BufferedReader br) {
- input = br;
+ super(new PlainTextFileDataReader(br));
}
/**
@@ -47,27 +49,6 @@ public PlainTextGISModelReader (BufferedReader br) {
* @param f The File in which the model is stored.
*/
public PlainTextGISModelReader (File f) throws IOException {
-
- if (f.getName().endsWith(".gz")) {
- input = new BufferedReader(new InputStreamReader(
- new GZIPInputStream(new FileInputStream(f))));
- }
- else {
- input = new BufferedReader(new FileReader(f));
- }
-
- }
-
- public int readInt () throws IOException {
- return Integer.parseInt(input.readLine());
+ super(f);
}
-
- public double readDouble () throws IOException {
- return Double.parseDouble(input.readLine());
- }
-
- public String readUTF () throws IOException {
- return input.readLine();
- }
-
}
diff --git a/src/java/opennlp/maxent/io/SuffixSensitiveGISModelReader.java b/src/java/opennlp/maxent/io/SuffixSensitiveGISModelReader.java
index 6ca1ff4..319ba59 100644
--- a/src/java/opennlp/maxent/io/SuffixSensitiveGISModelReader.java
+++ b/src/java/opennlp/maxent/io/SuffixSensitiveGISModelReader.java
@@ -39,7 +39,7 @@
* .bin --> the file is binary
*
* @author Jason Baldridge
- * @version $Revision: 1.4 $, $Date: 2008/09/28 18:04:24 $
+ * @version $Revision: 1.5 $, $Date: 2008/11/06 19:59:44 $
*/
public class SuffixSensitiveGISModelReader extends GISModelReader {
protected GISModelReader suffixAppropriateReader;
@@ -51,42 +51,9 @@ public class SuffixSensitiveGISModelReader extends GISModelReader {
* @param f The File in which the model is stored.
*/
public SuffixSensitiveGISModelReader (File f) throws IOException {
- InputStream input;
- String filename = f.getName();
-
- // handle the zipped/not zipped distinction
- if (filename.endsWith(".gz")) {
- input = new GZIPInputStream(new FileInputStream(f));
- filename = filename.substring(0,filename.length()-3);
- }
- else {
- input = new FileInputStream(f);
- }
-
- // handle the different formats
- if (filename.endsWith(".bin")) {
- suffixAppropriateReader =
- new BinaryGISModelReader(new DataInputStream(input));
- }
- // add more else ifs here to add further Reader types, e.g.
- // else if (filename.endsWith(".xml"))
- // suffixAppropriateReader = new XmlGISModelReader(input);
- // of course, a BufferedReader may not be what is wanted here,
- // so you might have to do a bit more to get
- // SuffixSensitiveGISModelReader to work for xml or other formats.
- // However, the default should be plain text (.txt).
- else { // filename ends with ".txt"
- suffixAppropriateReader =
- new PlainTextGISModelReader(
- new BufferedReader(new InputStreamReader(input)));
- }
-
+ super(f);
}
- protected SuffixSensitiveGISModelReader() {
- super();
- }
-
// activate this if adding another type of reader which can't read model
// information in the way that the default getModel() method in
// GISModelReader does.
@@ -95,18 +62,6 @@ protected SuffixSensitiveGISModelReader() {
//}
- public int readInt () throws IOException {
- return suffixAppropriateReader.readInt();
- }
-
- public double readDouble () throws IOException {
- return suffixAppropriateReader.readDouble();
- }
-
- public String readUTF () throws IOException {
- return suffixAppropriateReader.readUTF();
- }
-
/**
* To convert between different formats of the new style.
*
diff --git a/src/java/opennlp/model/AbstractEventStream.java b/src/java/opennlp/model/AbstractEventStream.java
new file mode 100644
index 0000000..8562772
--- /dev/null
+++ b/src/java/opennlp/model/AbstractEventStream.java
@@ -0,0 +1,14 @@
+package opennlp.model;
+
+
+public abstract class AbstractEventStream implements EventStream {
+
+ public AbstractEventStream() {
+ super();
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+}
diff --git a/src/java/opennlp/model/AbstractModel.java b/src/java/opennlp/model/AbstractModel.java
index 7266842..ee6317f 100644
--- a/src/java/opennlp/model/AbstractModel.java
+++ b/src/java/opennlp/model/AbstractModel.java
@@ -31,12 +31,12 @@ public abstract class AbstractModel implements MaxentModel {
protected EvalParameters evalParams;
protected Prior prior;
- public AbstractModel (Context[] params, String[] predLabels, String[] outcomeNames) {
+ public AbstractModel(Context[] params, String[] predLabels, String[] outcomeNames) {
init(predLabels,outcomeNames);
this.evalParams = new EvalParameters(params,ocNames.length);
}
- public AbstractModel (Context[] params, String[] predLabels, String[] outcomeNames, int correctionConstant,double correctionParam) {
+ public AbstractModel(Context[] params, String[] predLabels, String[] outcomeNames, int correctionConstant,double correctionParam) {
init(predLabels,outcomeNames);
this.evalParams = new EvalParameters(params,correctionConstant,correctionParam,ocNames.length);
}
@@ -121,7 +121,7 @@ public int getIndex(String outcome) {
}
public int getNumOutcomes() {
- return(evalParams.numOutcomes);
+ return(evalParams.getNumOutcomes());
}
/**
@@ -146,11 +146,11 @@ public int getNumOutcomes() {
*/
public final Object[] getDataStructures() {
Object[] data = new Object[5];
- data[0] = evalParams.params;
+ data[0] = evalParams.getParams();
data[1] = pmap;
data[2] = ocNames;
- data[3] = new Integer((int)evalParams.correctionConstant);
- data[4] = new Double(evalParams.correctionParam);
+ data[3] = new Integer((int)evalParams.getCorrectionConstant());
+ data[4] = new Double(evalParams.getCorrectionParam());
return data;
}
}
diff --git a/src/java/opennlp/model/AbstractModelReader.java b/src/java/opennlp/model/AbstractModelReader.java
index c562ec2..e837b36 100644
--- a/src/java/opennlp/model/AbstractModelReader.java
+++ b/src/java/opennlp/model/AbstractModelReader.java
@@ -17,7 +17,12 @@
package opennlp.model;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
import java.util.StringTokenizer;
+import java.util.zip.GZIPInputStream;
public abstract class AbstractModelReader {
@@ -26,29 +31,63 @@ public abstract class AbstractModelReader {
* The number of predicates contained in the model.
*/
protected int NUM_PREDS;
+ protected DataReader dataReader;
+
+ public AbstractModelReader(File f) throws IOException {
+ String filename = f.getName();
+ InputStream input;
+ // handle the zipped/not zipped distinction
+ if (filename.endsWith(".gz")) {
+ input = new GZIPInputStream(new FileInputStream(f));
+ filename = filename.substring(0,filename.length()-3);
+ }
+ else {
+ input = new FileInputStream(f);
+ }
- public AbstractModelReader() {
- super();
+ // handle the different formats
+ if (filename.endsWith(".bin")) {
+ this.dataReader = new BinaryFileDataReader(input);
+ }
+ else { // filename ends with ".txt"
+ this.dataReader = new PlainTextFileDataReader(input);
+ }
}
+ public AbstractModelReader(DataReader dataReader) {
+ super();
+ this.dataReader = dataReader;
+ }
+
/**
* Implement as needed for the format the model is stored in.
*/
- public abstract int readInt() throws java.io.IOException;
+ public int readInt() throws java.io.IOException {
+ return dataReader.readInt();
+ }
/**
* Implement as needed for the format the model is stored in.
*/
- public abstract double readDouble() throws java.io.IOException;
+ public double readDouble() throws java.io.IOException {
+ return dataReader.readDouble();
+ }
/**
* Implement as needed for the format the model is stored in.
*/
- public abstract String readUTF() throws java.io.IOException;
+ public String readUTF() throws java.io.IOException {
+ return dataReader.readUTF();
+ }
- public abstract AbstractModel getModel () throws java.io.IOException;
+ public AbstractModel getModel() throws IOException {
+ checkModelType();
+ return constructModel();
+ }
public abstract void checkModelType() throws java.io.IOException;
+
+ public abstract AbstractModel constructModel() throws java.io.IOException;
protected String[] getOutcomes() throws java.io.IOException {
int numOutcomes = readInt();
diff --git a/src/java/opennlp/model/BinaryFileDataReader.java b/src/java/opennlp/model/BinaryFileDataReader.java
new file mode 100644
index 0000000..596cc4c
--- /dev/null
+++ b/src/java/opennlp/model/BinaryFileDataReader.java
@@ -0,0 +1,44 @@
+package opennlp.model;
+
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.GZIPInputStream;
+
+public class BinaryFileDataReader implements DataReader {
+
+ private DataInputStream input;
+
+ public BinaryFileDataReader(File f) throws IOException {
+ if (f.getName().endsWith(".gz")) {
+ input = new DataInputStream(
+ new GZIPInputStream(new FileInputStream(f)));
+ }
+ else {
+ input = new DataInputStream(new FileInputStream(f));
+ }
+ }
+
+ public BinaryFileDataReader(InputStream in) {
+ input = new DataInputStream(in);
+ }
+
+ public BinaryFileDataReader(DataInputStream in) {
+ input = in;
+ }
+
+ public double readDouble() throws IOException {
+ return input.readDouble();
+ }
+
+ public int readInt() throws IOException {
+ return input.readInt();
+ }
+
+ public String readUTF() throws IOException {
+ return input.readUTF();
+ }
+
+}
diff --git a/src/java/opennlp/model/DataReader.java b/src/java/opennlp/model/DataReader.java
new file mode 100644
index 0000000..d0c30a1
--- /dev/null
+++ b/src/java/opennlp/model/DataReader.java
@@ -0,0 +1,12 @@
+package opennlp.model;
+
+import java.io.IOException;
+
+public interface DataReader {
+
+ public double readDouble() throws IOException;
+
+ public int readInt() throws IOException;
+
+ public String readUTF() throws IOException;
+}
diff --git a/src/java/opennlp/model/DynamicEvalParameters.java b/src/java/opennlp/model/DynamicEvalParameters.java
new file mode 100644
index 0000000..233aa10
--- /dev/null
+++ b/src/java/opennlp/model/DynamicEvalParameters.java
@@ -0,0 +1,33 @@
+package opennlp.model;
+
+import java.util.List;
+
+public class DynamicEvalParameters {
+
+ /** Mapping between outcomes and parameter values for each context.
+ * The integer representation of the context can be found using <code>pmap</code>.*/
+ private List<? extends Context> params;
+
+ /** The number of outcomes being predicted. */
+ private final int numOutcomes;
+
+
+ /**
+ * Creates a set of parameters which can be evaluated with the eval method.
+ * @param params The parameters of the model.
+ * @param numOutcomes The number of outcomes.
+ */
+ public DynamicEvalParameters(List<? extends Context> params, int numOutcomes) {
+ this.params = params;
+ this.numOutcomes = numOutcomes;
+ }
+
+ public Context[] getParams() {
+ return params.toArray(new Context[params.size()]);
+ }
+
+ public int getNumOutcomes() {
+ return numOutcomes;
+ }
+
+}
diff --git a/src/java/opennlp/model/EvalParameters.java b/src/java/opennlp/model/EvalParameters.java
index 3df3afd..30f6d35 100644
--- a/src/java/opennlp/model/EvalParameters.java
+++ b/src/java/opennlp/model/EvalParameters.java
@@ -29,22 +29,18 @@ public class EvalParameters {
/** Mapping between outcomes and paramater values for each context.
* The integer representation of the context can be found using <code>pmap</code>.*/
- public Context[] params;
+ private Context[] params;
/** The number of outcomes being predicted. */
- public final int numOutcomes;
+ private final int numOutcomes;
/** The maximum number of feattures fired in an event. Usually refered to a C.
* This is used to normalize the number of features which occur in an event. */
- public double correctionConstant;
+ private double correctionConstant;
/** Stores inverse of the correction constant, 1/C. */
- public final double constantInverse;
+ private final double constantInverse;
/** The correction parameter of the model. */
- public double correctionParam;
- /** Log of 1/C; initial value of probabilities. */
- private final double iprob;
+ private double correctionParam;
- private String[] outcomeLabels;
-
/**
* Creates a set of paramters which can be evaulated with the eval method.
* @param params The parameters of the model.
@@ -58,26 +54,39 @@ public EvalParameters(Context[] params, double correctionParam, double correctio
this.numOutcomes = numOutcomes;
this.correctionConstant = correctionConstant;
this.constantInverse = 1.0 / correctionConstant;
- this.iprob = Math.log(1.0/numOutcomes);
}
public EvalParameters(Context[] params, int numOutcomes) {
this(params,0,0,numOutcomes);
}
- public EvalParameters(Context[] params, int numOutcomes, String[] outcomeLabels) {
- this(params,0,0,numOutcomes);
- this.outcomeLabels = outcomeLabels;
+ /* (non-Javadoc)
+ * @see opennlp.model.EvalParameters#getParams()
+ */
+ public Context[] getParams() {
+ return params;
}
-
- public void setOutcomes(String[] outcomeLabels) {
- this.outcomeLabels = outcomeLabels;
+
+ /* (non-Javadoc)
+ * @see opennlp.model.EvalParameters#getNumOutcomes()
+ */
+ public int getNumOutcomes() {
+ return numOutcomes;
+ }
+
+ public double getCorrectionConstant() {
+ return correctionConstant;
+ }
+
+ public double getConstantInverse() {
+ return constantInverse;
+ }
+
+ public double getCorrectionParam() {
+ return correctionParam;
}
- public String getOutcomeLabel(int oi) {
- if (outcomeLabels != null) {
- return outcomeLabels[oi];
- }
- return null;
+ public void setCorrectionParam(double correctionParam) {
+ this.correctionParam = correctionParam;
}
}
\ No newline at end of file
diff --git a/src/java/opennlp/model/EventCollectorAsStream.java b/src/java/opennlp/model/EventCollectorAsStream.java
index a412bd3..647524e 100644
--- a/src/java/opennlp/model/EventCollectorAsStream.java
+++ b/src/java/opennlp/model/EventCollectorAsStream.java
@@ -26,7 +26,7 @@
* @author Jason Baldridge
* @version $Revision$, $Date$
*/
-public final class EventCollectorAsStream implements EventStream {
+public final class EventCollectorAsStream extends AbstractEventStream {
final Event[] events;
final int numEvents;
int index = 0;
@@ -36,12 +36,12 @@ public EventCollectorAsStream (EventCollector ec) {
numEvents = events.length;
}
- public Event nextEvent () {
- return events[index++];
+ public Event next () {
+ return events[index++];
}
public boolean hasNext () {
- return (index < numEvents);
+ return (index < numEvents);
}
}
diff --git a/src/java/opennlp/model/EventStream.java b/src/java/opennlp/model/EventStream.java
index 370cd9f..7bc0c99 100644
--- a/src/java/opennlp/model/EventStream.java
+++ b/src/java/opennlp/model/EventStream.java
@@ -17,6 +17,8 @@
package opennlp.model;
+import java.util.Iterator;
+
/**
* A object which can deliver a stream of training events for the GIS
* procedure (or others such as IIS if and when they are implemented).
@@ -28,14 +30,14 @@
* @version $Revision$, $Date$
*
*/
-public interface EventStream {
+public interface EventStream extends Iterator<Event> {
/**
* Returns the next Event object held in this EventStream.
*
* @return the Event object which is next in this EventStream
*/
- public Event nextEvent ();
+ public Event next ();
/**
* Test whether there are any Events remaining in this EventStream.
@@ -43,5 +45,6 @@ public interface EventStream {
* @return true if this EventStream has more Events
*/
public boolean hasNext ();
+
}
diff --git a/src/java/opennlp/model/FileEventStream.java b/src/java/opennlp/model/FileEventStream.java
index 766cb4e..7cc9ee1 100644
--- a/src/java/opennlp/model/FileEventStream.java
+++ b/src/java/opennlp/model/FileEventStream.java
@@ -34,7 +34,7 @@
* @author Tom Morton
*
*/
-public class FileEventStream implements EventStream {
+public class FileEventStream extends AbstractEventStream {
BufferedReader reader;
String line;
@@ -76,7 +76,7 @@ public boolean hasNext() {
}
}
- public Event nextEvent() {
+ public Event next() {
StringTokenizer st = new StringTokenizer(line);
String outcome = st.nextToken();
int count = st.countTokens();
diff --git a/src/java/opennlp/model/GenericModelReader.java b/src/java/opennlp/model/GenericModelReader.java
new file mode 100644
index 0000000..8086380
--- /dev/null
+++ b/src/java/opennlp/model/GenericModelReader.java
@@ -0,0 +1,31 @@
+package opennlp.model;
+
+import java.io.File;
+import java.io.IOException;
+
+import opennlp.maxent.io.GISModelReader;
+import opennlp.perceptron.PerceptronModelReader;
+
+public class GenericModelReader extends AbstractModelReader {
+
+ private AbstractModelReader delegateModelReader;
+
+ public GenericModelReader (File f) throws IOException {
+ super(f);
+ }
+
+ public void checkModelType() throws IOException {
+ String modelType = readUTF();
+ if (modelType.equals("Perceptron")) {
+ delegateModelReader = new PerceptronModelReader(this.dataReader);
+ }
+ else if (modelType.equals("Maxent")) {
+ delegateModelReader = new GISModelReader(this.dataReader);
+ }
+ }
+
+
+ public AbstractModel constructModel() throws IOException {
+ return delegateModelReader.constructModel();
+ }
+}
diff --git a/src/java/opennlp/model/ObjectDataReader.java b/src/java/opennlp/model/ObjectDataReader.java
new file mode 100644
index 0000000..4afc09e
--- /dev/null
+++ b/src/java/opennlp/model/ObjectDataReader.java
@@ -0,0 +1,26 @@
+package opennlp.model;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+
+public class ObjectDataReader implements DataReader {
+
+ protected ObjectInputStream ois;
+
+ public ObjectDataReader(ObjectInputStream ois) {
+ this.ois = ois;
+ }
+
+ public double readDouble() throws IOException {
+ return ois.readDouble();
+ }
+
+ public int readInt() throws IOException {
+ return ois.readInt();
+ }
+
+ public String readUTF() throws IOException {
+ return ois.readUTF();
+ }
+
+}
diff --git a/src/java/opennlp/model/OnePassDataIndexer.java b/src/java/opennlp/model/OnePassDataIndexer.java
index de8a825..018f167 100644
--- a/src/java/opennlp/model/OnePassDataIndexer.java
+++ b/src/java/opennlp/model/OnePassDataIndexer.java
@@ -104,7 +104,7 @@ private LinkedList computeEventCounts(EventStream eventStream,Map counter = new HashMap();
LinkedList events = new LinkedList();
while (eventStream.hasNext()) {
- Event ev = eventStream.nextEvent();
+ Event ev = eventStream.next();
events.addLast(ev);
update(ev.getContext(),predicateSet,counter,cutoff);
}
diff --git a/src/java/opennlp/model/PlainTextFileDataReader.java b/src/java/opennlp/model/PlainTextFileDataReader.java
new file mode 100644
index 0000000..970289a
--- /dev/null
+++ b/src/java/opennlp/model/PlainTextFileDataReader.java
@@ -0,0 +1,44 @@
+package opennlp.model;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.zip.GZIPInputStream;
+
+public class PlainTextFileDataReader implements DataReader {
+
+ private BufferedReader input;
+
+ public PlainTextFileDataReader(File f) throws IOException {
+ if (f.getName().endsWith(".gz")) {
+ input = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(f))));
+ }
+ else {
+ input = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
+ }
+ }
+
+ public PlainTextFileDataReader(InputStream in) {
+ input = new BufferedReader(new InputStreamReader(in));
+ }
+
+ public PlainTextFileDataReader(BufferedReader in) {
+ input = in;
+ }
+
+ public double readDouble() throws IOException {
+ return Double.parseDouble(input.readLine());
+ }
+
+ public int readInt() throws IOException {
+ return Integer.parseInt(input.readLine());
+ }
+
+ public String readUTF() throws IOException {
+ return input.readLine();
+ }
+
+}
diff --git a/src/java/opennlp/model/RealValueFileEventStream.java b/src/java/opennlp/model/RealValueFileEventStream.java
index c778be9..b2b85ca 100644
--- a/src/java/opennlp/model/RealValueFileEventStream.java
+++ b/src/java/opennlp/model/RealValueFileEventStream.java
@@ -74,7 +74,7 @@ public static float[] parseContexts(String[] contexts) {
return values;
}
- public Event nextEvent() {
+ public Event next() {
int si = line.indexOf(' ');
String outcome = line.substring(0,si);
String[] contexts = line.substring(si+1).split(" ");
diff --git a/src/java/opennlp/model/TwoPassDataIndexer.java b/src/java/opennlp/model/TwoPassDataIndexer.java
index cb1f766..e2282b1 100644
--- a/src/java/opennlp/model/TwoPassDataIndexer.java
+++ b/src/java/opennlp/model/TwoPassDataIndexer.java
@@ -112,7 +112,7 @@ private int computeEventCounts(EventStream eventStream, Writer eventStore, Map predicateI
List eventsToCompare = new ArrayList(numEvents);
List indexedContext = new ArrayList();
while (es.hasNext()) {
- Event ev = es.nextEvent();
+ Event ev = es.next();
String[] econtext = ev.getContext();
ComparableEvent ce;
diff --git a/src/java/opennlp/perceptron/BinaryPerceptronModelReader.java b/src/java/opennlp/perceptron/BinaryPerceptronModelReader.java
new file mode 100644
index 0000000..f9e6a8e
--- /dev/null
+++ b/src/java/opennlp/perceptron/BinaryPerceptronModelReader.java
@@ -0,0 +1,32 @@
+package opennlp.perceptron;
+
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.IOException;
+
+import opennlp.model.BinaryFileDataReader;
+
+/** PerceptronModelReader whose stream constructor reads through a BinaryFileDataReader. */
+public class BinaryPerceptronModelReader extends PerceptronModelReader {
+
+ /**
+ * Creates a reader over an already opened stream by wrapping it in a
+ * BinaryFileDataReader.
+ *
+ * @param dis The DataInputStream containing the model information.
+ */
+ public BinaryPerceptronModelReader(DataInputStream dis) {
+ super(new BinaryFileDataReader(dis));
+ }
+
+ /**
+ * Creates a reader for the model stored in the given file by delegating to
+ * the superclass File constructor. NOTE(review): the ".gz" suffix detection
+ * is presumably done there; nothing in this class inspects the name.
+ *
+ * @param f The File in which the model is stored.
+ */
+ public BinaryPerceptronModelReader (File f) throws IOException {
+ super(f);
+ }
+}
diff --git a/src/java/opennlp/perceptron/BinaryPerceptronModelWriter.java b/src/java/opennlp/perceptron/BinaryPerceptronModelWriter.java
new file mode 100644
index 0000000..ba72c7e
--- /dev/null
+++ b/src/java/opennlp/perceptron/BinaryPerceptronModelWriter.java
@@ -0,0 +1,100 @@
+///////////////////////////////////////////////////////////////////////////////
+//Copyright (C) 2001 Jason Baldridge and Gann Bierner
+
+//This library is free software; you can redistribute it and/or
+//modify it under the terms of the GNU Lesser General Public
+//License as published by the Free Software Foundation; either
+//version 2.1 of the License, or (at your option) any later version.
+
+//This library is distributed in the hope that it will be useful,
+//but WITHOUT ANY WARRANTY; without even the implied warranty of
+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//GNU General Public License for more details.
+
+//You should have received a copy of the GNU Lesser General Public
+//License along with this program; if not, write to the Free Software
+//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+package opennlp.perceptron;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.zip.GZIPOutputStream;
+
+import opennlp.model.AbstractModel;
+
+/**
+ * Model writer that saves models in binary format.
+ *
+ * @author Jason Baldridge
+ * @version $Revision: 1.1 $, $Date: 2008/11/06 19:59:44 $
+ */
+public class BinaryPerceptronModelWriter extends PerceptronModelWriter {
+  DataOutputStream output;
+
+  /**
+   * Constructor which takes a model and a File and prepares itself to write
+   * the model to that file. The output is gzip-compressed when the file name
+   * ends in ".gz".
+   *
+   * @param model The model which is to be persisted.
+   * @param f The File in which the model is to be persisted.
+   * @throws IOException If the file cannot be opened for writing.
+   */
+  public BinaryPerceptronModelWriter (AbstractModel model, File f) throws IOException {
+
+    super(model);
+
+    FileOutputStream fileOut = new FileOutputStream(f);
+    if (f.getName().endsWith(".gz")) {
+      try {
+        output = new DataOutputStream(new GZIPOutputStream(fileOut));
+      }
+      catch (IOException e) {
+        // GZIPOutputStream writes its header on construction; if that fails
+        // the file handle would otherwise never be closed.
+        fileOut.close();
+        throw e;
+      }
+    }
+    else {
+      output = new DataOutputStream(fileOut);
+    }
+  }
+
+  /**
+   * Constructor which takes a model and a DataOutputStream and prepares
+   * itself to write the model to that stream.
+   *
+   * @param model The model which is to be persisted.
+   * @param dos The stream which will be used to persist the model.
+   */
+  public BinaryPerceptronModelWriter (AbstractModel model, DataOutputStream dos) {
+    super(model);
+    output = dos;
+  }
+
+  /** Writes the string with DataOutputStream.writeUTF. */
+  protected void writeUTF (String s) throws java.io.IOException {
+    output.writeUTF(s);
+  }
+
+  /** Writes the int with DataOutputStream.writeInt. */
+  protected void writeInt (int i) throws java.io.IOException {
+    output.writeInt(i);
+  }
+
+  /** Writes the double with DataOutputStream.writeDouble. */
+  protected void writeDouble (double d) throws java.io.IOException {
+    output.writeDouble(d);
+  }
+
+  /** Flushes and closes the underlying stream. */
+  protected void close () throws java.io.IOException {
+    output.flush();
+    output.close();
+  }
+
+}
diff --git a/src/java/opennlp/perceptron/PerceptronModel.java b/src/java/opennlp/perceptron/PerceptronModel.java
new file mode 100644
index 0000000..06ed7e2
--- /dev/null
+++ b/src/java/opennlp/perceptron/PerceptronModel.java
@@ -0,0 +1,109 @@
+package opennlp.perceptron;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.InputStreamReader;
+import java.text.DecimalFormat;
+
+import opennlp.model.AbstractModel;
+import opennlp.model.Context;
+import opennlp.model.EvalParameters;
+
+public class PerceptronModel extends AbstractModel {
+
+
+ /**
+ * Creates a model by passing the parameters, predicate labels and outcome
+ * names unchanged to the AbstractModel constructor.
+ */
+ public PerceptronModel(Context[] params, String[] predLabels, String[] outcomeNames) {
+ super(params,predLabels,outcomeNames);
+ }
+
+ /**
+  * Evaluates the context into a newly allocated array that has one slot per
+  * outcome, as reported by evalParams.
+  */
+ public double[] eval(String[] context) {
+   double[] outsums = new double[evalParams.getNumOutcomes()];
+   return eval(context,outsums);
+ }
+
+ /**
+  * Evaluates the context and its values into a newly allocated array that
+  * has one slot per outcome, as reported by evalParams.
+  */
+ public double[] eval(String[] context, float[] values) {
+   double[] outsums = new double[evalParams.getNumOutcomes()];
+   return eval(context,values,outsums);
+ }
+
+ /**
+  * Evaluates the context into the supplied array by calling the
+  * three-argument overload with no values array.
+  */
+ public double[] eval(String[] context, double[] probs) {
+   float[] noValues = null;
+   return eval(context,noValues,probs);
+ }
+
+ public double[] eval(String[] context, float[] values,double[] outsums) {
+ int[] scontexts = new int[context.length];
+ java.util.Arrays.fill(outsums, 0);
+ for (int i=0; i= 0) {
+ Context predParams = params[context[ci]];
+ activeOutcomes = predParams.getOutcomes();
+ activeParameters = predParams.getParameters();
+ if (values != null) {
+ value = values[ci];
+ }
+ for (int ai = 0; ai < activeOutcomes.length; ai++) {
+ int oid = activeOutcomes[ai];
+ prior[oid] += activeParameters[ai] * value;
+ }
+ }
+ }
+ if (normalize) {
+ double normal = 0.0;
+ double min = prior[0];
+ for (int oid = 0; oid < model.getNumOutcomes(); oid++) {
+ if (prior[oid] < min) {
+ min = prior[oid];
+ }
+ }
+ for (int oid = 0; oid < model.getNumOutcomes(); oid++) {
+ if (min < 0) {
+ prior[oid]+=(-1*min);
+ }
+ normal += prior[oid];
+ }
+ if (normal == 0.0) {
+ for (int oid = 0; oid < model.getNumOutcomes(); oid++) {
+ prior[oid] = (double) 1/model.getNumOutcomes();
+ }
+ }
+ else {
+ for (int oid = 0; oid < model.getNumOutcomes(); oid++) {
+ prior[oid] /= normal;
+ }
+ }
+ }
+ return prior;
+ }
+
+ public static void main(String[] args) throws java.io.IOException {
+ if (args.length == 0) {
+ System.err.println("Usage: PerceptronModel modelname < contexts");
+ System.exit(1);
+ }
+ AbstractModel m = new PerceptronModelReader(new File(args[0])).getModel();
+ BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+ DecimalFormat df = new java.text.DecimalFormat(".###");
+ for (String line = in.readLine(); line != null; line = in.readLine()) {
+ String[] context = line.split(" ");
+ double[] dist = m.eval(context);
+ for (int oi=0;oiPerceptron (model type identifier)
+ * <br>1. # of parameters (int)
+ * <br>2. # of outcomes (int)
+ * <br>* list of outcome names (String)
+ * <br>3. # of different types of outcome patterns (int)
+ * <br>* list of (int int[])
+ * <br>[# of predicates for which outcome pattern is true] [outcome pattern]
+ * <br>4. # of predicates (int)
+ * <br>* list of predicate names (String)
+ *
+ * If you are creating a reader for a format which won't work with this
+ * (perhaps a database or xml file), override this method and ignore the
+ * other methods provided in this abstract class.
+ *
+ * @return The PerceptronModel stored in the format and location specified to
+ * this PerceptronModelReader (usually via its constructor).
+ */
+ public AbstractModel constructModel() throws IOException {
+ // NOTE(review): these getters presumably consume the stored model
+ // sequentially, so their call order likely has to match the layout
+ // listed in the Javadoc -- confirm before reordering.
+ String[] outcomeLabels = getOutcomes();
+ int[][] outcomePatterns = getOutcomePatterns();
+ String[] predLabels = getPredicates();
+ // The parameters are read using the outcome patterns obtained above.
+ Context[] params = getParameters(outcomePatterns);
+
+ return new PerceptronModel(params,
+ predLabels,
+ outcomeLabels);
+ }
+
+ /**
+  * Reads the model type identifier and prints a warning to standard output
+  * when it is not "Perceptron". The method returns normally in both cases.
+  */
+ public void checkModelType() throws java.io.IOException {
+   String modelType = readUTF();
+   boolean isPerceptron = modelType.equals("Perceptron");
+   if (isPerceptron) {
+     return;
+   }
+   System.out.println("Error: attempting to load a "+modelType+
+       " model as a Perceptron model."+
+       " You should expect problems.");
+ }
+}
diff --git a/src/java/opennlp/perceptron/PerceptronModelWriter.java b/src/java/opennlp/perceptron/PerceptronModelWriter.java
new file mode 100644
index 0000000..c9ccbfe
--- /dev/null
+++ b/src/java/opennlp/perceptron/PerceptronModelWriter.java
@@ -0,0 +1,151 @@
+package opennlp.perceptron;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import opennlp.model.AbstractModel;
+import opennlp.model.ComparablePredicate;
+import opennlp.model.Context;
+
+/**
+ * Abstract parent class for Perceptron writers. It provides the persist method
+ * which takes care of the structure of a stored document, and requires an
+ * extending class to define precisely how the data should be stored.
+ *
+ * @author Jason Baldridge
+ * @version $Revision: 1.1 $, $Date: 2008/11/06 19:59:44 $
+ */
+public abstract class PerceptronModelWriter {
+ protected Context[] PARAMS;
+ protected String[] OUTCOME_LABELS;
+ protected String[] PRED_LABELS;
+ int numOutcomes;
+
+ public PerceptronModelWriter (AbstractModel model) {
+
+ Object[] data = model.getDataStructures();
+ this.numOutcomes = model.getNumOutcomes();
+ PARAMS = (Context[]) data[0];
+ Map pmap = (Map)data[1];
+ OUTCOME_LABELS = (String[])data[2];
+
+ PRED_LABELS = new String[pmap.size()];
+ for (String pred : pmap.keySet()) {
+ PRED_LABELS[pmap.get(pred)] = pred;
+ }
+ }
+
+ protected abstract void writeUTF (String s) throws java.io.IOException;
+ protected abstract void writeInt (int i) throws java.io.IOException;
+ protected abstract void writeDouble (double d) throws java.io.IOException;
+ protected abstract void close () throws java.io.IOException;
+
+ /**
+ * Writes the model to disk, using the <code>writeX()</code> methods
+ * provided by extending classes.
+ *
+ * If you wish to create a PerceptronModelWriter which uses a different
+ * structure, it will be necessary to override the persist method in
+ * addition to implementing the <code>writeX()</code> methods.
+ */
+ public void persist() throws IOException {
+
+ // the type of model (Perceptron)
+ writeUTF("Perceptron");
+
+ // the mapping from outcomes to their integer indexes
+ writeInt(OUTCOME_LABELS.length);
+
+ for (int i=0; i modelDistribution[max]) {
+ max = oi;
+ }
+ }
+ if (max == outcomeList[ei]) {
+ numCorrect += numTimesEventsSeen[ei];
+ }
+ for (int oi = 0;oi "+averageParams[pi].getParameters()[oi]);
+ updates[pi][oi][VALUE] = (int) params[pi].getParameters()[oi];
+ updates[pi][oi][ITER] = iteration;
+ updates[pi][oi][EVENT] = ei;
+ }
+ }
+ }
+ }
+ else {
+ if (modelDistribution[oi] > 0) {
+ for (int ci = 0; ci < contexts[ei].length; ci++) {
+ int pi = contexts[ei][ci];
+ if (values == null) {
+ params[pi].updateParameter(oi,-1);
+ }
+ else {
+ params[pi].updateParameter(oi, values[ei][ci]*-1);
+ }
+ if (useAverage) {
+ if (updates[pi][oi][VALUE] != 0) {
+ averageParams[pi].updateParameter(oi,updates[pi][oi][VALUE]*(numEvents*(iteration-updates[pi][oi][ITER])+(ei-updates[pi][oi][EVENT])));
+ }
+ //System.err.println("updates["+pi+"]["+oi+"]=("+updates[pi][oi][ITER]+","+updates[pi][oi][EVENT]+","+updates[pi][oi][VALUE]+") + ("+iteration+","+ei+","+params[pi].getParameters()[oi]+") -> "+averageParams[pi].getParameters()[oi]);
+ updates[pi][oi][VALUE] = (int) params[pi].getParameters()[oi];
+ updates[pi][oi][ITER] = iteration;
+ updates[pi][oi][EVENT] = ei;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ //finish average computation
+ double totIterations = (double) iterations*numEvents;
+ if (useAverage && iteration == iterations-1) {
+ for (int pi = 0; pi < numPreds; pi++) {
+ double[] predParams = averageParams[pi].getParameters();
+ for (int oi = 0;oi "+averageParams[pi].getParameters()[oi]);
+ }
+ }
+ }
+ }
+ display(". "+((double) numCorrect / numEvents) + "\n");
+ }
+}
diff --git a/src/java/opennlp/perceptron/PlainTextPerceptronModelReader.java b/src/java/opennlp/perceptron/PlainTextPerceptronModelReader.java
new file mode 100644
index 0000000..fa964fd
--- /dev/null
+++ b/src/java/opennlp/perceptron/PlainTextPerceptronModelReader.java
@@ -0,0 +1,31 @@
+package opennlp.perceptron;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+
+import opennlp.model.PlainTextFileDataReader;
+
+/** PerceptronModelReader whose reader constructor reads through a PlainTextFileDataReader. */
+public class PlainTextPerceptronModelReader extends PerceptronModelReader {
+ /**
+ * Creates a reader over an already opened BufferedReader by wrapping it in a
+ * PlainTextFileDataReader.
+ *
+ * @param br The BufferedReader containing the model information.
+ */
+ public PlainTextPerceptronModelReader(BufferedReader br) {
+ super(new PlainTextFileDataReader(br));
+ }
+
+ /**
+ * Creates a reader for the model stored in the given file by delegating to
+ * the superclass File constructor. NOTE(review): the ".gz" suffix detection
+ * is presumably done there; nothing in this class inspects the name.
+ *
+ * @param f The File in which the model is stored.
+ */
+ public PlainTextPerceptronModelReader (File f) throws IOException {
+ super(f);
+ }
+}
diff --git a/src/java/opennlp/perceptron/PlainTextPerceptronModelWriter.java b/src/java/opennlp/perceptron/PlainTextPerceptronModelWriter.java
new file mode 100644
index 0000000..63627ba
--- /dev/null
+++ b/src/java/opennlp/perceptron/PlainTextPerceptronModelWriter.java
@@ -0,0 +1,107 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2001 Jason Baldridge and Gann Bierner
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//////////////////////////////////////////////////////////////////////////////
+package opennlp.perceptron;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.util.zip.GZIPOutputStream;
+
+import opennlp.model.AbstractModel;
+
+/**
+ * Model writer that saves models in plain text format.
+ *
+ * @author Jason Baldridge
+ * @version $Revision: 1.1 $, $Date: 2008/11/06 19:59:44 $
+ */
+public class PlainTextPerceptronModelWriter extends PerceptronModelWriter {
+  BufferedWriter output;
+
+  /**
+   * Constructor which takes a PerceptronModel and a File and prepares itself to
+   * write the model to that file. The output is gzip-compressed when the file
+   * name ends in ".gz".
+   *
+   * @param model The PerceptronModel which is to be persisted.
+   * @param f The File in which the model is to be persisted.
+   * @throws IOException If the file cannot be opened for writing.
+   */
+  public PlainTextPerceptronModelWriter (AbstractModel model, File f)
+  throws IOException, FileNotFoundException {
+
+    super(model);
+    if (f.getName().endsWith(".gz")) {
+      FileOutputStream fileOut = new FileOutputStream(f);
+      try {
+        output = new BufferedWriter(new OutputStreamWriter(
+            new GZIPOutputStream(fileOut)));
+      }
+      catch (IOException e) {
+        // GZIPOutputStream writes its header on construction; if that fails
+        // the file handle would otherwise never be closed.
+        fileOut.close();
+        throw e;
+      }
+    }
+    else {
+      output = new BufferedWriter(new FileWriter(f));
+    }
+  }
+
+  /**
+   * Constructor which takes a PerceptronModel and a BufferedWriter and prepares
+   * itself to write the model to that writer.
+   *
+   * @param model The PerceptronModel which is to be persisted.
+   * @param bw The BufferedWriter which will be used to persist the model.
+   */
+  public PlainTextPerceptronModelWriter (AbstractModel model, BufferedWriter bw) {
+    super(model);
+    output = bw;
+  }
+
+  /** Writes the string followed by a line separator. */
+  protected void writeUTF (String s) throws java.io.IOException {
+    output.write(s);
+    output.newLine();
+  }
+
+  /** Writes Integer.toString of the value followed by a line separator. */
+  protected void writeInt (int i) throws java.io.IOException {
+    output.write(Integer.toString(i));
+    output.newLine();
+  }
+
+  /** Writes Double.toString of the value followed by a line separator. */
+  protected void writeDouble (double d) throws java.io.IOException {
+    output.write(Double.toString(d));
+    output.newLine();
+  }
+
+  /** Flushes and closes the underlying writer. */
+  protected void close () throws java.io.IOException {
+    output.flush();
+    output.close();
+  }
+
+}
diff --git a/src/java/opennlp/perceptron/SuffixSensitivePerceptronModelWriter.java b/src/java/opennlp/perceptron/SuffixSensitivePerceptronModelWriter.java
new file mode 100644
index 0000000..f063896
--- /dev/null
+++ b/src/java/opennlp/perceptron/SuffixSensitivePerceptronModelWriter.java
@@ -0,0 +1,112 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2001 Jason Baldridge and Gann Bierner
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+//////////////////////////////////////////////////////////////////////////////
+package opennlp.perceptron;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.util.zip.GZIPOutputStream;
+
+import opennlp.model.AbstractModel;
+
+/**
+ * A writer for perceptron models which inspects the filename and invokes the
+ * appropriate PerceptronModelWriter depending on the filename's suffixes.
+ *
+ * The following assumptions are made about suffixes:
+ * <ul>
+ * <li>.gz --&gt; the file is gzipped (must be the last suffix)
+ * <li>.txt --&gt; the file is plain text
+ * <li>.bin --&gt; the file is binary
+ * </ul>
+ *
+ * @author Jason Baldridge
+ * @version $Revision: 1.1 $, $Date: 2008/11/06 19:59:44 $
+ */
+public class SuffixSensitivePerceptronModelWriter extends PerceptronModelWriter {
+  private final PerceptronModelWriter suffixAppropriateWriter;
+
+  /**
+   * Constructor which takes a model and a File and invokes the
+   * PerceptronModelWriter appropriate for the suffix. Any name that does not
+   * end in ".bin" (after an optional ".gz") is written as plain text.
+   *
+   * @param model The model which is to be persisted.
+   * @param f The File in which the model is to be stored.
+   * @throws IOException If the file cannot be opened for writing.
+   */
+  public SuffixSensitivePerceptronModelWriter (AbstractModel model, File f)
+  throws IOException {
+
+    super (model);
+
+    String filename = f.getName();
+    OutputStream output = new FileOutputStream(f);
+
+    // handle the zipped/not zipped distinction
+    if (filename.endsWith(".gz")) {
+      try {
+        output = new GZIPOutputStream(output);
+      }
+      catch (IOException e) {
+        // The assignment never happened, so "output" is still the raw file
+        // stream; close it so the file handle is not leaked.
+        output.close();
+        throw e;
+      }
+      filename = filename.substring(0,filename.length()-3);
+    }
+
+    // handle the different formats
+    if (filename.endsWith(".bin")) {
+      suffixAppropriateWriter =
+        new BinaryPerceptronModelWriter(model,
+            new DataOutputStream(output));
+    }
+    else { // default is ".txt"
+      suffixAppropriateWriter =
+        new PlainTextPerceptronModelWriter(model,
+            new BufferedWriter(new OutputStreamWriter(output)));
+    }
+  }
+
+  /** Forwards the string to the writer chosen in the constructor. */
+  protected void writeUTF (String s) throws java.io.IOException {
+    suffixAppropriateWriter.writeUTF(s);
+  }
+
+  /** Forwards the int to the writer chosen in the constructor. */
+  protected void writeInt (int i) throws java.io.IOException {
+    suffixAppropriateWriter.writeInt(i);
+  }
+
+  /** Forwards the double to the writer chosen in the constructor. */
+  protected void writeDouble (double d) throws java.io.IOException {
+    suffixAppropriateWriter.writeDouble(d);
+  }
+
+  /** Closes the writer chosen in the constructor. */
+  protected void close () throws java.io.IOException {
+    suffixAppropriateWriter.close();
+  }
+
+}