diff --git a/src/java/opennlp/maxent/BasicEventStream.java b/src/java/opennlp/maxent/BasicEventStream.java index 4c75a6d..1bea488 100644 --- a/src/java/opennlp/maxent/BasicEventStream.java +++ b/src/java/opennlp/maxent/BasicEventStream.java @@ -17,8 +17,8 @@ package opennlp.maxent; +import opennlp.model.AbstractEventStream; import opennlp.model.Event; -import opennlp.model.EventStream; /** * A object which can deliver a stream of training events assuming @@ -30,9 +30,9 @@ *

cp_1 cp_2 ... cp_n outcome * * @author Jason Baldridge - * @version $Revision: 1.4 $, $Date: 2008/09/28 18:03:47 $ + * @version $Revision: 1.5 $, $Date: 2008/11/06 19:59:44 $ */ -public class BasicEventStream implements EventStream { +public class BasicEventStream extends AbstractEventStream { ContextGenerator cg = new BasicContextGenerator(); DataStream ds; Event next; @@ -48,7 +48,7 @@ public BasicEventStream (DataStream ds) { * * @return the Event object which is next in this EventStream */ - public Event nextEvent () { + public Event next () { while (next == null && this.ds.hasNext()) next = createEvent((String)this.ds.nextToken()); diff --git a/src/java/opennlp/maxent/GISModel.java b/src/java/opennlp/maxent/GISModel.java index fb3dd15..f4ab161 100644 --- a/src/java/opennlp/maxent/GISModel.java +++ b/src/java/opennlp/maxent/GISModel.java @@ -33,7 +33,7 @@ * Iterative Scaling procedure (implemented in GIS.java). * * @author Tom Morton and Jason Baldridge - * @version $Revision: 1.23 $, $Date: 2008/09/28 18:03:50 $ + * @version $Revision: 1.24 $, $Date: 2008/11/06 19:59:44 $ */ public final class GISModel extends AbstractModel { /** @@ -77,11 +77,11 @@ public GISModel (Context[] params, String[] predLabels, String[] outcomeNames, i * getOutcome(int i). */ public final double[] eval(String[] context) { - return(eval(context,new double[evalParams.numOutcomes])); + return(eval(context,new double[evalParams.getNumOutcomes()])); } public final double[] eval(String[] context, float[] values) { - return(eval(context,values,new double[evalParams.numOutcomes])); + return(eval(context,values,new double[evalParams.getNumOutcomes()])); } public final double[] eval(String[] context, double[] outsums) { @@ -144,8 +144,8 @@ public static double[] eval(int[] context, double[] prior, EvalParameters model) * getOutcome(int i). 
*/ public static double[] eval(int[] context, float[] values, double[] prior, EvalParameters model) { - Context[] params = model.params; - int numfeats[] = new int[model.numOutcomes]; + Context[] params = model.getParams(); + int numfeats[] = new int[model.getNumOutcomes()]; int[] activeOutcomes; double[] activeParameters; double value = 1; @@ -166,17 +166,17 @@ public static double[] eval(int[] context, float[] values, double[] prior, EvalP } double normal = 0.0; - for (int oid = 0; oid < model.numOutcomes; oid++) { - if (model.correctionParam != 0) { - prior[oid] = Math.exp(prior[oid]*model.constantInverse+((1.0 - ((double) numfeats[oid] / model.correctionConstant)) * model.correctionParam)); + for (int oid = 0; oid < model.getNumOutcomes(); oid++) { + if (model.getCorrectionParam() != 0) { + prior[oid] = Math.exp(prior[oid]*model.getConstantInverse()+((1.0 - ((double) numfeats[oid] / model.getCorrectionConstant())) * model.getCorrectionParam())); } else { - prior[oid] = Math.exp(prior[oid]*model.constantInverse); + prior[oid] = Math.exp(prior[oid]*model.getConstantInverse()); } normal += prior[oid]; } - for (int oid = 0; oid < model.numOutcomes; oid++) { + for (int oid = 0; oid < model.getNumOutcomes(); oid++) { prior[oid] /= normal; } return prior; diff --git a/src/java/opennlp/maxent/GISTrainer.java b/src/java/opennlp/maxent/GISTrainer.java index 5aff377..f573f4b 100644 --- a/src/java/opennlp/maxent/GISTrainer.java +++ b/src/java/opennlp/maxent/GISTrainer.java @@ -45,7 +45,7 @@ * * @author Tom Morton * @author Jason Baldridge - * @version $Revision: 1.30 $, $Date: 2008/09/28 18:03:38 $ + * @version $Revision: 1.31 $, $Date: 2008/11/06 19:59:44 $ */ class GISTrainer { @@ -364,7 +364,7 @@ else if (useSimpleSmoothing) { findParameters(iterations); /*************** Create and return the model ******************/ - return new GISModel(params, predLabels, outcomeLabels, correctionConstant, evalParams.correctionParam); + return new GISModel(params, predLabels, 
outcomeLabels, correctionConstant, evalParams.getCorrectionParam()); } @@ -467,7 +467,7 @@ private double nextIteration() { } } if (useSlackParameter) - CFMOD += (evalParams.correctionConstant - contexts[ei].length) * numTimesEventsSeen[ei]; + CFMOD += (evalParams.getCorrectionConstant() - contexts[ei].length) * numTimesEventsSeen[ei]; loglikelihood += Math.log(modelDistribution[outcomeList[ei]]) * numTimesEventsSeen[ei]; numEvents += numTimesEventsSeen[ei]; @@ -493,7 +493,7 @@ private double nextIteration() { int[] activeOutcomes = params[pi].getOutcomes(); for (int aoi=0;aoi 0.0 && useSlackParameter) - evalParams.correctionParam += (cfObservedExpect - Math.log(CFMOD)); + evalParams.setCorrectionParam(evalParams.getCorrectionParam() + (cfObservedExpect - Math.log(CFMOD))); display(". loglikelihood=" + loglikelihood + "\t" + ((double) numCorrect / numEvents) + "\n"); return (loglikelihood); diff --git a/src/java/opennlp/maxent/RealBasicEventStream.java b/src/java/opennlp/maxent/RealBasicEventStream.java index b95adc9..1b23a66 100644 --- a/src/java/opennlp/maxent/RealBasicEventStream.java +++ b/src/java/opennlp/maxent/RealBasicEventStream.java @@ -17,11 +17,12 @@ package opennlp.maxent; +import opennlp.model.AbstractEventStream; import opennlp.model.Event; import opennlp.model.EventStream; import opennlp.model.RealValueFileEventStream; -public class RealBasicEventStream implements EventStream { +public class RealBasicEventStream extends AbstractEventStream { ContextGenerator cg = new BasicContextGenerator(); DataStream ds; Event next; @@ -33,7 +34,7 @@ public RealBasicEventStream(DataStream ds) { } - public Event nextEvent() { + public Event next() { while (next == null && this.ds.hasNext()) next = createEvent((String)this.ds.nextToken()); @@ -67,7 +68,7 @@ private Event createEvent(String obs) { public static void main(String[] args) throws java.io.IOException { EventStream es = new RealBasicEventStream(new PlainTextByLineDataStream(new 
java.io.FileReader(args[0]))); while (es.hasNext()) { - System.out.println(es.nextEvent()); + System.out.println(es.next()); } } } diff --git a/src/java/opennlp/maxent/io/BinaryGISModelReader.java b/src/java/opennlp/maxent/io/BinaryGISModelReader.java index 5151a7b..e832063 100644 --- a/src/java/opennlp/maxent/io/BinaryGISModelReader.java +++ b/src/java/opennlp/maxent/io/BinaryGISModelReader.java @@ -17,17 +17,17 @@ package opennlp.maxent.io; -import java.io.*; -import java.util.zip.*; +import java.io.DataInputStream; + +import opennlp.model.BinaryFileDataReader; /** * A reader for GIS models stored in binary format. * * @author Jason Baldridge - * @version $Revision: 1.2 $, $Date: 2008/09/28 18:04:24 $ + * @version $Revision: 1.3 $, $Date: 2008/11/06 19:59:44 $ */ public class BinaryGISModelReader extends GISModelReader { - protected DataInputStream input; /** * Constructor which directly instantiates the DataInputStream containing @@ -36,38 +36,6 @@ public class BinaryGISModelReader extends GISModelReader { * @param dis The DataInputStream containing the model information. */ public BinaryGISModelReader (DataInputStream dis) { - input = dis; - } - - /** - * Constructor which takes a File and creates a reader for it. Detects - * whether the file is gzipped or not based on whether the suffix contains - * ".gz" - * - * @param f The File in which the model is stored. 
- */ - public BinaryGISModelReader (File f) throws IOException { - - if (f.getName().endsWith(".gz")) { - input = new DataInputStream( - new GZIPInputStream(new FileInputStream(f))); - } - else { - input = new DataInputStream(new FileInputStream(f)); - } - + super(new BinaryFileDataReader(dis)); } - - public int readInt () throws java.io.IOException { - return input.readInt(); - } - - public double readDouble () throws java.io.IOException { - return input.readDouble(); - } - - public String readUTF () throws java.io.IOException { - return input.readUTF(); - } - } diff --git a/src/java/opennlp/maxent/io/GISModelReader.java b/src/java/opennlp/maxent/io/GISModelReader.java index 0069106..4cf165e 100644 --- a/src/java/opennlp/maxent/io/GISModelReader.java +++ b/src/java/opennlp/maxent/io/GISModelReader.java @@ -17,18 +17,31 @@ package opennlp.maxent.io; +import java.io.File; +import java.io.IOException; + import opennlp.maxent.GISModel; import opennlp.model.AbstractModel; import opennlp.model.AbstractModelReader; import opennlp.model.Context; +import opennlp.model.DataReader; /** * Abstract parent class for readers of GISModels. * * @author Jason Baldridge - * @version $Revision: 1.8 $, $Date: 2008/09/28 18:04:22 $ + * @version $Revision: 1.9 $, $Date: 2008/11/06 19:59:44 $ */ -public abstract class GISModelReader extends AbstractModelReader { +public class GISModelReader extends AbstractModelReader { + + public GISModelReader(File file) throws IOException { + super(file); + } + + public GISModelReader(DataReader dataReader) { + super(dataReader); + } + /** * Retrieve a model from disk. It assumes that models are saved in the * following sequence: @@ -51,24 +64,22 @@ public abstract class GISModelReader extends AbstractModelReader { * * @return The GISModel stored in the format and location specified to * this GISModelReader (usually via its the constructor). 
- */ - public AbstractModel getModel () throws java.io.IOException { - checkModelType(); - int correctionConstant = getCorrectionConstant(); - double correctionParam = getCorrectionParameter(); - String[] outcomeLabels = getOutcomes(); - int[][] outcomePatterns = getOutcomePatterns(); - String[] predLabels = getPredicates(); - Context[] params = getParameters(outcomePatterns); - - return new GISModel(params, - predLabels, - outcomeLabels, - correctionConstant, - correctionParam); - - } + */ + public AbstractModel constructModel() throws IOException { + int correctionConstant = getCorrectionConstant(); + double correctionParam = getCorrectionParameter(); + String[] outcomeLabels = getOutcomes(); + int[][] outcomePatterns = getOutcomePatterns(); + String[] predLabels = getPredicates(); + Context[] params = getParameters(outcomePatterns); + return new GISModel(params, + predLabels, + outcomeLabels, + correctionConstant, + correctionParam); + } + public void checkModelType() throws java.io.IOException { String modelType = readUTF(); if (!modelType.equals("GIS")) diff --git a/src/java/opennlp/maxent/io/ObjectGISModelReader.java b/src/java/opennlp/maxent/io/ObjectGISModelReader.java index 3e903a8..e4215bc 100644 --- a/src/java/opennlp/maxent/io/ObjectGISModelReader.java +++ b/src/java/opennlp/maxent/io/ObjectGISModelReader.java @@ -17,9 +17,10 @@ package opennlp.maxent.io; -import java.io.IOException; import java.io.ObjectInputStream; +import opennlp.model.ObjectDataReader; + public class ObjectGISModelReader extends GISModelReader { protected ObjectInputStream input; @@ -31,21 +32,7 @@ public class ObjectGISModelReader extends GISModelReader { * @param dis The DataInputStream containing the model information. 
*/ - public ObjectGISModelReader(ObjectInputStream dis) { - super(); - input = dis; - } - - public int readInt() throws IOException { - return input.readInt(); - } - - public double readDouble() throws IOException { - return input.readDouble(); - } - - public String readUTF() throws IOException { - return input.readUTF(); + public ObjectGISModelReader(ObjectInputStream ois) { + super(new ObjectDataReader(ois)); } - } diff --git a/src/java/opennlp/maxent/io/PlainTextGISModelReader.java b/src/java/opennlp/maxent/io/PlainTextGISModelReader.java index cf1fe3e..75214ce 100644 --- a/src/java/opennlp/maxent/io/PlainTextGISModelReader.java +++ b/src/java/opennlp/maxent/io/PlainTextGISModelReader.java @@ -17,17 +17,19 @@ package opennlp.maxent.io; -import java.io.*; -import java.util.zip.*; +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; + +import opennlp.model.PlainTextFileDataReader; /** * A reader for GIS models stored in plain text format. * * @author Jason Baldridge - * @version $Revision: 1.2 $, $Date: 2008/09/28 18:04:30 $ + * @version $Revision: 1.3 $, $Date: 2008/11/06 19:59:44 $ */ public class PlainTextGISModelReader extends GISModelReader { - private BufferedReader input; /** * Constructor which directly instantiates the BufferedReader containing @@ -36,7 +38,7 @@ public class PlainTextGISModelReader extends GISModelReader { * @param br The BufferedReader containing the model information. */ public PlainTextGISModelReader (BufferedReader br) { - input = br; + super(new PlainTextFileDataReader(br)); } /** @@ -47,27 +49,6 @@ public PlainTextGISModelReader (BufferedReader br) { * @param f The File in which the model is stored. 
*/ public PlainTextGISModelReader (File f) throws IOException { - - if (f.getName().endsWith(".gz")) { - input = new BufferedReader(new InputStreamReader( - new GZIPInputStream(new FileInputStream(f)))); - } - else { - input = new BufferedReader(new FileReader(f)); - } - - } - - public int readInt () throws IOException { - return Integer.parseInt(input.readLine()); + super(f); } - - public double readDouble () throws IOException { - return Double.parseDouble(input.readLine()); - } - - public String readUTF () throws IOException { - return input.readLine(); - } - } diff --git a/src/java/opennlp/maxent/io/SuffixSensitiveGISModelReader.java b/src/java/opennlp/maxent/io/SuffixSensitiveGISModelReader.java index 6ca1ff4..319ba59 100644 --- a/src/java/opennlp/maxent/io/SuffixSensitiveGISModelReader.java +++ b/src/java/opennlp/maxent/io/SuffixSensitiveGISModelReader.java @@ -39,7 +39,7 @@ *

  • .bin --> the file is binary * * @author Jason Baldridge - * @version $Revision: 1.4 $, $Date: 2008/09/28 18:04:24 $ + * @version $Revision: 1.5 $, $Date: 2008/11/06 19:59:44 $ */ public class SuffixSensitiveGISModelReader extends GISModelReader { protected GISModelReader suffixAppropriateReader; @@ -51,42 +51,9 @@ public class SuffixSensitiveGISModelReader extends GISModelReader { * @param f The File in which the model is stored. */ public SuffixSensitiveGISModelReader (File f) throws IOException { - InputStream input; - String filename = f.getName(); - - // handle the zipped/not zipped distinction - if (filename.endsWith(".gz")) { - input = new GZIPInputStream(new FileInputStream(f)); - filename = filename.substring(0,filename.length()-3); - } - else { - input = new FileInputStream(f); - } - - // handle the different formats - if (filename.endsWith(".bin")) { - suffixAppropriateReader = - new BinaryGISModelReader(new DataInputStream(input)); - } - // add more else ifs here to add further Reader types, e.g. - // else if (filename.endsWith(".xml")) - // suffixAppropriateReader = new XmlGISModelReader(input); - // of course, a BufferedReader may not be what is wanted here, - // so you might have to do a bit more to get - // SuffixSensitiveGISModelReader to work for xml or other formats. - // However, the default should be plain text (.txt). - else { // filename ends with ".txt" - suffixAppropriateReader = - new PlainTextGISModelReader( - new BufferedReader(new InputStreamReader(input))); - } - + super(f); } - protected SuffixSensitiveGISModelReader() { - super(); - } - // activate this if adding another type of reader which can't read model // information in the way that the default getModel() method in // GISModelReader does. 
@@ -95,18 +62,6 @@ protected SuffixSensitiveGISModelReader() { //} - public int readInt () throws IOException { - return suffixAppropriateReader.readInt(); - } - - public double readDouble () throws IOException { - return suffixAppropriateReader.readDouble(); - } - - public String readUTF () throws IOException { - return suffixAppropriateReader.readUTF(); - } - /** * To convert between different formats of the new style. * diff --git a/src/java/opennlp/model/AbstractEventStream.java b/src/java/opennlp/model/AbstractEventStream.java new file mode 100644 index 0000000..8562772 --- /dev/null +++ b/src/java/opennlp/model/AbstractEventStream.java @@ -0,0 +1,14 @@ +package opennlp.model; + + +public abstract class AbstractEventStream implements EventStream { + + public AbstractEventStream() { + super(); + } + + public void remove() { + throw new UnsupportedOperationException(); + } + +} diff --git a/src/java/opennlp/model/AbstractModel.java b/src/java/opennlp/model/AbstractModel.java index 7266842..ee6317f 100644 --- a/src/java/opennlp/model/AbstractModel.java +++ b/src/java/opennlp/model/AbstractModel.java @@ -31,12 +31,12 @@ public abstract class AbstractModel implements MaxentModel { protected EvalParameters evalParams; protected Prior prior; - public AbstractModel (Context[] params, String[] predLabels, String[] outcomeNames) { + public AbstractModel(Context[] params, String[] predLabels, String[] outcomeNames) { init(predLabels,outcomeNames); this.evalParams = new EvalParameters(params,ocNames.length); } - public AbstractModel (Context[] params, String[] predLabels, String[] outcomeNames, int correctionConstant,double correctionParam) { + public AbstractModel(Context[] params, String[] predLabels, String[] outcomeNames, int correctionConstant,double correctionParam) { init(predLabels,outcomeNames); this.evalParams = new EvalParameters(params,correctionConstant,correctionParam,ocNames.length); } @@ -121,7 +121,7 @@ public int getIndex(String outcome) { } public int 
getNumOutcomes() { - return(evalParams.numOutcomes); + return(evalParams.getNumOutcomes()); } /** @@ -146,11 +146,11 @@ public int getNumOutcomes() { */ public final Object[] getDataStructures() { Object[] data = new Object[5]; - data[0] = evalParams.params; + data[0] = evalParams.getParams(); data[1] = pmap; data[2] = ocNames; - data[3] = new Integer((int)evalParams.correctionConstant); - data[4] = new Double(evalParams.correctionParam); + data[3] = new Integer((int)evalParams.getCorrectionConstant()); + data[4] = new Double(evalParams.getCorrectionParam()); return data; } } diff --git a/src/java/opennlp/model/AbstractModelReader.java b/src/java/opennlp/model/AbstractModelReader.java index c562ec2..e837b36 100644 --- a/src/java/opennlp/model/AbstractModelReader.java +++ b/src/java/opennlp/model/AbstractModelReader.java @@ -17,7 +17,12 @@ package opennlp.model; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; import java.util.StringTokenizer; +import java.util.zip.GZIPInputStream; public abstract class AbstractModelReader { @@ -26,29 +31,63 @@ public abstract class AbstractModelReader { * The number of predicates contained in the model. 
*/ protected int NUM_PREDS; + protected DataReader dataReader; + + public AbstractModelReader(File f) throws IOException { + String filename = f.getName(); + InputStream input; + // handle the zipped/not zipped distinction + if (filename.endsWith(".gz")) { + input = new GZIPInputStream(new FileInputStream(f)); + filename = filename.substring(0,filename.length()-3); + } + else { + input = new FileInputStream(f); + } - public AbstractModelReader() { - super(); + // handle the different formats + if (filename.endsWith(".bin")) { + this.dataReader = new BinaryFileDataReader(input); + } + else { // filename ends with ".txt" + this.dataReader = new PlainTextFileDataReader(input); + } } + public AbstractModelReader(DataReader dataReader) { + super(); + this.dataReader = dataReader; + } + /** * Implement as needed for the format the model is stored in. */ - public abstract int readInt() throws java.io.IOException; + public int readInt() throws java.io.IOException { + return dataReader.readInt(); + } /** * Implement as needed for the format the model is stored in. */ - public abstract double readDouble() throws java.io.IOException; + public double readDouble() throws java.io.IOException { + return dataReader.readDouble(); + } /** * Implement as needed for the format the model is stored in. 
*/ - public abstract String readUTF() throws java.io.IOException; + public String readUTF() throws java.io.IOException { + return dataReader.readUTF(); + } - public abstract AbstractModel getModel () throws java.io.IOException; + public AbstractModel getModel() throws IOException { + checkModelType(); + return constructModel(); + } public abstract void checkModelType() throws java.io.IOException; + + public abstract AbstractModel constructModel() throws java.io.IOException; protected String[] getOutcomes() throws java.io.IOException { int numOutcomes = readInt(); diff --git a/src/java/opennlp/model/BinaryFileDataReader.java b/src/java/opennlp/model/BinaryFileDataReader.java new file mode 100644 index 0000000..596cc4c --- /dev/null +++ b/src/java/opennlp/model/BinaryFileDataReader.java @@ -0,0 +1,44 @@ +package opennlp.model; + +import java.io.DataInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.GZIPInputStream; + +public class BinaryFileDataReader implements DataReader { + + private DataInputStream input; + + public BinaryFileDataReader(File f) throws IOException { + if (f.getName().endsWith(".gz")) { + input = new DataInputStream( + new GZIPInputStream(new FileInputStream(f))); + } + else { + input = new DataInputStream(new FileInputStream(f)); + } + } + + public BinaryFileDataReader(InputStream in) { + input = new DataInputStream(in); + } + + public BinaryFileDataReader(DataInputStream in) { + input = in; + } + + public double readDouble() throws IOException { + return input.readDouble(); + } + + public int readInt() throws IOException { + return input.readInt(); + } + + public String readUTF() throws IOException { + return input.readUTF(); + } + +} diff --git a/src/java/opennlp/model/DataReader.java b/src/java/opennlp/model/DataReader.java new file mode 100644 index 0000000..d0c30a1 --- /dev/null +++ b/src/java/opennlp/model/DataReader.java @@ -0,0 +1,12 @@ 
+package opennlp.model; + +import java.io.IOException; + +public interface DataReader { + + public double readDouble() throws IOException; + + public int readInt() throws IOException; + + public String readUTF() throws IOException; +} diff --git a/src/java/opennlp/model/DynamicEvalParameters.java b/src/java/opennlp/model/DynamicEvalParameters.java new file mode 100644 index 0000000..233aa10 --- /dev/null +++ b/src/java/opennlp/model/DynamicEvalParameters.java @@ -0,0 +1,33 @@ +package opennlp.model; + +import java.util.List; + +public class DynamicEvalParameters { + + /** Mapping between outcomes and paramater values for each context. + * The integer representation of the context can be found using pmap.*/ + private List params; + + /** The number of outcomes being predicted. */ + private final int numOutcomes; + + + /** + * Creates a set of paramters which can be evaulated with the eval method. + * @param params The parameters of the model. + * @param numOutcomes The number of outcomes. + */ + public DynamicEvalParameters(List params, int numOutcomes) { + this.params = params; + this.numOutcomes = numOutcomes; + } + + public Context[] getParams() { + return params.toArray(new Context[params.size()]); + } + + public int getNumOutcomes() { + return numOutcomes; + } + +} diff --git a/src/java/opennlp/model/EvalParameters.java b/src/java/opennlp/model/EvalParameters.java index 3df3afd..30f6d35 100644 --- a/src/java/opennlp/model/EvalParameters.java +++ b/src/java/opennlp/model/EvalParameters.java @@ -29,22 +29,18 @@ public class EvalParameters { /** Mapping between outcomes and paramater values for each context. * The integer representation of the context can be found using pmap.*/ - public Context[] params; + private Context[] params; /** The number of outcomes being predicted. */ - public final int numOutcomes; + private final int numOutcomes; /** The maximum number of feattures fired in an event. Usually refered to a C. 
* This is used to normalize the number of features which occur in an event. */ - public double correctionConstant; + private double correctionConstant; /** Stores inverse of the correction constant, 1/C. */ - public final double constantInverse; + private final double constantInverse; /** The correction parameter of the model. */ - public double correctionParam; - /** Log of 1/C; initial value of probabilities. */ - private final double iprob; + private double correctionParam; - private String[] outcomeLabels; - /** * Creates a set of paramters which can be evaulated with the eval method. * @param params The parameters of the model. @@ -58,26 +54,39 @@ public EvalParameters(Context[] params, double correctionParam, double correctio this.numOutcomes = numOutcomes; this.correctionConstant = correctionConstant; this.constantInverse = 1.0 / correctionConstant; - this.iprob = Math.log(1.0/numOutcomes); } public EvalParameters(Context[] params, int numOutcomes) { this(params,0,0,numOutcomes); } - public EvalParameters(Context[] params, int numOutcomes, String[] outcomeLabels) { - this(params,0,0,numOutcomes); - this.outcomeLabels = outcomeLabels; + /* (non-Javadoc) + * @see opennlp.model.EvalParameters#getParams() + */ + public Context[] getParams() { + return params; } - - public void setOutcomes(String[] outcomeLabels) { - this.outcomeLabels = outcomeLabels; + + /* (non-Javadoc) + * @see opennlp.model.EvalParameters#getNumOutcomes() + */ + public int getNumOutcomes() { + return numOutcomes; + } + + public double getCorrectionConstant() { + return correctionConstant; + } + + public double getConstantInverse() { + return constantInverse; + } + + public double getCorrectionParam() { + return correctionParam; } - public String getOutcomeLabel(int oi) { - if (outcomeLabels != null) { - return outcomeLabels[oi]; - } - return null; + public void setCorrectionParam(double correctionParam) { + this.correctionParam = correctionParam; } } \ No newline at end of file diff --git 
a/src/java/opennlp/model/EventCollectorAsStream.java b/src/java/opennlp/model/EventCollectorAsStream.java index a412bd3..647524e 100644 --- a/src/java/opennlp/model/EventCollectorAsStream.java +++ b/src/java/opennlp/model/EventCollectorAsStream.java @@ -26,7 +26,7 @@ * @author Jason Baldridge * @version $Revision$, $Date$ */ -public final class EventCollectorAsStream implements EventStream { +public final class EventCollectorAsStream extends AbstractEventStream { final Event[] events; final int numEvents; int index = 0; @@ -36,12 +36,12 @@ public EventCollectorAsStream (EventCollector ec) { numEvents = events.length; } - public Event nextEvent () { - return events[index++]; + public Event next () { + return events[index++]; } public boolean hasNext () { - return (index < numEvents); + return (index < numEvents); } } diff --git a/src/java/opennlp/model/EventStream.java b/src/java/opennlp/model/EventStream.java index 370cd9f..7bc0c99 100644 --- a/src/java/opennlp/model/EventStream.java +++ b/src/java/opennlp/model/EventStream.java @@ -17,6 +17,8 @@ package opennlp.model; +import java.util.Iterator; + /** * A object which can deliver a stream of training events for the GIS * procedure (or others such as IIS if and when they are implemented). @@ -28,14 +30,14 @@ * @version $Revision$, $Date$ * */ -public interface EventStream { +public interface EventStream extends Iterator{ /** * Returns the next Event object held in this EventStream. * * @return the Event object which is next in this EventStream */ - public Event nextEvent (); + public Event next (); /** * Test whether there are any Events remaining in this EventStream. 
@@ -43,5 +45,6 @@ public interface EventStream { * @return true if this EventStream has more Events */ public boolean hasNext (); + } diff --git a/src/java/opennlp/model/FileEventStream.java b/src/java/opennlp/model/FileEventStream.java index 766cb4e..7cc9ee1 100644 --- a/src/java/opennlp/model/FileEventStream.java +++ b/src/java/opennlp/model/FileEventStream.java @@ -34,7 +34,7 @@ * @author Tom Morton * */ -public class FileEventStream implements EventStream { +public class FileEventStream extends AbstractEventStream { BufferedReader reader; String line; @@ -76,7 +76,7 @@ public boolean hasNext() { } } - public Event nextEvent() { + public Event next() { StringTokenizer st = new StringTokenizer(line); String outcome = st.nextToken(); int count = st.countTokens(); diff --git a/src/java/opennlp/model/GenericModelReader.java b/src/java/opennlp/model/GenericModelReader.java new file mode 100644 index 0000000..8086380 --- /dev/null +++ b/src/java/opennlp/model/GenericModelReader.java @@ -0,0 +1,31 @@ +package opennlp.model; + +import java.io.File; +import java.io.IOException; + +import opennlp.maxent.io.GISModelReader; +import opennlp.perceptron.PerceptronModelReader; + +/** Model reader that reads the stored model-type tag and delegates model construction to a PerceptronModelReader or a GISModelReader. */ public class GenericModelReader extends AbstractModelReader { + + private AbstractModelReader delegateModelReader; + + public GenericModelReader (File f) throws IOException { + super(f); + } + + /** Reads the model-type string and creates the matching delegate reader on top of this reader's dataReader. */ public void checkModelType() throws IOException { + String modelType = readUTF(); + if (modelType.equals("Perceptron")) { + delegateModelReader = new PerceptronModelReader(this.dataReader); + } + else if (modelType.equals("Maxent")) { + delegateModelReader = new GISModelReader(this.dataReader); + } /* NOTE(review): for any other model type delegateModelReader is left unassigned (null), so constructModel() would fail with a NullPointerException; consider rejecting unknown types explicitly. */ + } + + + /** Returns the model built by the delegate chosen in checkModelType(); presumably checkModelType() is always called first (TODO confirm against AbstractModelReader). */ public AbstractModel constructModel() throws IOException { + return delegateModelReader.constructModel(); + } +} diff --git a/src/java/opennlp/model/ObjectDataReader.java b/src/java/opennlp/model/ObjectDataReader.java new file mode 100644 index 0000000..4afc09e --- /dev/null +++ 
b/src/java/opennlp/model/ObjectDataReader.java @@ -0,0 +1,26 @@ +package opennlp.model; + +import java.io.IOException; +import java.io.ObjectInputStream; + +public class ObjectDataReader implements DataReader { + + protected ObjectInputStream ois; + + public ObjectDataReader(ObjectInputStream ois) { + this.ois = ois; + } + + public double readDouble() throws IOException { + return ois.readDouble(); + } + + public int readInt() throws IOException { + return ois.readInt(); + } + + public String readUTF() throws IOException { + return ois.readUTF(); + } + +} diff --git a/src/java/opennlp/model/OnePassDataIndexer.java b/src/java/opennlp/model/OnePassDataIndexer.java index de8a825..018f167 100644 --- a/src/java/opennlp/model/OnePassDataIndexer.java +++ b/src/java/opennlp/model/OnePassDataIndexer.java @@ -104,7 +104,7 @@ private LinkedList computeEventCounts(EventStream eventStream,Map counter = new HashMap(); LinkedList events = new LinkedList(); while (eventStream.hasNext()) { - Event ev = eventStream.nextEvent(); + Event ev = eventStream.next(); events.addLast(ev); update(ev.getContext(),predicateSet,counter,cutoff); } diff --git a/src/java/opennlp/model/PlainTextFileDataReader.java b/src/java/opennlp/model/PlainTextFileDataReader.java new file mode 100644 index 0000000..970289a --- /dev/null +++ b/src/java/opennlp/model/PlainTextFileDataReader.java @@ -0,0 +1,44 @@ +package opennlp.model; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.zip.GZIPInputStream; + +public class PlainTextFileDataReader implements DataReader { + + private BufferedReader input; + + public PlainTextFileDataReader(File f) throws IOException { + if (f.getName().endsWith(".gz")) { + input = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(f)))); + } + else { + input = new BufferedReader(new InputStreamReader(new 
FileInputStream(f))); + } + } + + public PlainTextFileDataReader(InputStream in) { + input = new BufferedReader(new InputStreamReader(in)); + } + + public PlainTextFileDataReader(BufferedReader in) { + input = in; + } + + public double readDouble() throws IOException { + return Double.parseDouble(input.readLine()); + } + + public int readInt() throws IOException { + return Integer.parseInt(input.readLine()); + } + + public String readUTF() throws IOException { + return input.readLine(); + } + +} diff --git a/src/java/opennlp/model/RealValueFileEventStream.java b/src/java/opennlp/model/RealValueFileEventStream.java index c778be9..b2b85ca 100644 --- a/src/java/opennlp/model/RealValueFileEventStream.java +++ b/src/java/opennlp/model/RealValueFileEventStream.java @@ -74,7 +74,7 @@ public static float[] parseContexts(String[] contexts) { return values; } - public Event nextEvent() { + public Event next() { int si = line.indexOf(' '); String outcome = line.substring(0,si); String[] contexts = line.substring(si+1).split(" "); diff --git a/src/java/opennlp/model/TwoPassDataIndexer.java b/src/java/opennlp/model/TwoPassDataIndexer.java index cb1f766..e2282b1 100644 --- a/src/java/opennlp/model/TwoPassDataIndexer.java +++ b/src/java/opennlp/model/TwoPassDataIndexer.java @@ -112,7 +112,7 @@ private int computeEventCounts(EventStream eventStream, Writer eventStore, Map predicateI List eventsToCompare = new ArrayList(numEvents); List indexedContext = new ArrayList(); while (es.hasNext()) { - Event ev = es.nextEvent(); + Event ev = es.next(); String[] econtext = ev.getContext(); ComparableEvent ce; diff --git a/src/java/opennlp/perceptron/BinaryPerceptronModelReader.java b/src/java/opennlp/perceptron/BinaryPerceptronModelReader.java new file mode 100644 index 0000000..f9e6a8e --- /dev/null +++ b/src/java/opennlp/perceptron/BinaryPerceptronModelReader.java @@ -0,0 +1,32 @@ +package opennlp.perceptron; + +import java.io.DataInputStream; +import java.io.File; +import 
java.io.IOException; + +import opennlp.model.BinaryFileDataReader; + +public class BinaryPerceptronModelReader extends PerceptronModelReader { + + + /** + * Constructor which directly instantiates the DataInputStream containing + * the model contents. + * + * @param dis The DataInputStream containing the model information. + */ + public BinaryPerceptronModelReader(DataInputStream dis) { + super(new BinaryFileDataReader(dis)); + } + + /** + * Constructor which takes a File and creates a reader for it. Detects + * whether the file is gzipped or not based on whether the suffix contains + * ".gz" + * + * @param f The File in which the model is stored. + */ + public BinaryPerceptronModelReader (File f) throws IOException { + super(f); + } +} diff --git a/src/java/opennlp/perceptron/BinaryPerceptronModelWriter.java b/src/java/opennlp/perceptron/BinaryPerceptronModelWriter.java new file mode 100644 index 0000000..ba72c7e --- /dev/null +++ b/src/java/opennlp/perceptron/BinaryPerceptronModelWriter.java @@ -0,0 +1,87 @@ +/////////////////////////////////////////////////////////////////////////////// +//Copyright (C) 2001 Jason Baldridge and Gann Bierner + +//This library is free software; you can redistribute it and/or +//modify it under the terms of the GNU Lesser General Public +//License as published by the Free Software Foundation; either +//version 2.1 of the License, or (at your option) any later version. + +//This library is distributed in the hope that it will be useful, +//but WITHOUT ANY WARRANTY; without even the implied warranty of +//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//GNU General Public License for more details. + +//You should have received a copy of the GNU Lesser General Public +//License along with this program; if not, write to the Free Software +//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +package opennlp.perceptron; + +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.zip.GZIPOutputStream; + +import opennlp.model.AbstractModel; + +/** + * Model writer that saves models in binary format. + * + * @author Jason Baldridge + * @version $Revision: 1.1 $, $Date: 2008/11/06 19:59:44 $ + */ +public class BinaryPerceptronModelWriter extends PerceptronModelWriter { + DataOutputStream output; + + /** + * Constructor which takes a model and a File and prepares itself to + * write the model to that file. The output is gzipped when the file + * name ends with ".gz". + * + * @param model The model which is to be persisted. + * @param f The File in which the model is to be persisted. + */ + public BinaryPerceptronModelWriter (AbstractModel model, File f) throws IOException { + + super(model); + + if (f.getName().endsWith(".gz")) { + output = new DataOutputStream( + new GZIPOutputStream(new FileOutputStream(f))); + } + else { + output = new DataOutputStream(new FileOutputStream(f)); + } + } + + /** + * Constructor which takes a model and a DataOutputStream and prepares + * itself to write the model to that stream. + * + * @param model The model which is to be persisted. + * @param dos The stream which will be used to persist the model.
+ */ + public BinaryPerceptronModelWriter (AbstractModel model, DataOutputStream dos) { + super(model); + output = dos; + } + + protected void writeUTF (String s) throws java.io.IOException { + output.writeUTF(s); + } + + protected void writeInt (int i) throws java.io.IOException { + output.writeInt(i); + } + + protected void writeDouble (double d) throws java.io.IOException { + output.writeDouble(d); + } + + protected void close () throws java.io.IOException { + output.flush(); + output.close(); + } + +} diff --git a/src/java/opennlp/perceptron/PerceptronModel.java b/src/java/opennlp/perceptron/PerceptronModel.java new file mode 100644 index 0000000..06ed7e2 --- /dev/null +++ b/src/java/opennlp/perceptron/PerceptronModel.java @@ -0,0 +1,109 @@ +package opennlp.perceptron; + +import java.io.BufferedReader; +import java.io.File; +import java.io.InputStreamReader; +import java.text.DecimalFormat; + +import opennlp.model.AbstractModel; +import opennlp.model.Context; +import opennlp.model.EvalParameters; + +public class PerceptronModel extends AbstractModel { + + + public PerceptronModel(Context[] params, String[] predLabels, String[] outcomeNames) { + super(params,predLabels,outcomeNames); + } + + public double[] eval(String[] context) { + return eval(context,new double[evalParams.getNumOutcomes()]); + } + + public double[] eval(String[] context, float[] values) { + return eval(context,values,new double[evalParams.getNumOutcomes()]); + } + + public double[] eval(String[] context, double[] probs) { + return eval(context,null,probs); + } + + public double[] eval(String[] context, float[] values,double[] outsums) { + int[] scontexts = new int[context.length]; + java.util.Arrays.fill(outsums, 0); + for (int i=0; i= 0) { + Context predParams = params[context[ci]]; + activeOutcomes = predParams.getOutcomes(); + activeParameters = predParams.getParameters(); + if (values != null) { + value = values[ci]; + } + for (int ai = 0; ai < activeOutcomes.length; ai++) { + int oid = 
activeOutcomes[ai]; + prior[oid] += activeParameters[ai] * value; + } + } + } + if (normalize) { + double normal = 0.0; + double min = prior[0]; + for (int oid = 0; oid < model.getNumOutcomes(); oid++) { + if (prior[oid] < min) { + min = prior[oid]; + } + } + for (int oid = 0; oid < model.getNumOutcomes(); oid++) { + if (min < 0) { + prior[oid]+=(-1*min); + } + normal += prior[oid]; + } + if (normal == 0.0) { + for (int oid = 0; oid < model.getNumOutcomes(); oid++) { + prior[oid] = (double) 1/model.getNumOutcomes(); + } + } + else { + for (int oid = 0; oid < model.getNumOutcomes(); oid++) { + prior[oid] /= normal; + } + } + } + return prior; + } + + public static void main(String[] args) throws java.io.IOException { + if (args.length == 0) { + System.err.println("Usage: PerceptronModel modelname < contexts"); + System.exit(1); + } + AbstractModel m = new PerceptronModelReader(new File(args[0])).getModel(); + BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); + DecimalFormat df = new java.text.DecimalFormat(".###"); + for (String line = in.readLine(); line != null; line = in.readLine()) { + String[] context = line.split(" "); + double[] dist = m.eval(context); + for (int oi=0;oiPerceptron (model type identifier) + *
    1. # of parameters (int) + *
    2. # of outcomes (int) + *
    * list of outcome names (String) + *
    3. # of different types of outcome patterns (int) + *
    * list of (int int[]) + *
    [# of predicates for which outcome pattern is true] [outcome pattern] + *
    4. # of predicates (int) + *
    * list of predicate names (String) + * + *

    If you are creating a reader for a format which won't work with this + * (perhaps a database or xml file), override this method and ignore the + * other methods provided in this abstract class. + * + * @return The PerceptronModel stored in the format and location specified to + * this PerceptronModelReader (usually via its constructor). + */ + public AbstractModel constructModel() throws IOException { + String[] outcomeLabels = getOutcomes(); + int[][] outcomePatterns = getOutcomePatterns(); + String[] predLabels = getPredicates(); + Context[] params = getParameters(outcomePatterns); + + return new PerceptronModel(params, + predLabels, + outcomeLabels); + } + + public void checkModelType() throws java.io.IOException { + String modelType = readUTF(); + if (!modelType.equals("Perceptron")) + System.out.println("Error: attempting to load a "+modelType+ + " model as a Perceptron model."+ + " You should expect problems."); + } +} diff --git a/src/java/opennlp/perceptron/PerceptronModelWriter.java b/src/java/opennlp/perceptron/PerceptronModelWriter.java new file mode 100644 index 0000000..c9ccbfe --- /dev/null +++ b/src/java/opennlp/perceptron/PerceptronModelWriter.java @@ -0,0 +1,151 @@ +package opennlp.perceptron; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import opennlp.model.AbstractModel; +import opennlp.model.ComparablePredicate; +import opennlp.model.Context; + +/** + * Abstract parent class for Perceptron writers. It provides the persist method + * which takes care of the structure of a stored document, and requires an + * extending class to define precisely how the data should be stored.
+ * + * @author Jason Baldridge + * @version $Revision: 1.1 $, $Date: 2008/11/06 19:59:44 $ + */ +public abstract class PerceptronModelWriter { + protected Context[] PARAMS; + protected String[] OUTCOME_LABELS; + protected String[] PRED_LABELS; + int numOutcomes; + + public PerceptronModelWriter (AbstractModel model) { + + Object[] data = model.getDataStructures(); + this.numOutcomes = model.getNumOutcomes(); + PARAMS = (Context[]) data[0]; + Map pmap = (Map)data[1]; + OUTCOME_LABELS = (String[])data[2]; + + PRED_LABELS = new String[pmap.size()]; + for (String pred : pmap.keySet()) { + PRED_LABELS[pmap.get(pred)] = pred; + } + } + + protected abstract void writeUTF (String s) throws java.io.IOException; + protected abstract void writeInt (int i) throws java.io.IOException; + protected abstract void writeDouble (double d) throws java.io.IOException; + protected abstract void close () throws java.io.IOException; + + /** + * Writes the model to disk, using the writeX() methods + * provided by extending classes. + * + *

    If you wish to create a PerceptronModelWriter which uses a different + * structure, it will be necessary to override the persist method in + * addition to implementing the writeX() methods. + */ + public void persist() throws IOException { + + // the type of model (GIS) + writeUTF("Perceptron"); + + // the mapping from outcomes to their integer indexes + writeInt(OUTCOME_LABELS.length); + + for (int i=0; i modelDistribution[max]) { + max = oi; + } + } + if (max == outcomeList[ei]) { + numCorrect += numTimesEventsSeen[ei]; + } + for (int oi = 0;oi "+averageParams[pi].getParameters()[oi]); + updates[pi][oi][VALUE] = (int) params[pi].getParameters()[oi]; + updates[pi][oi][ITER] = iteration; + updates[pi][oi][EVENT] = ei; + } + } + } + } + else { + if (modelDistribution[oi] > 0) { + for (int ci = 0; ci < contexts[ei].length; ci++) { + int pi = contexts[ei][ci]; + if (values == null) { + params[pi].updateParameter(oi,-1); + } + else { + params[pi].updateParameter(oi, values[ei][ci]*-1); + } + if (useAverage) { + if (updates[pi][oi][VALUE] != 0) { + averageParams[pi].updateParameter(oi,updates[pi][oi][VALUE]*(numEvents*(iteration-updates[pi][oi][ITER])+(ei-updates[pi][oi][EVENT]))); + } + //System.err.println("updates["+pi+"]["+oi+"]=("+updates[pi][oi][ITER]+","+updates[pi][oi][EVENT]+","+updates[pi][oi][VALUE]+") + ("+iteration+","+ei+","+params[pi].getParameters()[oi]+") -> "+averageParams[pi].getParameters()[oi]); + updates[pi][oi][VALUE] = (int) params[pi].getParameters()[oi]; + updates[pi][oi][ITER] = iteration; + updates[pi][oi][EVENT] = ei; + } + } + } + } + } + } + } + //finish average computation + double totIterations = (double) iterations*numEvents; + if (useAverage && iteration == iterations-1) { + for (int pi = 0; pi < numPreds; pi++) { + double[] predParams = averageParams[pi].getParameters(); + for (int oi = 0;oi "+averageParams[pi].getParameters()[oi]); + } + } + } + } + display(". 
"+((double) numCorrect / numEvents) + "\n"); + } +} diff --git a/src/java/opennlp/perceptron/PlainTextPerceptronModelReader.java b/src/java/opennlp/perceptron/PlainTextPerceptronModelReader.java new file mode 100644 index 0000000..fa964fd --- /dev/null +++ b/src/java/opennlp/perceptron/PlainTextPerceptronModelReader.java @@ -0,0 +1,31 @@ +package opennlp.perceptron; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; + +import opennlp.model.PlainTextFileDataReader; + +public class PlainTextPerceptronModelReader extends PerceptronModelReader { + + /** + * Constructor which directly instantiates the BufferedReader containing + * the model contents. + * + * @param br The BufferedReader containing the model information. + */ + public PlainTextPerceptronModelReader(BufferedReader br) { + super(new PlainTextFileDataReader(br)); + } + + /** + * Constructor which takes a File and creates a reader for it. Detects + * whether the file is gzipped or not based on whether the suffix contains + * ".gz". + * + * @param f The File in which the model is stored. + */ + public PlainTextPerceptronModelReader (File f) throws IOException { + super(f); + } +} diff --git a/src/java/opennlp/perceptron/PlainTextPerceptronModelWriter.java b/src/java/opennlp/perceptron/PlainTextPerceptronModelWriter.java new file mode 100644 index 0000000..63627ba --- /dev/null +++ b/src/java/opennlp/perceptron/PlainTextPerceptronModelWriter.java @@ -0,0 +1,93 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2001 Jason Baldridge and Gann Bierner +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +////////////////////////////////////////////////////////////////////////////// +package opennlp.perceptron; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.util.zip.GZIPOutputStream; + +import opennlp.model.AbstractModel; + +/** + * Model writer that saves models in plain text format. + * + * @author Jason Baldridge + * @version $Revision: 1.1 $, $Date: 2008/11/06 19:59:44 $ + */ +public class PlainTextPerceptronModelWriter extends PerceptronModelWriter { + BufferedWriter output; + + /** + * Constructor which takes a PerceptronModel and a File and prepares itself to + * write the model to that file. Detects whether the file is gzipped or not + * based on whether the suffix contains ".gz". + * + * @param model The PerceptronModel which is to be persisted. + * @param f The File in which the model is to be persisted. + */ + public PlainTextPerceptronModelWriter (AbstractModel model, File f) + throws IOException, FileNotFoundException { + + super(model); + if (f.getName().endsWith(".gz")) { + output = new BufferedWriter(new OutputStreamWriter( + new GZIPOutputStream(new FileOutputStream(f)))); + } + else { + output = new BufferedWriter(new FileWriter(f)); + } + } + + /** + * Constructor which takes a PerceptronModel and a BufferedWriter and prepares + * itself to write the model to that writer. 
+ * + * @param model The PerceptronModel which is to be persisted. + * @param bw The BufferedWriter which will be used to persist the model. + */ + public PlainTextPerceptronModelWriter (AbstractModel model, BufferedWriter bw) { + super(model); + output = bw; + } + + protected void writeUTF (String s) throws java.io.IOException { + output.write(s); + output.newLine(); + } + + protected void writeInt (int i) throws java.io.IOException { + output.write(Integer.toString(i)); + output.newLine(); + } + + protected void writeDouble (double d) throws java.io.IOException { + output.write(Double.toString(d)); + output.newLine(); + } + + protected void close () throws java.io.IOException { + output.flush(); + output.close(); + } + +} diff --git a/src/java/opennlp/perceptron/SuffixSensitivePerceptronModelWriter.java b/src/java/opennlp/perceptron/SuffixSensitivePerceptronModelWriter.java new file mode 100644 index 0000000..f063896 --- /dev/null +++ b/src/java/opennlp/perceptron/SuffixSensitivePerceptronModelWriter.java @@ -0,0 +1,99 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2001 Jason Baldridge and Gann Bierner +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+////////////////////////////////////////////////////////////////////////////// +package opennlp.perceptron; + +import java.io.BufferedWriter; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.util.zip.GZIPOutputStream; + +import opennlp.model.AbstractModel; + +/** + * A writer for Perceptron models which inspects the filename and invokes the + * appropriate PerceptronModelWriter depending on the filename's suffixes. + * + *

    The following assumptions are made about suffixes: + *

  • .gz --> the file is gzipped (must be the last suffix) + *
  • .txt --> the file is plain text + *
  • .bin --> the file is binary + * + * @author Jason Baldridge + * @version $Revision: 1.1 $, $Date: 2008/11/06 19:59:44 $ + */ +public class SuffixSensitivePerceptronModelWriter extends PerceptronModelWriter { + private final PerceptronModelWriter suffixAppropriateWriter; + + /** + * Constructor which takes a model and a File and invokes the + * PerceptronModelWriter appropriate for the suffix. + * + * @param model The model which is to be persisted. + * @param f The File in which the model is to be stored. + */ + public SuffixSensitivePerceptronModelWriter (AbstractModel model, File f) + throws IOException { + + super (model); + + OutputStream output; + String filename = f.getName(); + + // handle the zipped/not zipped distinction + if (filename.endsWith(".gz")) { + output = new GZIPOutputStream(new FileOutputStream(f)); + filename = filename.substring(0,filename.length()-3); + } + else { + output = new DataOutputStream(new FileOutputStream(f)); + } + + // handle the different formats + if (filename.endsWith(".bin")) { + suffixAppropriateWriter = + new BinaryPerceptronModelWriter(model, + new DataOutputStream(output)); + } + else { // default is ".txt" + suffixAppropriateWriter = + new PlainTextPerceptronModelWriter(model, + new BufferedWriter(new OutputStreamWriter(output))); + } + } + + protected void writeUTF (String s) throws java.io.IOException { + suffixAppropriateWriter.writeUTF(s); + } + + protected void writeInt (int i) throws java.io.IOException { + suffixAppropriateWriter.writeInt(i); + } + + protected void writeDouble (double d) throws java.io.IOException { + suffixAppropriateWriter.writeDouble(d); + } + + protected void close () throws java.io.IOException { + suffixAppropriateWriter.close(); + } + +}